我是大数据领域的新手,被分配了一个 POC 任务:处理 Web 应用程序生成的日志。我已经在 Linux 虚拟机上以伪分布式模式成功搭建了 Hadoop,并使用 Flume 把 Windows 服务器上的 Web 服务器日志导入到了 HDFS。接下来我编写了一个用于日志分析的 MapReduce 程序,它在 Eclipse 中运行正常。但是当我导出 jar 并把它放到 Hadoop 虚拟机上运行时,作业显示成功完成,HDFS 上也创建了输出目录,但其中的 part-* 文件是空的。我已经在本地 Eclipse 中用同样的代码验证过输入数据集没有问题。我也尝试了远程调试:main 方法中的断点能够命中,但 map 方法中的断点没有命中。非常感谢任何帮助。
我已经在网上搜索了很久,但没有找到类似的问题。以下是我的代码:
public class OneLoadTransactionsSuccessCount {
public static class OneLoadLogMapper extends Mapper<Object, Text, Text, IntWritable> {
private final static IntWritable one = new IntWritable(1);
private Text word = new Text();
private static final Logger logger = Logger.getLogger(OneLoadLogMapper.class);
private final static String SUCCESS_CODE = "0";
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
SimpleDateFormat sdf = new SimpleDateFormat("hh:mm dd MMM, yyyy");
String text = value.toString();
logger.info("text:::: " + text);
System.out.println("text:::: " + text);
int startingTag = text.indexOf("[#");
int endingTag = text.indexOf("#]");
if (startingTag != -1 && endingTag != -1) {
try {
String completeLog = text.substring(startingTag + 1, endingTag);
logger.info("completeLog:::: " + completeLog);
System.out.println("completeLog:::: " + completeLog);
String[] tokens = completeLog.split("\\|");
if (tokens != null && tokens.length > 0) {
logger.info(tokens[1]);
System.out.println(tokens[1]);
if (tokens[6] != null) {
String responseXML = tokens[6];
logger.info("responseXML:::: " + responseXML);
System.out.println("responseXML:::: " + responseXML);
JAXBContext jaxbContext = JAXBContext.newInstance(LoadResponseMsg.class);
Unmarshaller jaxbUnmarshaller = jaxbContext.createUnmarshaller();
StringReader reader = new StringReader(responseXML);
InputStream inputStream = new ByteArrayInputStream(
responseXML.getBytes(Charset.forName("UTF-8")));
TransformerFactory factory = TransformerFactory.newInstance();
Source xslt = new StreamSource(new File("removeNs.xslt"));
Transformer transformer = factory.newTransformer(xslt);
Source src = new StreamSource(inputStream);
transformer.transform(src, new StreamResult(new File("tempoutput.xml")));
File responseXMLFile = new File("tempoutput.xml");
jaxbUnmarshaller.setEventHandler(new ValidationEventHandler() {
@Override
public boolean handleEvent(ValidationEvent event) {
throw new RuntimeException(event.getMessage(), event.getLinkedException());
}
});
LoadResponseMsg loadResponseMsg = (LoadResponseMsg) jaxbUnmarshaller
.unmarshal(responseXMLFile);
if (loadResponseMsg != null) {
logger.info("reader:::: " + reader.toString());
System.out.println("reader:::: " + reader.toString());
if (loadResponseMsg.getResponseHeader().getResponseCode()
.equalsIgnoreCase(SUCCESS_CODE)) {
logger.info("status::: " + loadResponseMsg.getLoadResponse().getDescription());
System.out
.println("status::: " + loadResponseMsg.getLoadResponse().getDescription());
word.set(loadResponseMsg.getLoadResponse().getCompanyShortName());
context.write(word, one);
}
}
}
}
} catch (JAXBException e) {
logger.error(e);
} catch (IOException e) {
logger.error(e);
} catch (Exception e) {
logger.error(e);
}
}
}
}
public static class OneLoadLogReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
private IntWritable result = new IntWritable();
public void reduce(Text key, Iterable<IntWritable> values, Context context)
throws IOException, InterruptedException {
int sum = 0;
for (IntWritable val : values) {
sum += val.get();
}
result.set(sum);
context.write(key, result);
}
}
public static void main(String[] args) throws Exception {
try {
Configuration conf = new Configuration();
System.out.println("in main method");
Job job = Job.getInstance(conf, "word count");
job.setJarByClass(OneLoadTransactionsSuccessCount.class);
job.setMapperClass(OneLoadLogMapper.class);
job.setCombinerClass(OneLoadLogReducer.class);
job.setReducerClass(OneLoadLogReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.addInputPath(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));
System.out.println("about to run the job");
System.exit(job.waitForCompletion(true) ? 0 : 1);
} catch (Exception e) {
System.err.println(e);
e.printStackTrace();
}
}
}
原因在于 map 方法中涉及文件 IO 操作:它要从磁盘读取 removeNs.xslt,并把转换结果写入 tempoutput.xml。我在打 jar 包时已经把所需文件加入了类路径,但我的开发环境是 Windows,而程序运行在 Linux 上。问题正是由文件系统不同引起的:程序在 Linux 上到错误的位置去查找这些文件。