Java大数据开发(三)Hadoop(22)-NLineInputFormat案例

导读：上一节我们讲解了FileInputFormat实现类有很多，本节讲解实现类NLineInputFormat的案例操作。

hadoop ni haoxiaoming hive helloworldhadoop ni haoxiaoming hive helloworldhadoop ni haoxiaoming hive helloworldhadoop ni haoxiaoming hive helloworldhadoop ni haoxiaoming hive helloworldxiaoming hive helloworld

public class NLineMapper extends Mapper<LongWritable, Text, Text, LongWritable>{ private Text k = new Text(); private LongWritable v = new LongWritable(1); @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { // 1 获取一行 String line = value.toString(); // 2 切割 String[] splited = line.split(" "); // 3 循环写出 for (int i = 0; i < splited.length; i++) { k.set(splited[i]); context.write(k, v); } }}

public class NLineReducer extends Reducer<Text, LongWritable, Text, LongWritable>{ LongWritable v = new LongWritable(); @Override protected void reduce(Text key, Iterable<LongWritable> values, Context context) throws IOException, InterruptedException { long sum = 0l; // 1 汇总 for (LongWritable value : values) { sum += value.get(); } v.set(sum); // 2 输出 context.write(key, v); }}

public class NLineDriver { public static void main(String[] args) throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {// 输入输出路径需要根据自己电脑上实际的输入输出路径设置args = new String[] { "d:/input/inputword", "d:/output1" }; // 1 获取job对象 Configuration configuration = new Configuration(); Job job = Job.getInstance(configuration); // 7设置每个切片InputSplit中划分三条记录 NLineInputFormat.setNumLinesPerSplit(job, 3); // 8使用NLineInputFormat处理记录数 job.setInputFormatClass(NLineInputFormat.class); // 2设置jar包位置，关联mapper和reducer job.setJarByClass(NLineDriver.class); job.setMapperClass(NLineMapper.class); job.setReducerClass(NLineReducer.class); // 3设置map输出kv类型 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LongWritable.class); // 4设置最终输出kv类型 job.setOutputKeyClass(Text.class); job.setOutputValueClass(LongWritable.class); // 5设置输入输出数据路径 FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); // 6提交job job.waitForCompletion(true); }}

vlambda博客
学习文章列表