开发者学堂课程【Hadoop 分布式计算框架 MapReduce: NLineInputFormat 案例实现】学习笔记,与课程紧密联系,让用户快速学习知识。
课程地址:https://developer.aliyun.com/learning/course/94/detail/1507
NLineInputFormat 案例实现
目录:
一、编写 Mapper 类
二、编写 Reducer 类
三、编写 Driver 类
一、 编写 Mapper 类
import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class NLineMapper extends Mapper<LongNritable, Text, Text, IntWritable>(
Text k = new Text();
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException (
banzhang ni hao
// 1获取一行
String line = value.toString();
// 2 切割
String[] words = line.split("");
// 3 循环写出
for (String word : words)
k.set(word);
context.write(k, v)
二、 编写 Reducer 类
package com.atguigu.mr.nline;
import java.io.IOException;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class NLineReducer extends Reducer<Text, IntWritable, Text, IntWritable>(
@Override
protected void reduce(Text key, Iterable<IntWritable> values,
Reducer<Text, IntWritable, Text, IntWritable>.Context context) throws IOException, InterruptedException (
// 1累加求和
int sum =0;
for (IntWritable value: values)
sum += value.get();
IntWritable V= new IntWritable();
//2写出
context unite(kav valie).
三、 编写 Driver 类
// The input and output paths must be set to real paths on your own machine.
args = new String[] { "e:/input/inputword", "e:/output1" };

// 1. Get the job object
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration);

// 7. Put three lines (records) into each InputSplit
NLineInputFormat.setNumLinesPerSplit(job, 3);

// 8. Use NLineInputFormat to split the input
job.setInputFormatClass(NLineInputFormat.class);

// 2. Set the jar location and associate the mapper and reducer
job.setJarByClass(NLineDriver.class);
job.setMapperClass(NLineMapper.class);
job.setReducerClass(NLineReducer.class);

// 3. Set the map output key/value types
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);

// 4. Set the final output key/value types
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);

// 5. Set the input and output data paths
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, new Path(args[1]));