页面浏览量统计功能实现:
统计页面浏览量功能:
思路:统计页面浏览量就是统计访问记录的总条数。因为还没学习用SQL的方式进行统计,所以使用MapReduce编程的方式:我要做的就是在Map阶段把每一行记录映射成一个固定的key,并把value赋值为1,然后在Reduce阶段进行累加操作。
用到了GetPageId
Mapper类:
static class LogMapper extends Mapper<LongWritable, Text, Text, IntWritable>{ //定义ONE全局变量赋值1 private IntWritable ONE = new IntWritable(1); @Override protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, IntWritable>.Context context) throws IOException, InterruptedException { //读取一行日志 String log = value.toString(); //解析日志然后查询url Map<String, String> info = new LogParser().parse(log); String url = info.get("url");//key //通过url找id String id = new GetPageId().getPageId(url); //写入上下文 context.write(new Text(id), ONE); } }
Reducer类
static class LogReducer extends Reducer<Text, IntWritable, NullWritable, IntWritable>{ @Override protected void reduce(Text key, Iterable<IntWritable> values, Reducer<Text, IntWritable, NullWritable, IntWritable>.Context context) throws IOException, InterruptedException { //定义一个空变量 int sum = 0 ; //遍历 for (IntWritable value : values) { sum += value.get(); } //将结果写入上下文 context.write(NullWritable.get(), new IntWritable(sum)); } }
Submit类
public static void main(String[] args) throws Exception { // 加载配置文件 Configuration conf = new Configuration(); //创建hdfs对象 FileSystem fs = FileSystem.get(conf); //判断输出路径是否重复 if(fs.exists(new Path(args[1]))) { fs.delete(new Path(args[1]),true); } // 创建Job对象 Job job = Job.getInstance(conf); // 设置提交主类 job.setJarByClass(PathApp.class); // 设置Mapper类相关的参数 job.setMapperClass(LogMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(IntWritable.class); // 设置Reducer类相关的参数 job.setReducerClass(LogReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // 设置输入路径 FileInputFormat.setInputPaths(job, new Path(args[0])); // 设置输出路径 FileOutputFormat.setOutputPath(job, new Path(args[1])); // 提交任务 job.waitForCompletion(true); }