Hadoop TopN

1.MySort

package s27;

import org.apache.hadoop.io.IntWritable;

/**
 * Raw-bytes comparator that reverses {@link IntWritable}'s natural order,
 * so the shuffle/sort phase delivers keys to the reducer largest-first
 * (required for the TopN job: the reducer keeps only the first N records).
 */
public class MySort extends IntWritable.Comparator {

    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Swap the operands instead of negating the result: same ordering,
        // and the idiom is immune to the -Integer.MIN_VALUE negation hazard.
        return super.compare(b2, s2, l2, b1, s1, l1);
    }

}

2.MyMap

package s27;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;

/**
 * Mapper for the TopN job.
 *
 * <p>Each input line is expected to be {@code "<name> <number>"}. The mapper
 * emits (number, name) so the framework sorts records by the numeric value
 * (descending, via the custom key comparator). Blank or malformed lines are
 * skipped instead of failing the whole task.
 */
public class MyMap extends Mapper<LongWritable,Text,IntWritable,Text> {

// Reused across map() calls to avoid allocating a new Writable per record
// (standard Hadoop idiom; the framework serializes them on write()).
IntWritable ha_numReal=new IntWritable();
Text ha_name=new Text();

/**
 * @param key     byte offset of the line in the input split (unused)
 * @param value   one line of text: "&lt;name&gt; &lt;number&gt;"
 * @param context sink for the (number, name) pair
 */
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    String line = value.toString().trim();
    if (line.isEmpty()) {
        return; // skip blank lines
    }

    // Split on any run of whitespace; the original single-space split
    // broke on tabs or consecutive spaces.
    String[] info = line.split("\\s+");
    if (info.length < 2) {
        return; // malformed record: numeric field missing
    }
    String name = info[0];

    // String --> int; skip (don't crash on) non-numeric values
    int numReal;
    try {
        numReal = Integer.parseInt(info[1]);
    } catch (NumberFormatException e) {
        return; // malformed record: value is not an integer
    }

    // java --> hadoop
    ha_numReal.set(numReal);
    ha_name.set(name);

    context.write(ha_numReal, ha_name);
}

}

3.MyRed

package s27;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reducer for the TopN job.
 *
 * <p>Keys arrive sorted descending (see the job's key comparator), so this
 * reducer simply emits the first N records it sees across ALL reduce() calls
 * and ignores the rest. N defaults to 3 and can be overridden with the
 * {@code "topn"} job configuration property. Note: correct global TopN
 * requires a single reducer.
 */
public class MyRed extends Reducer<IntWritable,Text,IntWritable,Text> {

// Records emitted so far; deliberately persists across reduce() calls
// so the cap applies to the whole output, not per key.
private int count = 0;

// How many records to emit; configurable, defaults to the original 3.
private int topN = 3;

@Override
protected void setup(Context context) {
    // Backward compatible: without the "topn" property this behaves
    // exactly like the original hard-coded top-3.
    topN = context.getConfiguration().getInt("topn", 3);
}

/**
 * @param key     the numeric value (arrives largest-first)
 * @param values  all names sharing that value
 * @param context sink for the surviving (value, name) pairs
 */
@Override
protected void reduce(IntWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
    for (Text t : values) {
        if (count >= topN) {
            return; // quota reached; nothing more to emit, ever
        }
        context.write(key, t);
        count++;
    }
}

}

4.MyJob

上一篇:MapReduce的分组topN高效实现


下一篇:数据可视化之powerBI基础(十九)学会使用Power BI的参数,轻松搞定动态分析