HBase WordCount (MapReduce reading from and writing to HBase tables)

package com.neworigin.HBaseMR;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;

public class HbaseMRTest {
    static Configuration conf = null;

    static {
        // Three ways to set up the Configuration:
        // 1) drop hbase-site.xml into src (the classpath) and just call create()
        conf = HBaseConfiguration.create();
        // 2) set the ZooKeeper quorum with the port inline
        // conf.set("hbase.zookeeper.quorum", "s100:2181,s101:2181,s102:2181");
        // 3) set the servers and the client port separately
        conf.set("hbase.zookeeper.quorum", "s100,s101,s102");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
    }

    // Table layout (a setup sketch follows the class below):
    // public static final String tablename = "wordtest"; // input table
    // public static final String colf = "content";       // column family
    // public static final String col = "info";           // column qualifier
    //
    // public static final String tablename2 = "stat";    // output table
    //
    // public static void initTB() {
    // }
    // Mapper over an HBase table; <Text, IntWritable> are the output types
    public static class HBmapper extends TableMapper<Text, IntWritable> {
        private static IntWritable one = new IntWritable(1);
        private static Text word = new Text();

        // Input types: key is the row key, value is the Result holding one row
        @Override
        protected void map(ImmutableBytesWritable key, Result value,
                Mapper<ImmutableBytesWritable, Result, Text, IntWritable>.Context context)
                throws IOException, InterruptedException {
            for (Cell cell : value.rawCells()) {
                word.set(CellUtil.cloneValue(cell)); // read the cell value
                context.write(word, one);            // emit: word -> 1
            }
        }
    }
    // Reducer that writes its results back into an HBase table
    public static class HBreducer extends TableReducer<Text, IntWritable, ImmutableBytesWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values,
                Reducer<Text, IntWritable, ImmutableBytesWritable, Mutation>.Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            // add up the counts for this word
            for (IntWritable value : values) {
                sum += value.get();
            }
            Put put = new Put(Bytes.toBytes(key.toString())); // the word becomes the row key
            put.add(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(String.valueOf(sum)));
            // writing to HBase needs both the row key and the Put
            context.write(new ImmutableBytesWritable(Bytes.toBytes(key.toString())), put);
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job = new Job(conf, "HBaseMR");
        Scan scan = new Scan();
        // read from "wordtest" with HBmapper, write the aggregated counts into "stat" with HBreducer
        TableMapReduceUtil.initTableMapperJob("wordtest", scan, HBmapper.class, Text.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("stat", HBreducer.class, job);
        job.waitForCompletion(true);
        System.out.println("finished");
    }
}
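Both tables have to exist before the job runs: "wordtest" (column family "content", qualifier "info") as input and "stat" as output. Below is a minimal sketch of the table setup that the commented-out initTB() hints at, using the same old-style client API (HBaseAdmin, HTable, Put.add) as the job above. The class name InitTables, the createIfMissing helper, and the sample rows are assumptions added for illustration, not part of the original post.

package com.neworigin.HBaseMR;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

// Sketch only: creates the input/output tables and loads a few sample rows.
public class InitTables {

    public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        conf.set("hbase.zookeeper.quorum", "s100,s101,s102");
        conf.set("hbase.zookeeper.property.clientPort", "2181");

        HBaseAdmin admin = new HBaseAdmin(conf);
        // input table read by HBmapper
        createIfMissing(admin, "wordtest", "content");
        // output table written by HBreducer (same family/qualifier as in the reducer)
        createIfMissing(admin, "stat", "content");
        admin.close();

        // a few sample rows so the job has something to count (assumed data)
        HTable table = new HTable(conf, "wordtest");
        String[] words = {"hello", "world", "hello"};
        for (int i = 0; i < words.length; i++) {
            Put put = new Put(Bytes.toBytes("row" + i));
            put.add(Bytes.toBytes("content"), Bytes.toBytes("info"), Bytes.toBytes(words[i]));
            table.put(put);
        }
        table.close();
    }

    private static void createIfMissing(HBaseAdmin admin, String name, String family) throws IOException {
        if (!admin.tableExists(name)) {
            HTableDescriptor desc = new HTableDescriptor(TableName.valueOf(name));
            desc.addFamily(new HColumnDescriptor(family));
            admin.createTable(desc);
        }
    }
}

After the job finishes, the word counts can be checked with a scan of the "stat" table (for example, scan 'stat' in the HBase shell).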