I. Read/Write Concepts
1. System tables (a small sketch that scans hbase:meta follows this block)
hbase:namespace
  Stores the information for every namespace in HBase.
hbase:meta
  rowkey: the region name of every region of every table in HBase
  column info:regioninfo: the region's name and its key range
  column info:server: which regionserver currently hosts the region
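To make hbase:meta concrete, here is a minimal sketch (using the same old-style client API as the code in part III) that scans the meta table and prints each region name together with its info:server column. info:regioninfo holds a serialized HRegionInfo rather than plain text, so it is not printed; the class name is made up for illustration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class MetaScanSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable meta = new HTable(conf, "hbase:meta");
        ResultScanner rs = meta.getScanner(new Scan());
        for (Result r : rs) {
            // rowkey of hbase:meta = region name (table,startkey,timestamp.encoded.)
            String regionName = Bytes.toString(r.getRow());
            // info:server = host:port of the regionserver currently hosting the region
            byte[] server = r.getValue(Bytes.toBytes("info"), Bytes.toBytes("server"));
            System.out.println(regionName + " -> "
                    + (server == null ? "(unassigned)" : Bytes.toString(server)));
        }
        rs.close();
        meta.close();
    }
}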
2. Read/write lookup path
tbname, rowkey -> region -> regionserver -> store -> storefile
All of this, however, only works once the meta table has been consulted; so how is the meta table itself located?
3. Read flow (a region-location sketch follows this list)
-> Locate the region for the given table name and rowkey:
   -> ZooKeeper stores the location of the meta table's region
   -> the meta table is then read to get the target region's information
-> Connect to the regionserver hosting that region
-> Locate the region on that regionserver
   -> read the memstore first
   -> then read the storefiles
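The lookup chain above is normally hidden inside the client, but the old HTable API can show which region and regionserver a given rowkey resolves to. A minimal sketch, assuming the nstest1:tb1 table and rowkey 103 used in part III; the class name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;

public class RegionLookupSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "nstest1:tb1");
        // The client resolves rowkey -> region -> regionserver via hbase:meta
        // (whose own location it first reads from ZooKeeper) and caches the result.
        HRegionLocation loc = table.getRegionLocation(Bytes.toBytes("103"));
        System.out.println("region : " + loc.getRegionInfo().getRegionNameAsString());
        System.out.println("server : " + loc.getHostname() + ":" + loc.getPort());
        table.close();
    }
}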
4. Write flow (a flush/compact/split sketch follows this list)
-> Locate the region for the given table name and rowkey:
   -> ZooKeeper stores the location of the meta table's region
   -> the meta table is then read to get the target region's information
-> Connect to the regionserver hosting that region
-> Normal case:
   -> WAL (write-ahead log): each regionserver maintains a single HLog
   -> memstore (once it reaches a configured size, it is flushed to disk as a storefile)
   -> once several storefiles accumulate and reach a certain size, they are compacted into a single storefile
   -> once a single storefile grows past a threshold, a split is performed and the region is divided into two roughly equal regions
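Flush, compaction and split are triggered automatically by the thresholds described above, but they can also be requested manually through the old HBaseAdmin API, which makes the mechanism easy to observe. A minimal sketch, assuming the nstest1:tb1 table from part III; the calls only request the work, the regionservers carry it out asynchronously, and the class name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class FlushCompactSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        // Force the table's memstores to be flushed to storefiles.
        admin.flush("nstest1:tb1");
        // Request a major compaction: merge all storefiles of each store into one,
        // dropping deleted cells and versions beyond the configured maximum.
        admin.majorCompact("nstest1:tb1");
        // Request a split of the table's regions (normally triggered by storefile size).
        admin.split("nstest1:tb1");
        admin.close();
    }
}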
5. Notes
Version merging and physical deletion are only carried out during the compact phase; on the write path HBase simply appends data (deletes are recorded as markers).
The HMaster does not take part in the actual read/write path, so a brief HMaster outage does not interrupt reads and writes.
II. Writing with the HBase Client API
1. Add the dependencies (e.g. the org.apache.hbase:hbase-client artifact, matching the cluster's HBase version)
2. Add the configuration files to the project's classpath (a configuration sketch follows this list):
core-site.xml
hdfs-site.xml
hbase-site.xml
log4j.properties
regionservers
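HBaseConfiguration.create() loads hbase-site.xml (and the Hadoop *-site.xml files) from the classpath, which is why the files above go into the project's resources. If they are not on the classpath, the ZooKeeper connection can be set in code instead; a minimal sketch, where the host name is a placeholder and the class name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class ConfSketch {
    public static void main(String[] args) {
        // Loads hbase-default.xml and, if present on the classpath, hbase-site.xml.
        Configuration conf = HBaseConfiguration.create();
        // Alternatively, point the client at ZooKeeper explicitly (placeholder host).
        conf.set("hbase.zookeeper.quorum", "bigdata-node1.example.com");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        System.out.println(conf.get("hbase.zookeeper.quorum"));
    }
}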
3. Writing a get (see getData below)
4. Writing a put (see putData below)
5. Writing a delete (see deleteData below)
Note on deleting everything: deleteColumns removes every version of a column, while a Delete built from only the rowkey removes the entire row (a sketch follows).
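A minimal sketch of the whole-row case, reusing the table and rowkey from part III; the class name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.util.Bytes;

public class DeleteRowSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "nstest1:tb1");
        // No column added to the Delete: the entire row "103" is removed,
        // i.e. all columns of all families, all versions.
        table.delete(new Delete(Bytes.toBytes("103")));
        table.close();
    }
}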
6. Writing a scan (see scanData below)
7. Writing a scan with filter conditions (see filterScan below; an extra value-filter sketch follows)
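filterScan in part III uses a PrefixFilter on the rowkey. As a complementary sketch that is not in the original notes, the same pattern works with a value-based filter such as SingleColumnValueFilter; setFilterIfMissing(true) drops rows that do not contain the column at all. The table, family and value are the ones used elsewhere in these notes, and the class name is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class ValueFilterScanSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "nstest1:tb1");
        // Keep only rows whose info:name column equals "zhaoliu".
        SingleColumnValueFilter filter = new SingleColumnValueFilter(
                Bytes.toBytes("info"), Bytes.toBytes("name"),
                CompareFilter.CompareOp.EQUAL, Bytes.toBytes("zhaoliu"));
        filter.setFilterIfMissing(true);  // drop rows that lack info:name entirely
        Scan scan = new Scan();
        scan.setFilter(filter);
        ResultScanner rs = table.getScanner(scan);
        for (Result r : rs) {
            System.out.println(Bytes.toString(r.getRow()));
        }
        rs.close();
        table.close();
    }
}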
III. Full source code
package com.beifeng.bigdat;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class HbaseClientTest {

    // Build an HTable handle; HBaseConfiguration.create() loads hbase-site.xml from the classpath.
    public static HTable getTable(String name) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, name);
        return table;
    }

    // Read row "103", restricted to the "info" column family, and print every cell.
    public static void getData(HTable table) throws Exception {
        Get get = new Get(Bytes.toBytes("103"));
        get.addFamily(Bytes.toBytes("info"));
        Result rs = table.get(get);
        for (Cell cell : rs.rawCells()) {
            System.out.println(
                    Bytes.toString(CellUtil.cloneFamily(cell)) + "--" +
                    Bytes.toString(CellUtil.cloneQualifier(cell)) + "---" +
                    Bytes.toString(CellUtil.cloneValue(cell)) + "----" +
                    cell.getTimestamp());
            System.out.println("----------------------------------------------");
        }
    }

    // Write one cell: row "103", column info:name, value "zhaoliu".
    public static void putData(HTable table) throws Exception {
        Put put = new Put(Bytes.toBytes("103"));
        put.add(Bytes.toBytes("info"),
                Bytes.toBytes("name"),
                Bytes.toBytes("zhaoliu"));
        table.put(put);
        getData(table);
    }

    // Delete all versions of column info:name in row "103".
    public static void deleteData(HTable table) throws Exception {
        Delete delete = new Delete(Bytes.toBytes("103"));
        delete.deleteColumns(Bytes.toBytes("info"), Bytes.toBytes("name"));
        table.delete(delete);
        getData(table);
    }

    // Full-table scan, printing each rowkey and all of its cells.
    public static void scanData(HTable table) throws Exception {
        Scan scan = new Scan();
        ResultScanner rs = table.getScanner(scan);
        for (Result r : rs) {
            System.out.println(Bytes.toString(r.getRow()));
            for (Cell cell : r.rawCells()) {
                System.out.println(
                        Bytes.toString(CellUtil.cloneFamily(cell)) + "---" +
                        Bytes.toString(CellUtil.cloneQualifier(cell)) + "---" +
                        Bytes.toString(CellUtil.cloneValue(cell)) + "--" +
                        cell.getTimestamp());
                System.out.println();
            }
        }
        rs.close();
    }

    // Scan restricted by a PrefixFilter on the rowkey, with scanner caching tuned.
    public static void filterScan(HTable table) throws Exception {
        Scan scan = new Scan();
        Filter filter = new PrefixFilter(Bytes.toBytes("10")); // only rowkeys starting with "10"
        scan.setFilter(filter);
        scan.setCacheBlocks(true); // keep scanned blocks in the block cache
        scan.setCaching(1000);     // rows fetched per RPC
        scan.setBatch(100);        // max cells returned per Result
        ResultScanner rs = table.getScanner(scan);
        for (Result r : rs) {
            System.out.println(Bytes.toString(r.getRow()));
            for (Cell cell : r.rawCells()) {
                System.out.println(
                        Bytes.toString(CellUtil.cloneFamily(cell)) + "---" +
                        Bytes.toString(CellUtil.cloneQualifier(cell)) + "---" +
                        Bytes.toString(CellUtil.cloneValue(cell)) + "--" +
                        cell.getTimestamp());
                System.out.println();
            }
        }
        rs.close();
    }

    public static void main(String[] args) throws Exception {
        HTable table = getTable("nstest1:tb1");
        // getData(table);
        // putData(table);
        // deleteData(table);
        // scanData(table);
        filterScan(table);
        table.close();
    }
}