# 需要软件
Hadoop-2.2.0(目前Apache官网最新的Stable版本)
Hbase-0.96.2(这里就用这个版本,跟Hadoop-2.2.0是配套的,不用覆盖jar包什么的)
Hive-0.13.1(目前是最新版本)
Zookeeper-3.4.6(这里推荐使用 3.4.5)
Jdk1.7.0_60(这里推荐使用1.7.0_45)
Mysql-5.5.31
# 集群结构图
NN : NameNode
JN : JournalNode
DN : DataNode
ZK : ZooKeeper
HM : HMaster
HRS : HRegionServer
| IP地址 | 主机名 | ZK | NN | JN | DN | HRS | HM | HIVE |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 192.168.1.229 | rs229 | 是 | 是 | 是 | 是 | 否 | 是 | 是 |
| 192.168.1.227 | rs227 | 是 | 是 | 是 | 是 | 否 | 是 | 否 |
| 192.168.1.226 | rs226 | 是 | 否 | 是 | 是 | 是 | 否 | 否 |
| 192.168.1.198 | rs198 | 是 | 否 | 否 | 是 | 是 | 否 | 否 |
| 192.168.1.197 | rs197 | 是 | 否 | 否 | 是 | 是 | 否 | 否 |
| 192.168.1.196 | rs196 | 否 | 否 | 否 | 是 | 是 | 否 | 否 |
| 192.168.1.195 | rs195 | 否 | 否 | 否 | 是 | 是 | 否 | 否 |
# Zookeeper-3.4.6
# zoo.cfg 配置文件的修改
[root@rs227 conf]# pwd
/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/conf
[root@rs227 conf]# ls
configuration.xsl log4j.properties zoo.cfg zookeeper.out zoo_sample.cfg
[root@rs227 conf]# cp zoo_sample.cfg zoo.cfg
[root@rs227 conf]# vi zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/data
dataLogDir=/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/logs
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
#http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.229=rs229:2888:3888
server.227=rs227:2888:3888
server.226=rs226:2888:3888
server.198=rs198:2888:3888
server.197=rs197:2888:3888
#server.196=rs196:2888:3888 只用5台,这两个是不用的
#server.195=rs195:2888:3888
# dataDir目录下创建 myid文件
然后每台主机写不同的id,比如:
rs229 中写入 229,
rs227 中写入 227,
以此类推 …
# 复制(SCP)到其它的服务器下去
然后只要修改…data/myid文件成对应的id就好了
rs229 中写入 229,
rs227 中写入 227,
以此类推 …
# Hadoop-2.2.0
# 修改7个配置文件
~/hadoop-2.2.0/etc/hadoop/hadoop-env.sh
~/hadoop-2.2.0/etc/hadoop/core-site.xml
~/hadoop-2.2.0/etc/hadoop/hdfs-site.xml
~/hadoop-2.2.0/etc/hadoop/mapred-site.xml
~/hadoop-2.2.0/etc/hadoop/yarn-env.sh
~/hadoop-2.2.0/etc/hadoop/yarn-site.xml
~/hadoop-2.2.0/etc/hadoop/slaves
# 1修改hadoop-env.sh配置文件(jdk 路径)
[root@master hadoop]# pwd
/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop
[root@master hadoop]# vi hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/local/adsit/yting/jdk/jdk1.7.0_60
#export JAVA_HOME=${JAVA_HOME}
# 2修改core-site.xml文件修改 (注意fs.defaultFS的配置)
fs.defaultFS的配置中,HA模式下value应写成nameservice的逻辑名称(即dfs.nameservices的值,这里是hdfs://mycluster),而不是某一台NameNode的主机名,所有服务器上的配置保持一致
[root@master hadoop]# vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/tmp</value>
<description>Abase for other temporary directories.</description>
</property>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>rs229:2181,rs227:2181,rs226:2181,rs198:2181,rs197:2181</value>
</property>
</configuration>
# 3修改hdfs-site.xml配置文件
[root@master hadoop]# vi hdfs-site.xml
<configuration>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>rs229,rs227</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.rs229</name>
<value>rs229:9000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.rs227</name>
<value>rs227:9000</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.rs229</name>
<value>rs229:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.rs227</name>
<value>rs227:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://rs229:8485;rs227:8485;rs226:8485/mycluster</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled.mycluster</name>
<value>true</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/tmp/journal</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
</configuration>
# 4修改 mapred-site.xml配置文件
[root@master hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@master hadoop]# vi mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
# 5修改yarn-env.sh配置文件
[root@master hadoop]# vi yarn-env.sh
# some Java parameters
export JAVA_HOME=/usr/local/adsit/yting/jdk/jdk1.7.0_60
# 6修改yarn-site.xml配置文件 (还是单点,你逗饿么?)
[root@master hadoop]# vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>rs229</value>
</property>
</configuration>
# 7修改slaves配置文件
[root@master hadoop]# vi slaves
rs229
rs227
rs226
rs198
rs197
rs196
rs195
# Hadoop配置结束,开始启动各个程序(笔记只保留重要日志信息)
# 在每个节点上启动Zookeeper
[root@rs229 zookeeper-3.4.6]# pwd
/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6
[root@rs229 zookeeper-3.4.6]# bin/zkServer.sh start
JMX enabled by default
Using config: /usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@rs229 zookeeper-3.4.6]#
其它服务器也这样启动,这里就不写了…
# 验证Zookeeper是否启动成功1
在rs229上查看zookeeper的状态发现是leader
在其他的机器上查看zookeeper的状态发现是follower
[root@rs229 bin]# pwd
/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin
[root@rs229 bin]# ./zkServer.sh status
JMX enabled by default
Using config: /usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../conf/zoo.cfg
Mode: leader
[root@rs229 bin]#
# 验证Zookeeper是否启动成功2
[root@rs229 hadoop]# pwd
/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop
[root@rs229 hadoop]# ../../../../zookeeper/zookeeper-3.4.6/bin/zkCli.sh
Connecting to localhost:2181
2014-06-10 16:51:53,575 [myid:] - INFO [main:Environment@100] - Client environment:zookeeper.version=3.4.6-1569965, built on 02/20/2014 09:09 GMT
2014-06-10 16:51:53,580 [myid:] - INFO [main:Environment@100] - Client environment:host.name=master
2014-06-10 16:51:53,581 [myid:] - INFO [main:Environment@100] - Client environment:java.version=1.7.0_60
2014-06-10 16:51:53,584 [myid:] - INFO [main:Environment@100] - Client environment:java.vendor=Oracle Corporation
2014-06-10 16:51:53,584 [myid:] - INFO [main:Environment@100] - Client environment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre
2014-06-10 16:51:53,584 [myid:] - INFO [main:Environment@100] - Client environment:java.class.path=/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../build/classes:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../build/lib/*.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/slf4j-log4j12-1.6.1.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/slf4j-api-1.6.1.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/netty-3.7.0.Final.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/log4j-1.2.16.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/jline-0.9.94.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../zookeeper-3.4.6.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../src/java/lib/*.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../conf:
2014-06-10 16:51:53,584 [myid:] - INFO [main:Environment@100] - Client environment:java.library.path=/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
2014-06-10 16:51:53,584 [myid:] - INFO [main:Environment@100] - Client environment:java.io.tmpdir=/tmp
2014-06-10 16:51:53,585 [myid:] - INFO [main:Environment@100] - Client environment:java.compiler=<NA>
2014-06-10 16:51:53,585 [myid:] - INFO [main:Environment@100] - Client environment:os.name=Linux
2014-06-10 16:51:53,585 [myid:] - INFO [main:Environment@100] - Client environment:os.arch=amd64
2014-06-10 16:51:53,585 [myid:] - INFO [main:Environment@100] - Client environment:os.version=2.6.32-279.el6.x86_64
2014-06-10 16:51:53,585 [myid:] - INFO [main:Environment@100] - Client environment:user.name=root
2014-06-10 16:51:53,586 [myid:] - INFO [main:Environment@100] - Client environment:user.home=/root
2014-06-10 16:51:53,586 [myid:] - INFO [main:Environment@100] - Client environment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop
2014-06-10 16:51:53,588 [myid:] - INFO [main:ZooKeeper@438] - Initiating client connection, connectString=localhost:2181 sessionTimeout=30000 watcher=org.apache.zookeeper.ZooKeeperMain$MyWatcher@590aeb1f
Welcome to ZooKeeper!
2014-06-10 16:51:53,627 [myid:] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@975] - Opening socket connection to server localhost/127.0.0.1:2181. Will not attempt to authenticate using SASL (unknown error)
2014-06-10 16:51:53,634 [myid:] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@852] - Socket connection established to localhost/127.0.0.1:2181, initiating session
JLine support is enabled
2014-06-10 16:51:53,646 [myid:] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@1235] - Session establishment complete on server localhost/127.0.0.1:2181, sessionid = 0xe5467eb575d20003, negotiated timeout = 30000
WATCHER::
WatchedEvent state:SyncConnected type:None path:null
[zk: localhost:2181(CONNECTED) 0] ls /
[zookeeper]
[zk: localhost:2181(CONNECTED) 1]
出现这样的提示的话,那么zookeeper就启动成功了
# 在rs229上格式化Zookeeper
[root@rs229 sbin]# ../bin/hdfs zkfc -formatZK
14/06/10 16:55:41 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
14/06/10 16:55:41 INFO tools.DFSZKFailoverController: Failover controller configured for NameNode NameNode at rs229/116.255.224.229:9000
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:host.name=master
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.version=1.7.0_60
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.vendor=Oracle Corporation
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.class.path=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop:…
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.library.path=/usr/local/adsit/yting/apache/hadoop/hadoop-1.1.2/lib
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:os.version=2.6.32-279.el6.x86_64
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:user.name=root
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:user.home=/root
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Client environment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Initiating client connection, connectString=rs229:2181,rs227:2181,rs226:2181 sessionTimeout=5000 watcher=org.apache.hadoop.ha.ActiveStandbyElector$WatcherWithClientRef@3a469fea
14/06/10 16:55:41 INFO zookeeper.ClientCnxn: Opening socket connection to server master/116.255.224.229:2181. Will not attempt to authenticate using SASL (unknown error)
14/06/10 16:55:41 INFO zookeeper.ClientCnxn: Socket connection established to master/116.255.224.229:2181, initiating session
14/06/10 16:55:41 INFO zookeeper.ClientCnxn: Session establishment complete on server master/116.255.224.229:2181, sessionid = 0xe5467eb575d20004, negotiated timeout = 5000
14/06/10 16:55:41 INFO ha.ActiveStandbyElector: Session connected.
14/06/10 16:55:41 INFO ha.ActiveStandbyElector: Successfully created /hadoop-ha/mycluster in ZK.
14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Session: 0xe5467eb575d20004 closed
14/06/10 16:55:41 INFO zookeeper.ClientCnxn: EventThread shut down
# 验证zkfc是否格式化成功
[root@rs229 bin]# pwd
/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin
[root@rs229 bin]# ./zkCli.sh
[zk: localhost:2181(CONNECTED) 0] ls /
[hadoop-ha, zookeeper]
[zk: localhost:2181(CONNECTED) 2] ls /hadoop-ha
[mycluster]
可以看到使用ls命令后多了一个hadoop-ha,这样就成功了
# 完全分布式 启动Hadoop(切记顺序不能乱)
# 在 rs229,rs227,rs226 上分别启动 journalnode
# 启动rs229的journalNode
[root@rs229 sbin]# pwd
/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin
[root@rs229 sbin]# ./hadoop-daemon.sh start journalnode
[root@rs229 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs229.log (查看日志是否报错,不报错你就赢了一半了)
# 启动rs227的journalNode
[root@rs227 sbin]# pwd
/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin
[root@rs227 sbin]# ./hadoop-daemon.sh start journalnode
[root@rs227 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs227.log (查看日志是否报错,不报错你就赢了一半了)
[root@rs226 ~]# cd /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin/
[root@rs226 sbin]# ./hadoop-daemon.sh start journalnode
starting journalnode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs226.out
[root@rs226 sbin]# jps
16799 Jps
20960 QuorumPeerMain
16732 JournalNode
# 启动rs226的journalNode
[root@rs226 sbin]# pwd
/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin
[root@rs226 sbin]# ./hadoop-daemon.sh start journalnode
[root@rs226 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs226.log (查看日志是否报错,不报错你就赢了一半了)
# 在 rs229,rs227上分别格式化和启动namenode
从rs229和rs227中任选一个即可,这里选择的是229
# 格式化跟启动rs229上的namenode
[root@rs229 sbin]# ../bin/hdfs namenode -format
[root@rs229 sbin]# ./hadoop-daemon.sh start namenode
# 在这个过程中我遇到了一个错误 java.net.BindException:
Problem binding to [rs229:9000] java.net.BindException: Address already in use; For more details see: http://wiki.apache.org/hadoop/BindException,如何解决请看下面的“那伊抹微笑搭建hadoop环境出现的问题(仅供参考)”一节中 Hadoop-2.X 错误的原因分析以及解决
# 将rs229上namenode的数据同步到rs227中去,需要在rs227上执行hadoop的命令
[root@rs227 sbin]# ../bin/hdfs namenode -bootstrapStandby
[root@rs227 sbin]# ./hadoop-daemon.sh start namenode
[root@rs227 sbin]# cd ../tmp/
[root@rs227 tmp]# ll(可以看到数据已经从rs229同步到rs227上去了)
total 8
drwxr-xr-x 3 root root 4096 Jun 10 17:13 dfs
drwxr-xr-x 3 root root 4096 Jun 10 17:09 journal
下面是namenode数据同步的重要部分的日志信息
14/06/10 17:13:33 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
14/06/10 17:13:34 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
=====================================================
About to bootstrap Standby ID rs227 from:
Nameservice ID: mycluster
Other Namenode ID: rs229
Other NN's HTTP address: rs229:50070
Other NN's IPC address: rs229/116.255.224.229:9000
Namespace ID: 819891643
Block pool ID: BP-1742810392-116.255.224.229-1402391347726
Cluster ID: mycluster
Layout version: -47
=====================================================
14/06/10 17:13:34 INFO common.Storage: Storage directory /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/tmp/dfs/name has been successfully formatted.
14/06/10 17:13:34 INFO namenode.TransferFsImage: Opening connection to http://rs229:50070/getimage?getimage=1&txid=0&storageInfo=-47:819891643:0:mycluster
14/06/10 17:13:34 INFO namenode.TransferFsImage: Transfer took 0.14s at 0.00 KB/s
14/06/10 17:13:34 INFO namenode.TransferFsImage: Downloaded file fsimage.ckpt_0000000000000000000 size 196 bytes.
14/06/10 17:13:35 INFO util.ExitUtil: Exiting with status 0
# 打开浏览器,访问rs229跟rs227的50070端口
如果都能访问到,说明你namenode启动成功了,并且这两个namenode都是standby状态
# namenode(rs229)转换成active(这里不需要手动将namenode转换为active状态了,因为我们是交给Zookeeper管理,在后面会启动ZooKeeperFailoverController)
# 启动所有的 datanodes(在rs229上执行命令)
[root@rs229 sbin]# ./hadoop-daemons.sh start datanode(查看日志,没报错你就赢了)
rs198: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs198.out
rs197: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs197.out
rs196: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs196.out
rs195: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs195.out
rs227: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs227.out
rs226: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs226.out
rs229: starting datanode, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs229.out
[root@rs229 sbin]# jps
25627 Jps
24037 NameNode
25168 DataNode
23343 JournalNode
29367 QuorumPeerMain
# 实验一下手动切换namenode的状态(这里也不需要做,Zookeeper管理的,自动切换,下面会讲到)
# yarn启动
[root@rs229 sbin]# ./start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-resourcemanager-rs229.out
rs229: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-rs229.out
rs196: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS196.out
rs197: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS197.out
rs226: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-rs226.out
rs198: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS198.out
rs227: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-rs227.out
rs195: starting nodemanager, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS195.out
# 访问rs229的8088端口查看ResourceManager的UI界面
# 启动ZooKeeperFailoverController(在rs229,rs227上执行命令,日志重点地方已经使用红颜色的字体标出)
# 在rs229上执行命令
[root@rs229 sbin]# ./hadoop-daemon.sh start zkfc
starting zkfc, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.out
[root@rs229 sbin]# tail -100f usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.log
tail: cannot open `usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.log' for reading: No such file or directory
[root@rs229 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.log
2014-06-10 17:45:47,887 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2014-06-10 17:45:47,889 INFO org.apache.hadoop.hdfs.tools.DFSZKFailoverController: Failover controller configured for NameNode NameNode at rs229/116.255.224.229:9000
2014-06-10 17:45:48,089 INFO org.apache.zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
2014-06-10 17:45:48,089 INFO org.apache.zookeeper.ZooKeeper: Client environment:host.name=master
2014-06-10 17:45:48,089 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.version=1.7.0_60
2014-06-10 17:45:48,089 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.vendor=Oracle Corporation
2014-06-10 17:45:48,089 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre
2014-06-10 17:45:48,090 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.class.path=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop…
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.library.path=/usr/local/adsit/yting/apache/hadoop/hadoop-1.1.2/lib
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:os.name=Linux
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:os.arch=amd64
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:os.version=2.6.32-279.el6.x86_64
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:user.name=root
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:user.home=/root
2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper: Client environment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0
2014-06-10 17:45:48,092 INFO org.apache.zookeeper.ZooKeeper: Initiating client connection, connectString=rs229:2181,rs227:2181,rs226:2181 sessionTimeout=5000 watcher=org.apache.hadoop.ha.ActiveStandbyElector$WatcherWithClientRef@4e9cba32
2014-06-10 17:45:48,121 INFO org.apache.zookeeper.ClientCnxn: Opening socket connection to server rs226/116.255.224.226:2181. Will not attempt to authenticate using SASL (unknown error)
2014-06-10 17:45:48,127 INFO org.apache.zookeeper.ClientCnxn: Socket connection established to rs226/116.255.224.226:2181, initiating session
2014-06-10 17:45:48,147 INFO org.apache.zookeeper.ClientCnxn: Session establishment complete on server rs226/116.255.224.226:2181, sessionid = 0xe2467eb575ce0000, negotiated timeout = 5000
2014-06-10 17:45:48,153 INFO org.apache.hadoop.ha.ActiveStandbyElector: Session connected.
2014-06-10 17:45:48,236 INFO org.apache.hadoop.ipc.Server: Starting Socket Reader #1 for port 8019
2014-06-10 17:45:48,273 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting
2014-06-10 17:45:48,273 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8019: starting
2014-06-10 17:45:48,403 INFO org.apache.hadoop.ha.HealthMonitor: Entering state SERVICE_HEALTHY
2014-06-10 17:45:48,403 INFO org.apache.hadoop.ha.ZKFailoverController: Local service NameNode at rs229/116.255.224.229:9000 entered state: SERVICE_HEALTHY
2014-06-10 17:45:48,421 INFO org.apache.hadoop.ha.ActiveStandbyElector: Checking for any old active which needs to be fenced...
2014-06-10 17:45:48,440 INFO org.apache.hadoop.ha.ActiveStandbyElector: No old node to fence
2014-06-10 17:45:48,440 INFO org.apache.hadoop.ha.ActiveStandbyElector: Writing znode /hadoop-ha/mycluster/ActiveBreadCrumb to indicate that the local node is the most recent active...
2014-06-10 17:45:48,447 INFO org.apache.hadoop.ha.ZKFailoverController: Trying to make NameNode at rs229/116.255.224.229:9000 active...
2014-06-10 17:45:48,786 INFO org.apache.hadoop.ha.ZKFailoverController: Successfully transitioned NameNode at rs229/116.255.224.229:9000 to active state
[root@rs229 sbin]# jps
24037 NameNode
25168 DataNode
26012 NodeManager
25891 ResourceManager
23343 JournalNode
27026 DFSZKFailoverController
29367 QuorumPeerMain
27208 Jps
# 在rs227上执行命令
[root@rs227 sbin]# ./hadoop-daemon.sh start zkfc
starting zkfc, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs227.out
[root@rs227 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs227.log
2014-06-10 17:46:39,078 WARN org.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
2014-06-10 17:46:39,080 INFO org.apache.hadoop.hdfs.tools.DFSZKFailoverController: Failover controller configured for NameNode NameNode at rs227/116.255.224.227:9000
2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper: Client environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper: Client environment:host.name=rs227
2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.version=1.7.0_60
2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.vendor=Oracle Corporation
2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre
2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.class.path=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop…
2014-06-10 17:46:39,286 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.library.path=/usr/local/adsit/yting/apache/hadoop/hadoop-1.1.2/lib
2014-06-10 17:46:39,286 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp
2014-06-10 17:46:39,286 INFO org.apache.zookeeper.ZooKeeper: Client environment:java.compiler=<NA>
2014-06-10 17:46:39,286 INFO org.apache.zookeeper.ZooKeeper: Client environment:os.name=Linux
2014-06-10 17:46:39,287 INFO org.apache.zookeeper.ZooKeeper: Client environment:os.arch=amd64
2014-06-10 17:46:39,287 INFO org.apache.zookeeper.ZooKeeper: Client environment:os.version=2.6.32-279.el6.x86_64
2014-06-10 17:46:39,287 INFO org.apache.zookeeper.ZooKeeper: Client environment:user.name=root
2014-06-10 17:46:39,287 INFO org.apache.zookeeper.ZooKeeper: Client environment:user.home=/root
2014-06-10 17:46:39,287 INFO org.apache.zookeeper.ZooKeeper: Client environment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0
2014-06-10 17:46:39,288 INFO org.apache.zookeeper.ZooKeeper: Initiating client connection, connectString=rs229:2181,rs227:2181,rs226:2181 sessionTimeout=5000 watcher=org.apache.hadoop.ha.ActiveStandbyElector$WatcherWithClientRef@5f997454
2014-06-10 17:46:39,317 INFO org.apache.zookeeper.ClientCnxn: Opening socket connection to server rs226/116.255.224.226:2181. Will not attempt to authenticate using SASL (unknown error)
2014-06-10 17:46:39,323 INFO org.apache.zookeeper.ClientCnxn: Socket connection established to rs226/116.255.224.226:2181, initiating session
2014-06-10 17:46:39,335 INFO org.apache.zookeeper.ClientCnxn: Session establishment complete on server rs226/116.255.224.226:2181, sessionid = 0xe2467eb575ce0001, negotiated timeout = 5000
2014-06-10 17:46:39,340 INFO org.apache.hadoop.ha.ActiveStandbyElector: Session connected.
2014-06-10 17:46:39,409 INFO org.apache.hadoop.ipc.Server: Starting Socket Reader #1 for port 8019
2014-06-10 17:46:39,445 INFO org.apache.hadoop.ipc.Server: IPC Server Responder: starting
2014-06-10 17:46:39,445 INFO org.apache.hadoop.ipc.Server: IPC Server listener on 8019: starting
2014-06-10 17:46:39,608 INFO org.apache.hadoop.ha.HealthMonitor: Entering state SERVICE_HEALTHY
2014-06-10 17:46:39,608 INFO org.apache.hadoop.ha.ZKFailoverController: Local service NameNode at rs227/116.255.224.227:9000 entered state: SERVICE_HEALTHY
2014-06-10 17:46:39,636 INFO org.apache.hadoop.ha.ZKFailoverController: ZK Election indicated that NameNode at rs227/116.255.224.227:9000 should become standby
2014-06-10 17:46:39,656 INFO org.apache.hadoop.ha.ZKFailoverController: Successfully transitioned NameNode at rs227/116.255.224.227:9000 to standby state
[root@rs227 sbin]# jps
17710 Jps
17338 NodeManager
16725 NameNode
17627 DFSZKFailoverController
27264 QuorumPeerMain
16495 JournalNode
17091 DataNode
# 打开浏览器,再访问rs229跟rs227的50070端口
发现rs229变成active状态了,而rs227还是standby状态
# 验证HDFS是否好用
[root@rs229 sbin]# ../bin/hadoop fs -put yarn-daemon.sh /yting
[root@rs229 sbin]# ../bin/hadoop fs -ls /yting
Found 1 items
-rw-r--r-- 3 root supergroup 4278 2014-06-10 18:29 /yting/yarn-daemon.sh
# 验证YARN是否好用
[root@rs229 bin]# pwd
/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/bin
[root@rs229 bin]# ./hadoop jar ../share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 10 100
…(不重要的部分就省略了,能出这个值就是对的,虚拟机可能会卡着不动,也可能会卡死,属于正常现象,内存消耗比较大)
Job Finished in 25.361 seconds
Estimated value of Pi is 3.14800000000000000000
# 验证HA高可用性,是否自动故障转移
# 打开浏览器,访问rs229跟rs227的50070端口
发现rs229仍为active状态,而rs227为standby状态
在rs229上直接kill掉namenode进程
[root@rs229 bin]# jps
31742 ResourceManager
32287 DFSZKFailoverController
31356 NameNode
31007 JournalNode
29367 QuorumPeerMain
31862 NodeManager
2530 Jps
31592 DataNode
[root@rs229 bin]# kill -9 31356
[root@rs229 bin]# jps
31742 ResourceManager
32287 DFSZKFailoverController
2578 Jps
31007 JournalNode
29367 QuorumPeerMain
31862 NodeManager
31592 DataNode
进程已经被kill掉了
# 打开浏览器,再访问rs229跟rs227的50070端口
发现rs229无法访问,而rs227 从standby转换成active状态,成功了
这时候在rs229上使用hdfs跟mapreduce还是可以正常运行的,尽管rs229上的namenode进程已经被kill掉,这就是故障转移的好处啊!
# Hbase-0.96.2-hadoop2(启动双HMaster的配置,rs229是主HMaster,rs227是从HMaster)
# 解压Hbase-0.96.2-hadoop2-bin.tar.gz
[root@rs229 hbase-0.96.2-hadoop2]# pwd
/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2
[root@rs229 hbase-0.96.2-hadoop2]# tar -zxvf hbase-0.96.2-hadoop2-bin.tar.gz
[root@rs229 hbase]# ll
total 77516
drwxr-xr-x 12 root root 4096 May 21 16:38 hbase-0.94.19
drwxr-xr-x 7 root root 4096 Jun 11 17:10 hbase-0.96.2-hadoop2
-rw-r--r-- 1 root root 79367504 Jun 11 17:10 hbase-0.96.2-hadoop2-bin.tar.gz
# 修改hbase-env.sh 文件
[root@rs229 conf]# pwd
/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/conf
[root@master conf]# vi hbase-env.sh
将 JAVA_HOME 的注释去掉,并把路径修改正确
# The java implementation to use. Java 1.6 required.
export JAVA_HOME=/usr/local/adsit/yting/jdk/jdk1.7.0_60
# Tell HBase whether it should manage it's own instance of Zookeeper or not.
export HBASE_MANAGES_ZK=false
# 配置hbase-site.xml 文件
<configuration>
<property>
<name>hbase.rootdir</name>
<value>hdfs://mycluster/hbase</value><!--这里必须跟core-site.xml中的配置一样-->
</property>
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<property>
<name>hbase.tmp.dir</name>
<value>/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/tmp</value>
</property>
<property>
<name>hbase.master</name>
<value>60000</value><!-- 这里是对的,只配置端口,为了配置多个HMaster -->
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>rs229,rs227,rs226,rs198,rs197</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/data</value>
</property>
</configuration>
# 配置regionservers
[root@rs229 conf]# cat regionservers
rs195
rs196
rs197
rs198
rs226
# 创建hdfs-site.xml的软连接
[root@rs229 conf]# pwd
/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/conf
[root@rs229 conf]# ln -s /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop/hdfs-site.xml hdfs-site.xml
# 配置profile文件,以便可以直接使用相关的命令(这里就不说了)
# jar包覆盖,这个版本hbase-0.96.2不需要覆盖jar包,跟hadoop-2.2.0的版本jar包是一样的,看下面
[root@rs229 lib]# ll | grep hadoop
-rw-r--r-- 1 root root 16778 Jun 11 18:15hadoop-annotations-2.2.0.jar
-rw-r--r-- 1 root root 49750 Jun 11 18:16 hadoop-auth-2.2.0.jar
-rw-r--r-- 1 root root 2559 Jun 11 18:16 hadoop-client-2.2.0.jar
-rw-r--r-- 1 root root 2735584 Jun 11 18:15 hadoop-common-2.2.0.jar
-rw-r--r-- 1 root root 5242252 Jun 11 18:15 hadoop-hdfs-2.2.0.jar
-rw-r--r-- 1 root root 1988460 Jun 11 18:16hadoop-hdfs-2.2.0-tests.jar
-rw-r--r-- 1 root root 482042 Jun 11 18:16hadoop-mapreduce-client-app-2.2.0.jar
-rw-r--r-- 1 root root 656365 Jun 11 18:15hadoop-mapreduce-client-common-2.2.0.jar
-rw-r--r-- 1 root root 1455001 Jun 11 18:15hadoop-mapreduce-client-core-2.2.0.jar
-rw-r--r-- 1 root root 35216 Jun 11 18:16hadoop-mapreduce-client-jobclient-2.2.0.jar
-rw-r--r-- 1 root root 1434852 Jun 11 18:15hadoop-mapreduce-client-jobclient-2.2.0-tests.jar
-rw-r--r-- 1 root root 21537 Jun 11 18:15hadoop-mapreduce-client-shuffle-2.2.0.jar
-rw-r--r-- 1 root root 1158936 Jun 11 18:15hadoop-yarn-api-2.2.0.jar
-rw-r--r-- 1 root root 94728 Jun 11 18:16hadoop-yarn-client-2.2.0.jar
-rw-r--r-- 1 root root 1301627 Jun 11 18:15hadoop-yarn-common-2.2.0.jar
-rw-r--r-- 1 root root 175554 Jun 11 18:16hadoop-yarn-server-common-2.2.0.jar
-rw-r--r-- 1 root root 467638 Jun 11 18:16hadoop-yarn-server-nodemanager-2.2.0.jar
# 启动hbase
[root@rs229 hbase-0.96.2-hadoop2]# ./bin/start-hbase.sh
starting master, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-master-rs229.out
rs197: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS197.out
rs227: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS227.out
rs195: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS195.out
rs226: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-rs226.out
rs198: starting regionserver, logging to /usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS198.out
[root@rs229 hbase-0.96.2-hadoop2]# jps
5131 Jps
4827 HRegionServer
4661 HMaster
6395 NodeManager
6272 DataNode
29849 QuorumPeerMain
# hbase shell 验证 1(查看hbase的版本跟状态)
hbase(main):003:0> list # 刚刚创建的表
TABLE
user
yting
2 row(s) in 0.0800 seconds
=> ["user", "yting"]
hbase(main):004:0> version
0.96.2-hadoop2, r1581096, Mon Mar 24 16:03:18 PDT2014
hbase(main):005:0> status
5 servers, 0 dead, 0.8000 average load
hbase(main):006:0>
# hbase shell 验证 2(建表插入数据获取数据实时)
hbase(main):006:0> create 'yting_xmei1129','uid','info'
0 row(s) in 0.4706 seconds
=> Hbase::Table - yting_xmei1129
hbase(main):007:0> put 'yting_xmei1129','1314520','info:yousmile','forever'
0 row(s) in 0.1350 seconds
hbase(main):008:0> get 'yting_xmei1129','1314520'
COLUMN CELL
info:yousmile timestamp=1402569647483, value=forever
1 row(s) in 0.0530 seconds
hbase(main):009:0> scan 'yting_xmei1129'
ROW COLUMN+CELL
1314520 column=info:yousmile, timestamp=1402569647483, value=forever
1 row(s) in 0.0500 seconds
hbase(main):010:0>
# 在rs227上启动HMaster
[root@rs227 bin]# ./hbase-daemon.sh start master
starting master, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-master-rs227.out
[root@rs227 bin]# tail -100f /usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-master-rs227.log
2014-07-03 15:43:47,798 INFO [master:rs227:60000] mortbay.log: StartedSelectChannelConnector@0.0.0.0:60010
2014-07-03 15:43:47,897 INFO [master:rs227:60000]zookeeper.RecoverableZooKeeper: Node /hbase/master already exists and this isnot a retry
2014-07-03 15:43:47,898 INFO [master:rs227:60000]master.ActiveMasterManager: Adding ZNode for/hbase/backup-masters/rs227,60000,1402645426368 in backup master directory
2014-07-03 15:43:47,908 INFO [master:rs227:60000] master.ActiveMasterManager:Another master is the active master, rs229,60000,1402645371520; waiting tobecome the next active master
# 验证HMaster自动切换
# rs227上的日志查看
2014-07-03 15:43:47,798 INFO [master:rs227:60000] mortbay.log: StartedSelectChannelConnector@0.0.0.0:60010
2014-07-03 15:43:47,897 INFO [master:rs227:60000] zookeeper.RecoverableZooKeeper: Node /hbase/master already exists and this is not a retry
2014-07-03 15:43:47,898 INFO [master:rs227:60000]master.ActiveMasterManager: Adding ZNode for/hbase/backup-masters/rs227,60000,1402645426368 in backup master directory
2014-07-03 15:43:47,908 INFO [master:rs227:60000] master.ActiveMasterManager:Another master is the active master, rs229,60000,1402645371520; waiting tobecome the next active master
这里说明zookeeper已经接管了,并且把rs227作为一个备份的Hbase了,并且这里提示
waiting to become the next active master(等待变成下一个活动的master),然后我们可以将rs229上的hmaster进程给kill掉,当然,也可以使用 ./hbase-daemon.sh stop master 来结束rs229上的hmaster进程
# rs229上的日志查看
2014-07-03 15:43:00,670 INFO [master:rs229:60000]master.AssignmentManager: Found regions out on cluster or in RIT; presumingfailover
2014-07-03 15:43:00,772 DEBUG [master:rs229:60000]hbase.ZKNamespaceManager: Updating namespace cache from node default with data:\x0A\x07default
2014-07-03 15:43:00,775 DEBUG [master:rs229:60000]hbase.ZKNamespaceManager: Updating namespace cache from node hbase with data:\x0A\x05hbase
2014-07-03 15:43:00,796 INFO [master:rs229:60000]zookeeper.RecoverableZooKeeper: Node /hbase/namespace/default already existsand this is not a retry
2014-07-03 15:43:00,803 INFO [master:rs229:60000]zookeeper.RecoverableZooKeeper: Node /hbase/namespace/hbase already exists andthis is not a retry
2014-07-03 15:43:00,806 INFO [master:rs229:60000] master.HMaster: Masterhas completed initialization
# kill掉rs229上的hmaster进程,看看rs227上的日志会有什么变化
[root@RS229 conf]# jps
27324 HRegionServer
6395 NodeManager
6272 DataNode
28517 HMaster
28797 Jps
29849 QuorumPeerMain
[root@RS229 conf]# kill -9 28517
[root@RS229 conf]# jps
27324 HRegionServer
6395 NodeManager
6272 DataNode
28973 Jps
29849 QuorumPeerMain
[root@RS229 conf]#
# 下面是rs227上日志变化后的信息,所有信息全部复制下来了
2014-07-03 15:49:49,644 DEBUG [main-EventThread]master.ActiveMasterManager: No master available.Notifying waiting threads
2014-07-03 15:49:49,649 DEBUG [main-EventThread]master.ActiveMasterManager: A master is now available
2014-07-03 15:49:49,649 INFO [master:rs227:60000]master.ActiveMasterManager: Deleting ZNode for/hbase/backup-masters/rs227,60000,1402645426368 from backup master directory
2014-07-03 15:49:49,655 INFO [master:rs227:60000]master.ActiveMasterManager: Registered Active Master=rs227,60000,1402645426368
2014-07-03 15:49:49,662 INFO [master:rs227:60000]Configuration.deprecation: fs.default.name is deprecated. Instead, usefs.defaultFS
2014-07-03 15:49:49,980 INFO [master:rs227:60000]Configuration.deprecation: hadoop.native.lib is deprecated. Instead, useio.native.lib.available
2014-07-03 15:49:50,097 DEBUG [master:rs227:60000]util.FSTableDescriptors: Current tableInfoPath = hdfs://mycluster/hbase/data/hbase/meta/.tabledesc/.tableinfo.0000000001
2014-07-03 15:49:50,120 DEBUG [master:rs227:60000]util.FSTableDescriptors: TableInfo already exists.. Skipping creation
2014-07-03 15:49:50,178 INFO [master:rs227:60000] fs.HFileSystem: Addedintercepting call to namenode#getBlockLocations so can do block reorderingusing class class org.apache.hadoop.hbase.fs.HFileSystem$ReorderWALBlocks
2014-07-03 15:49:50,187 INFO [master:rs227:60000] master.SplitLogManager:Timeout=120000, unassigned timeout=180000, distributedLogReplay=false
2014-07-03 15:49:50,192 INFO [master:rs227:60000] master.SplitLogManager:Found 0 orphan tasks and 0 rescan nodes
2014-07-03 15:49:50,223 INFO [master:rs227:60000] zookeeper.ZooKeeper:Initiating client connection, connectString=rs227:2181,rs229:2181,rs226:2181sessionTimeout=90000 watcher=hconnection-0x4dbe91f3,quorum=rs227:2181,rs229:2181,rs226:2181, baseZNode=/hbase
2014-07-03 15:49:50,224 INFO [master:rs227:60000]zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x4dbe91f3connecting to ZooKeeper ensemble=rs227:2181,rs229:2181,rs226:2181
2014-07-03 15:49:50,224 INFO [master:rs227:60000-SendThread(rs226:2181)]zookeeper.ClientCnxn: Opening socket connection to serverrs226/116.255.224.226:2181. Will not attempt to authenticate using SASL(unknown error)
2014-07-03 15:49:50,225 INFO [master:rs227:60000-SendThread(rs226:2181)]zookeeper.ClientCnxn: Socket connection established tors226/116.255.224.226:2181, initiating session
2014-07-03 15:49:50,229 INFO [master:rs227:60000-SendThread(rs226:2181)]zookeeper.ClientCnxn: Session establishment complete on serverrs226/116.255.224.226:2181, sessionid = 0xe2467eb575ce0049, negotiated timeout= 40000
2014-07-03 15:49:50,255 DEBUG [master:rs227:60000]catalog.CatalogTracker: Starting catalog trackerorg.apache.hadoop.hbase.catalog.CatalogTracker@2cb8709b
2014-07-03 15:49:50,333 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs227,60020,1402643884463data: PBU锟斤拷
2014-07-03 15:49:50,334 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs226,60020,1402643884476data: PBU锟斤拷
2014-07-03 15:49:50,335 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs198,60020,1402643884392data: PBU锟斤拷
2014-07-03 15:49:50,336 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs229,60020,1402643884248data: PBU锟斤拷
2014-07-03 15:49:50,337 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs197,60020,1402643884579data: PBU锟斤拷
2014-07-03 15:49:50,339 INFO [master:rs227:60000] master.HMaster: Serveractive/primary master=rs227,60000,1402645426368, sessionid=0xe3467eb575cc003c,setting cluster-up flag (Was=true)
2014-07-03 15:49:50,361 INFO [master:rs227:60000]zookeeper.RecoverableZooKeeper: Node /hbase/online-snapshot/acquired alreadyexists and this is not a retry
2014-07-03 15:49:50,363 INFO [master:rs227:60000]procedure.ZKProcedureUtil: Clearing all procedure znodes:/hbase/online-snapshot/acquired /hbase/online-snapshot/reached/hbase/online-snapshot/abort
2014-07-03 15:49:50,365 DEBUG [master:rs227:60000]procedure.ZKProcedureCoordinatorRpcs: Starting the controller for proceduremember:rs227,60000,1402645426368
2014-07-03 15:49:50,378 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor service name=MASTER_OPEN_REGION-rs227:60000,corePoolSize=5, maxPoolSize=5
2014-07-03 15:49:50,378 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor service name=MASTER_CLOSE_REGION-rs227:60000,corePoolSize=5, maxPoolSize=5
2014-07-03 15:49:50,378 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor servicename=MASTER_SERVER_OPERATIONS-rs227:60000, corePoolSize=5, maxPoolSize=5
2014-07-03 15:49:50,379 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor servicename=MASTER_META_SERVER_OPERATIONS-rs227:60000, corePoolSize=5, maxPoolSize=5
2014-07-03 15:49:50,379 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor service name=M_LOG_REPLAY_OPS-rs227:60000,corePoolSize=10, maxPoolSize=10
2014-07-03 15:49:50,379 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor servicename=MASTER_TABLE_OPERATIONS-rs227:60000, corePoolSize=1, maxPoolSize=1
2014-07-03 15:49:50,381 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner
2014-07-03 15:49:50,386 INFO [master:rs227:60000] zookeeper.ZooKeeper:Initiating client connection, connectString=rs227:2181,rs229:2181,rs226:2181sessionTimeout=90000 watcher=replicationLogCleaner,quorum=rs227:2181,rs229:2181,rs226:2181, baseZNode=/hbase
2014-07-03 15:49:50,387 INFO [master:rs227:60000]zookeeper.RecoverableZooKeeper: Process identifier=replicationLogCleanerconnecting to ZooKeeper ensemble=rs227:2181,rs229:2181,rs226:2181
2014-07-03 15:49:50,387 INFO [master:rs227:60000-SendThread(rs227:2181)]zookeeper.ClientCnxn: Opening socket connection to serverrs227/116.255.224.227:2181. Will not attempt to authenticate using SASL(unknown error)
2014-07-03 15:49:50,388 INFO [master:rs227:60000-SendThread(rs227:2181)]zookeeper.ClientCnxn: Socket connection established tors227/116.255.224.227:2181, initiating session
2014-07-03 15:49:50,391 INFO [master:rs227:60000-SendThread(rs227:2181)]zookeeper.ClientCnxn: Session establishment complete on serverrs227/116.255.224.227:2181, sessionid = 0xe3467eb575cc003e, negotiated timeout= 40000
2014-07-03 15:49:50,400 INFO [master:rs227:60000] zookeeper.RecoverableZooKeeper:Node /hbase/replication/rs already exists and this is not a retry
2014-07-03 15:49:50,400 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner
2014-07-03 15:49:50,404 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.snapshot.SnapshotLogCleaner
2014-07-03 15:49:50,406 DEBUG [master:rs227:60000]cleaner.CleanerChore: initialize cleaner=org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner
2014-07-03 15:49:50,408 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner
2014-07-03 15:49:50,409 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner
2014-07-03 15:49:50,409 INFO [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 0, slept for 0ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms, intervalof 1500 ms.
2014-07-03 15:49:51,914 INFO [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 0, slept for1505 ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms,interval of 1500 ms.
2014-07-03 15:49:53,361 INFO [RpcServer.handler=4,port=60000]master.ServerManager: Registering server=rs197,60020,1402643884579
2014-07-03 15:49:53,362 INFO [RpcServer.handler=2,port=60000] master.ServerManager:Registering server=rs227,60020,1402643884463
2014-07-03 15:49:53,362 INFO [RpcServer.handler=0,port=60000]master.ServerManager: Registering server=rs198,60020,1402643884392
2014-07-03 15:49:53,361 INFO [RpcServer.handler=3,port=60000] master.ServerManager:Registering server=rs229,60020,1402643884248
2014-07-03 15:49:53,362 INFO [RpcServer.handler=1,port=60000]master.ServerManager: Registering server=rs226,60020,1402643884476
2014-07-03 15:49:53,369 INFO [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 5, slept for2960 ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms,interval of 1500 ms.
2014-07-03 15:49:54,873 INFO [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 5, slept for4464 ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms,interval of 1500 ms.
2014-07-03 15:49:54,923 INFO [master:rs227:60000] master.ServerManager:Finished waiting for region servers count to settle; checked in 5, slept for4514 ms, expecting minimum of 1, maximum of 2147483647, master is running.
2014-07-03 15:49:54,928 INFO [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs229,60020,1402643884248 belongs to anexisting region server
2014-07-03 15:49:54,928 INFO [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs227,60020,1402643884463 belongs to anexisting region server
2014-07-03 15:49:54,928 INFO [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs197,60020,1402643884579 belongs to anexisting region server
2014-07-03 15:49:54,928 INFO [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs198,60020,1402643884392 belongs to anexisting region server
2014-07-03 15:49:54,928 INFO [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs226,60020,1402643884476 belongs to anexisting region server
2014-07-03 15:49:55,011 INFO [master:rs227:60000] master.RegionStates:Transitioned {1588230740 state=OFFLINE, ts=1402645794937, server=null} to{1588230740 state=OPEN, ts=1402645795011, server=rs229,60020,1402643884248}
2014-07-03 15:49:55,011 INFO [master:rs227:60000] master.RegionStates:Onlined 1588230740 on rs229,60020,1402643884248
2014-07-03 15:49:55,011 INFO [master:rs227:60000] master.ServerManager:AssignmentManager hasn't finished failover cleanup; waiting
2014-07-03 15:49:55,012 INFO [master:rs227:60000] master.HMaster:hbase:meta assigned=0, rit=false, location=rs229,60020,1402643884248
2014-07-03 15:49:55,094 INFO [master:rs227:60000]catalog.MetaMigrationConvertingToPB: META already up-to date with PBserialization
2014-07-03 15:49:55,118 INFO [master:rs227:60000] master.RegionStates:Transitioned {cb2b12bd4e03b016c9e0a9c3b9c52098 state=OFFLINE, ts=1402645795118,server=null} to {cb2b12bd4e03b016c9e0a9c3b9c52098 state=OPEN, ts=1402645795118,server=rs197,60020,1402643884579}
2014-07-03 15:49:55,118 INFO [master:rs227:60000] master.RegionStates:Onlined cb2b12bd4e03b016c9e0a9c3b9c52098 on rs197,60020,1402643884579
2014-07-03 15:49:55,119 DEBUG [master:rs227:60000]master.AssignmentManager: Found {ENCODED => cb2b12bd4e03b016c9e0a9c3b9c52098,NAME => 'hbase:namespace,,1402643891966.cb2b12bd4e03b016c9e0a9c3b9c52098.',STARTKEY => '', ENDKEY => ''} out on cluster
2014-07-03 15:49:55,119 INFO [master:rs227:60000] master.AssignmentManager:Found regions out on cluster or in RIT; presuming failover
2014-07-03 15:49:55,220 DEBUG [master:rs227:60000]hbase.ZKNamespaceManager: Updating namespace cache from node default with data:\x0A\x07default
2014-07-03 15:49:55,223 DEBUG [master:rs227:60000]hbase.ZKNamespaceManager: Updating namespace cache from node hbase with data:\x0A\x05hbase
2014-07-03 15:49:55,242 INFO [master:rs227:60000]zookeeper.RecoverableZooKeeper: Node /hbase/namespace/default already existsand this is not a retry
2014-07-03 15:49:55,249 INFO [master:rs227:60000] zookeeper.RecoverableZooKeeper:Node /hbase/namespace/hbase already exists and this is not a retry
2014-07-03 15:49:55,251 INFO [master:rs227:60000] master.HMaster:Master has completed initialization
只看红色标注的地方,意思就是说当我们kill掉rs229上的hmaster的时候,No master available. Notifying waiting threads. A master is now available(找不到master,唤醒等待的hmaster线程(认识96),然后找到了等待的hmaster(rs227)),然后zookeeper就接管并且将rs227上的hmaster从等待状态切换为激活状态了,然后就ok了。(当然也可以多开几个备用的hmaster)
# 安装Mysql 5.5.x
# yum安装mysql(其它方式也行了,这样方便点)
# yum 安装 mysql-server
[root@rs229 ~]# yum install MySQL-server-5.5.31-2.el6.i686.rpm
# yum 安装 mysql-client
[root@rs229 ~]# yum install MySQL-client-5.5.31-2.el6.i686.rpm
# 启动mysql服务
[root@rs229 lib]# service mysql start (注意这里是mysql,不是mysqld哦)
Starting MySQL.. SUCCESS!
[root@rs229 lib]#
# 配置mysql(设置账号密码)
[root@rs229 yum.repos.d]# mysql_secure_installation
NOTE: RUNNING ALL PARTS OF THIS SCRIPT IS RECOMMENDEDFOR ALL MySQL
SERVERSIN PRODUCTION USE! PLEASE READ EACH STEPCAREFULLY!
In order to log into MySQL to secure it, we'll needthe current
password for the root user. If you've just installed MySQL, and
you haven't set the root password yet, the passwordwill be blank,
so you should just press enter here.
Enter current password for root (enter for none):
OK, successfully used password, moving on...
Setting the root password ensures that nobody can loginto the MySQL
root user without the proper authorisation.
Set root password? [Y/n] y
New password:
Re-enter new password:
Password updated successfully!
Reloading privilege tables..
... Success!
By default, a MySQL installation has an anonymoususer, allowing anyone
to log into MySQL without having to have a useraccount created for
them. This isintended only for testing, and to make the installation
go a bit smoother. You should remove them before moving into a
production environment.
Remove anonymous users? [Y/n] n
... skipping.
Normally, root should only be allowed to connect from'localhost'. This
ensures that someone cannot guess at the rootpassword from the network.
Disallow root login remotely? [Y/n] n
... skipping.
By default, MySQL comes with a database named 'test'that anyone can
access. Thisis also intended only for testing, and should be removed
before moving into a production environment.
Remove test database and access to it? [Y/n] n
... skipping.
Reloading the privilege tables will ensure that allchanges made so far
will take effect immediately.
Reload privilege tables now? [Y/n] y
... Success!
Cleaning up...
All done! Ifyou've completed all of the above steps, your MySQL
installation should now be secure.
Thanks for using MySQL!
# 授权可以远程访问mysql
mysql> grant all on *.* to 'root'@'%' identified by '1234567';(1234567是root远程登录的密码,可以修改成你自己好记的)
Query OK, 0 rows affected (0.00 sec)
mysql> flush privileges;
Query OK, 0 rows affected (0.00 sec)
然后可以远程登录了,不解释
# Hive-0.13.1
# 解压apache-hive-0.13.1-bin.tar.gz
[root@rs229 hive]# pwd
/usr/local/adsit/yting/apache/hive
[root@rs229 hive]# ll
total 52976
-rw-r--r-- 1 root root 54246778 Jun 11 15:23apache-hive-0.13.1-bin.tar.gz
[root@rs229 hive]# tar -zxvf apache-hive-0.13.1-bin.tar.gz
# 通过../conf目录下的模版文件复制出对应的文件
[root@rs229 hive]# ll
total 52980
drwxr-xr-x 8 root root 4096 Jul 04 16:52 apache-hive-0.13.1-bin
-rw-r--r-- 1 root root 54246778 Jun 11 15:23apache-hive-0.13.1-bin.tar.gz
[root@rs229 hive]# cd apache-hive-0.13.1-bin
[root@rs229 apache-hive-0.13.1-bin]# ll
total 304
drwxr-xr-x 3 root root 4096 Jul 04 16:52 bin
drwxr-xr-x 2 root root 4096 Jul 04 16:52 conf
drwxr-xr-x 4 root root 4096 Jul 04 16:52 examples
drwxr-xr-x 7 root root 4096 Jul 04 16:52 hcatalog
drwxr-xr-x 4 root root 4096 Jul 04 16:52 lib
-rw-rw-r-- 1 500 cimsrvr 23828 Jan 30 06:23LICENSE
-rw-rw-r-- 1 500 cimsrvr 277 May 13 08:00NOTICE
-rw-rw-r-- 1 500 cimsrvr 3838 May 23 14:19README.txt
-rw-rw-r-- 1 500 cimsrvr 253839 Jun 3 03:27RELEASE_NOTES.txt
drwxr-xr-x 3 root root 4096 Jul 04 16:52 scripts
[root@rs229 apache-hive-0.13.1-bin]# cd conf/
[root@rs229 conf]# ll
total 120
-rw-rw-r-- 1 500 cimsrvr 107221 Jun 3 03:27 hive-default.xml.template
-rw-rw-r-- 1 500 cimsrvr 2378 Jan 30 06:23 hive-env.sh.template
-rw-rw-r-- 1 500 cimsrvr 2662 May 13 08:00hive-exec-log4j.properties.template
-rw-rw-r-- 1 500 cimsrvr 3050 May 13 08:00hive-log4j.properties.template
[root@rs229 conf]# cp hive-env.sh.template hive-env.sh
[root@rs229 conf]# cp hive-default.xml.template hive-site.xml
[root@rs229 conf]# ll
total 232
-rw-rw-r-- 1 500 cimsrvr 107221 Jun 3 03:27hive-default.xml.template
-rw-r--r-- 1 root root 2378 Jul 04 16:54 hive-env.sh
-rw-rw-r-- 1 500 cimsrvr 2378 Jan 30 06:23hive-env.sh.template
-rw-rw-r-- 1 500 cimsrvr 2662 May 13 08:00hive-exec-log4j.properties.template
-rw-rw-r-- 1 500 cimsrvr 3050 May 13 08:00hive-log4j.properties.template
-rw-r--r-- 1 root root 107221 Jul 04 16:54 hive-site.xml
# 修改hive-env.sh配置文件
[root@rs229 conf]# vi hive-env.sh
# Set HADOOP_HOME to point to a specific hadoopinstall directory
HADOOP_HOME=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0
# 修改hive-site.xml文件
<configuration>
<property>
<name>hive.metastore.warehouse.dir</name>
<value>hdfs://mycluster/user/hive/warehouse</value>
</property>
<property>
<name>hive.exec.scratchdir</name>
<value>hdfs://mycluster/user/hive/scratchdir</value>
</property>
<property>
<name>hive.querylog.location</name>
<value>/usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/logs</value>
</property>
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://rs229:3306/hiveMeta?createDatabaseIfNotExist=true</value>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>yousmile</value>
</property>
<property>
<name>hive.aux.jars.path</name> <value>file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/hive-hbase-handler-0.13.1.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/protobuf-java-2.5.0.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/hbase-client-0.96.2-hadoop2.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/hbase-common-0.96.2-hadoop2.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/zookeeper-3.4.5.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/guava-11.0.2.jar</value>
<!-- 这里是重点的地方,为了跟Hbase整合,所以千万别写错了,hive.aux.jars.path 的value中间不允许有空格,回车,换行什么的,全部写在一行上就行了,不然会出各种错 -->
</property>
<property>
<name>hive.zookeeper.quorum</name>
<value>rs229,rs227,rs226,rs198,rs197</value>
<description>The list of zookeeper servers to talk to. This isonly needed for read/write locks.</description>
</property>
<!--
<property>
<name>hive.metastore.uris</name>
<value>thrift://rs229:9083</value>
</property>
-->
</configuration>
注意1 : <name>hive.aux.jars.path</name>配置中不要出现空格的符号,全部写在一行上,不然要报错
注意2 : 如果使用mysql的话需要在${HIVE_HOME}/lib 目录下加入mysql的jdbc链接jar包
注意3 : mysql必须授权远程登录
# 创建数据仓库目录
[root@rs229 bin]# ./hadoop fs -mkdir -p /user/hive/warehouse
# 将需要的jar包加入到$HIVE_HOME/lib下去
hive.aux.jars.path这个路径用到的jar复制到$HIVE_HOME/lib目录下去,对照配置然后去将Hadoop目录下相关的jar包拷到Hive目录下去
# Hive测试
# 进入Hive Shell
[root@rs229 conf]# hive
hive> show databases;
OK
default
Time taken: 0.629 seconds, Fetched: 2 row(s)
hive> create database yting_test_20140703;
# 创建数据库
hive> show databases;
OK
default
yting_test_20140703
# 建表
hive> use yting_test_20140703;
OK
Time taken: 0.023 seconds
hive> create external table yousmile(uid int,myname string,youname string) row format delimited fields terminated by ',' location '/user/hive/warehouse/yousmile';
这里是创建的一个外部表
# Load数据到表yting_test_20140703
[root@rs229 tdata]# pwd
/usr/local/yting/hive/data/tdata
[root@rs229 tdata]# cat /usr/local/yting/hive/data/tdata/testdata001.dat
1314520,yting,xmei
[root@rs229 tdata]#
hive> load data local inpath '/usr/local/yting/hive/data/tdata/testdata001.dat' overwrite into table yousmile;
Copying data fromfile:/usr/local/yting/hive/data/tdata/testdata001.dat
Copying file:file:/usr/local/yting/hive/data/tdata/testdata001.dat
Loading data to table yting_test_20140703.yousmile
rmr: DEPRECATED: Please use 'rm -r' instead.
Deleted hdfs://mycluster/user/hive/warehouse/yousmile
Table yting_test_20140703.yousmile stats:[numFiles=0, numRows=0, totalSize=0, rawDataSize=0]
OK
Time taken: 0.877 seconds
hive> show tables;
OK
yousmile
Time taken: 0.028 seconds, Fetched: 1 row(s)
# 查询数据是否加载成功
hive> select * from yousmile;
OK
1314520 yting xmei
Time taken: 0.326 seconds, Fetched: 1 row(s)
hive>
可以查询出刚刚的数据,不解释,成功了!
# Hive跟Hbase的整合
# Hive To Hbase(Hive中的表数据导入到Hbase中去)
# 创建Hbase能识别的表(带分区,扩展知识用的,这里不使用)
hive> CREATE TABLE hbase_to_hbase_yousmile_20140704(key int, value string) partitioned by (day string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:info") TBLPROPERTIES ("hbase.table.name" = "hive_to_hbase_yousmile_20140704");
# 创建Hbase能识别的表(不带分区)
hive> set hive.hbase.bulk=true;
# 创建数据库yting_yousmile_20140704
hive> create database yting_yousmile_20140704; # 创建数据库
hive> use yting_yousmile_20140704;
OK
Time taken: 0.023 seconds
OK
Time taken: 0.254 seconds
# 使用数据库yting_yousmile_20140704
hive> use yting_yousmile_20140704; # 使用该数据库
OK
Time taken: 0.023 seconds
# 创建本地表,用来存储数据,然后插入到Hbase用的,相当于一张中间表了
hive> create table hive_to_hbase_middle_yousmile_20140704(uid int,info string) row format delimited fields terminated by ',';
OK
Time taken: 0.056 seconds
hive> show tables;
OK
hive_to_hbase_middle_yousmile_20140704
hive_to_hbase_yousmile_20140704
Time taken: 0.023 seconds, Fetched: 2 row(s)
hive>
# Load本地数据(不是HDFS上的数据,Linux上的数据)到Hive的中间表hive_to_hbase_middle_yousmile_20140704去
hive> load data local inpath '/usr/local/yting/hive/data/tdata/hbase_test001.dat' overwrite into table hive_to_hbase_middle_yousmile_20140704;
Copying data fromfile:/usr/local/yting/hive/data/tdata/hbase_test001.dat
Copying file:file:/usr/local/yting/hive/data/tdata/hbase_test001.dat
Loading data to table yting_yousmile_20140704.hive_to_hbase_middle_yousmile_20140704
rmr: DEPRECATED: Please use 'rm -r' instead.
Deletedhdfs://mycluster/user/hive/warehouse/yting_yousmile_20140704.db/hive_to_hbase_middle_yousmile_20140704
Table yting_yousmile_20140704.hive_to_hbase_middle_yousmile_20140704stats: [numFiles=1, numRows=0, totalSize=38, rawDataSize=0]
OK
Time taken: 0.647 seconds
# 查看中间表hive_to_hbase_middle_yousmile_20140704是否Load数据成功
hive> select * from hive_to_hbase_middle_yousmile_20140704;
OK
1 the
2 you
3 smile
4 until
5 forever
Time taken: 0.272 seconds, Fetched: 5 row(s)
hive>
# 中间表的数据hive_to_hbase_middle_yousmile_20140704插入与Hbase关联的表hive_to_hbase_yousmile_20140704,也就是插入到Hbase中去
这里稍微花了点时间,耐心的等一下
hive> insert overwrite table hive_to_hbase_yousmile_20140704 select * from hive_to_hbase_middle_yousmile_20140704;
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's noreduce operator
Starting Job = job_1404453897041_0002, Tracking URL =http://rs229:8088/proxy/application_1404453897041_0002/
Kill Command =/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/bin/hadoop job -kill job_1404453897041_0002
Hadoop job information for Stage-0: number ofmappers: 1; number of reducers: 0
2014-07-04 17:49:37,780 Stage-0 map = 0%, reduce = 0%
2014-07-04 17:50:05,963 Stage-0 map = 100%, reduce = 0%, Cumulative CPU 3.59 sec
MapReduce Total cumulative CPU time: 3 seconds 590msec
Ended Job = job_1404453897041_0002
MapReduce Jobs Launched:
Job 0: Map: 1 Cumulative CPU: 3.59 sec HDFSRead: 312 HDFS Write: 0 SUCCESS
Total MapReduce CPU Time Spent: 3 seconds 590 msec
OK
Time taken: 105.236 seconds
hive>
# 查看与Hbase关联的表hive_to_hbase_yousmile_20140704有数据了没有
hive> select * from hive_to_hbase_yousmile_20140704;
OK
1 the
2 you
3 smile
4 until
5 forever
Time taken: 0.109 seconds, Fetched:5 row(s)
hive>
可以看到已经插入数据了
# 在Hbase Shell下查看刚刚在Hive下的数据是否已经被插入了
[root@rs229 ~]# hbase shell
hbase(main):002:0> list
TABLE
hive_hbase_combiner
hive_to_hbase_yousmile_20140704
yting_xmei1129
3 row(s) in 0.0260 seconds
=> ["hive_hbase_combiner","hive_to_hbase_yousmile_20140704", "yting_xmei1129"]
hbase(main):003:0> scan 'hive_to_hbase_yousmile_20140704'
ROW COLUMN+CELL
1 column=cf1:info, timestamp=1404467409828, value=the
2 column=cf1:info, timestamp=1404467409828, value=you
3 column=cf1:info, timestamp=1404467409828, value=smile
4 column=cf1:info, timestamp=1404467409828, value=until
5 column=cf1:info,timestamp=1404467409828, value=forever
5 row(s) in 0.0740 seconds
hbase(main):004:0>
可以看到数据成功插入到Hbase,到这里,Hive To Hbase已经完成了
# 注意
注意:与hbase整合的有分区的表存在个问题 select * from table查询不到数据,select key,value from table可以查到数据
# Hbase To Hive(Hbase中的表数据导入到Hive)
# Hbase Shell下创建一张表
RowKey是gid string
列族是:info:time info:address
hbase(main):005:0> create 'hbase_to_hive_yousmile_20140704','gid','info'
0 row(s) in 0.4420 seconds
=> Hbase::Table - hbase_to_hive_yousmile_20140704
hbase(main):006:0> put 'hbase_to_hive_yousmile_20140704','3344520','info:time','20140704'
0 row(s) in 0.0760 seconds
hbase(main):008:0> put 'hbase_to_hive_yousmile_20140704','3344520','info:address','beijing'
0 row(s) in 0.0760 seconds
hbase(main):009:0> scan 'hbase_to_hive_yousmile_20140704'
ROW COLUMN+CELL
3344520 column=info:address, timestamp=1404634322078, value=beijing
3344520 column=info:time, timestamp=1404469280507, value=20140704
1 row(s) in 0.0200 seconds
hbase(main):010:0>
# Hive下创建表连接Hbase中的表
hive> create external table hbase_to_hive_yousmile_20140704 (key string,gid map<string,string>) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "info:") TBLPROPERTIES ("hbase.table.name" = "hbase_to_hive_yousmile_20140704");
OK
Time taken: 0.222 seconds
hive> show tables;
OK
hbase_to_hive_yousmile_20140704
hive_to_hbase_middle_yousmile_20140704
hive_to_hbase_yousmile_20140704
Time taken: 0.027 seconds, Fetched: 3 row(s)
hive>
# 查询Hbase表中的数据
hive> select * from hbase_to_hive_yousmile_20140704;
OK
3344520 {"address":"beijing","time":"20140704"}
Time taken: 0.156 seconds, Fetched: 1row(s)
hive>
可以看出在Hive下能查询出Hbase表中的数据了
# Hadoop,Hive,Hbase整合完毕
终于整合完毕了,这个整合弄到了一半的时候没时间弄了,中间耽搁了好久,公司有点小忙,所以细心的话可以看出中间的时间间隔是隔了一定的时间了,不过今天总算是整理完了,这真是一个历史性的时刻
# Hive连接Hbase的优化
注:hive连接hbase优化,将$HADOOP_HOME/conf中的hbase-site.xml文件中增加配置
<property>
<name>hbase.client.scanner.caching</name>
<value>10000</value>
</property>
或者在执行hive语句之前执行hive>set hbase.client.scanner.caching=10000;