Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

From:http://www.itnose.net/detail/6065872.html

# 需要软件

Hadoop-2.2.0(目前Apache官网最新的Stable版本)

Hbase-0.96.2(这里就用这个版本,跟Hadoop-2.2.0是配套的,不用覆盖jar包什么的)

Hive-0.13.1(目前是最新版本)

Zookeeper-3.4.6(这里推荐使用 3.4.5)

Jdk1.7.0_60(这里推荐使用1.7.0_45)

Mysql-5.5.31

# 集群结构图

NN : NameNode

JN : JournalNode

DN : DataNode

ZK : ZooKeeper

HM : HMaster

HRS:HRegionServer

IP地址

主机名

ZK

NN

JN

DN

HRS

HM

HIVE

192.168.1.229

rs229

192.168.1.227

rs227

192.168.1.226

rs226

192.168.1.198

rs198

192.168.1.197

rs197

192.168.1.196

rs196

192.168.1.195

rs195

# Zookeeper-3.4.6

# zoo.cfg 配置文件的修改

[root@rs227 conf]# pwd

/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/conf

[root@rs227 conf]# ls

configuration.xsl log4j.properties  zoo.cfg  zookeeper.out zoo_sample.cfg

[root@rs227 conf]# cp zoo_sample.cfg zoo.cfg

[root@rs227 conf]# vi zoo.cfg

# The number of milliseconds of each tick

tickTime=2000

# The number of ticks that the initial

# synchronization phase can take

initLimit=10

# The number of ticks that can pass between

# sending a request and getting an acknowledgement

syncLimit=5

# the directory where the snapshot is stored.

# do not use /tmp for storage, /tmp here is just

# example sakes.

dataDir=/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/data

dataLogDir=/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/logs

# the port at which the clients will connect

clientPort=2181

# the maximum number of client connections.

# increase this if you need to handle more clients

#maxClientCnxns=60

#

# Be sure to read the maintenance section of the

# administrator guide before turning on autopurge.

#

#http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance

#

# The number of snapshots to retain in dataDir

#autopurge.snapRetainCount=3

# Purge task interval in hours

# Set to "0" to disable auto purge feature

#autopurge.purgeInterval=1

server.229=rs229:2888:3888

server.227=rs227:2888:3888

server.226=rs226:2888:3888

server.198=rs198:2888:3888

server.197=rs197:2888:3888

#server.196=rs196:2888:3888 只用5台,这两个是不用的

#server.195=rs195:2888:3888

# dataDir目录下创建 myid文件

然后每台主机写不同的id,比如:

rs229 中写入 229,

rs227 中写入 227,

以此类推 …

# 复制(SCP)到其它的服务器下去

然后只要修改…data/myid文件成对应的id就好了

rs229 中写入 229,

rs227 中写入 227,

以此类推 …

# Hadoop-2.2.0

# 修改7个配置文件

~/hadoop-2.2.0/etc/hadoop/hadoop-env.sh

~/hadoop-2.2.0/etc/hadoop/core-site.xml

~/hadoop-2.2.0/etc/hadoop/hdfs-site.xml

~/hadoop-2.2.0/etc/hadoop/mapred-site.xml

~/hadoop-2.2.0/etc/hadoop/yarn-env.sh

~/hadoop-2.2.0/etc/hadoop/yarn-site.xml

~/hadoop-2.2.0/etc/hadoop/slaves

# 1修改hadoop-env.sh配置文件(jdk 路径)

[root@masterhadoop]# pwd

/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop

[root@masterhadoop]# vi hadoop-env.sh

# The java implementation to use.

export JAVA_HOME=/usr/local/adsit/yting/jdk/jdk1.7.0_60

#export JAVA_HOME=${JAVA_HOME}

# 2修改core-site.xml文件修改 (注意fs.defaultFS的配置)

fs.defaultFS的配置中,value在rs229上就写rs229,在rs227上就写rs227,在哪台服务器上就写哪台服务器的主机名

[root@masterhadoop]# vi core-site.xml

<configuration>

<property>

<name>fs.defaultFS</name>

<value>hdfs://mycluster</value>

</property>

<property>

<name>hadoop.tmp.dir</name>

<value>/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/tmp</value>

<description>Abase for other temporary directories.</description>

</property>

<property>

<name>dfs.nameservices</name>

<value>mycluster</value>

</property>

<property>

<name>ha.zookeeper.quorum</name>

<value>rs229:2181,rs227:2181,rs226:2181,rs198:2181,rs197:2181</value>

</property>

</configuration>

# 3修改hdfs-site.xml配置文件

[root@master hadoop]# vi hdfs-site.xml

<configuration>

<property>

<name>dfs.nameservices</name>

<value>mycluster</value>

</property>

<property>

<name>dfs.ha.namenodes.mycluster</name>

<value>rs229,rs227</value>

</property>

<property>

<name>dfs.namenode.rpc-address.mycluster.rs229</name>

<value>rs229:9000</value>

</property>

<property>

<name>dfs.namenode.rpc-address.mycluster.rs227</name>

<value>rs227:9000</value>

</property>

<property>

<name>dfs.namenode.http-address.mycluster.rs229</name>

<value>rs229:50070</value>

</property>

<property>

<name>dfs.namenode.http-address.mycluster.rs227</name>

<value>rs227:50070</value>

</property>

<property>

<name>dfs.namenode.shared.edits.dir</name>

<value>qjournal://rs229:8485;rs227:8485;rs226:8485/mycluster</value>

</property>

<property>

<name>dfs.ha.automatic-failover.enabled.mycluster</name>

<value>true</value>

</property>

<property>

<name>dfs.client.failover.proxy.provider.mycluster</name>

<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>

</property>

<property>

<name>dfs.ha.fencing.methods</name>

<value>sshfence</value>

</property>

<property>

<name>dfs.ha.fencing.ssh.private-key-files</name>

<value>/root/.ssh/id_rsa</value>

</property>

<property>

<name>dfs.journalnode.edits.dir</name>

<value>/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/tmp/journal</value>

</property>

<property>

<name>dfs.replication</name>

<value>3</value>

</property>

<property>

<name>dfs.webhdfs.enabled</name>

<value>true</value>

</property>

</configuration>

# 4修改 mapred-site.xml配置文件

[root@masterhadoop]# cp mapred-site.xml.template mapred-site.xml

[root@masterhadoop]# vi mapred-site.xml

<configuration>

<property>

<name>mapreduce.framework.name</name>

<value>yarn</value>

</property>

</configuration>

# 5修改yarn-env.sh配置文件

[root@masterhadoop]# vi yarn-env.sh

# some Javaparameters

export JAVA_HOME=/usr/local/adsit/yting/jdk/jdk1.7.0_60

# 6修改yarn-site.xml配置文件 (还是单点,你逗饿么?)

[root@masterhadoop]# vi yarn-site.xml

<configuration>

<!-- Site specific YARN configuration properties-->

<property>

<name>yarn.nodemanager.aux-services</name>

<value>mapreduce_shuffle</value>

</property>

<property>

<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>

<value>org.apache.hadoop.mapred.ShuffleHandler</value>

</property>

<property>

<name>yarn.resourcemanager.hostname</name>

<value>rs229</value>

</property>

</configuration>

# 7修改slaves配置文件

[root@masterhadoop]# vi slaves

rs229

rs227

rs226

rs198

rs197

rs196

rs195

# Hadoop配置结束,开始启动各个程序(笔记只保留重要日志信息)

# 在每个节点上启动Zookeeper

[root@rs229 zookeeper-3.4.6]# pwd

/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6

[root@rs229 zookeeper-3.4.6]#bin/zkServer.sh start

JMX enabled by default

Using config:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../conf/zoo.cfg

Starting zookeeper ... STARTED

[root@rs229 zookeeper-3.4.6]#

其它服务器也这样启动,这里就不写了…

# 验证Zookeeper是否启动成功1

在rs229上查看zookeeper的状态发现是leader

在其他的机器上查看zookeeper的状态发现是follower


[root@rs229 bin]# pwd

/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin

[root@rs229 bin]# ./zkServer.sh status

JMX enabled by default

Using config:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../conf/zoo.cfg

Mode: leader

[root@rs229 bin]#

#验证Zookeeper是否启动成功2

[root@rs229 hadoop]# pwd

/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop

[root@rs229 hadoop]#../../../../zookeeper/zookeeper-3.4.6/bin/zkCli.sh

Connecting to localhost:2181

2014-06-10 16:51:53,575 [myid:] - INFO  [main:Environment@100] - Clientenvironment:zookeeper.version=3.4.6-1569965, built on 02/20/2014 09:09 GMT

2014-06-10 16:51:53,580 [myid:] - INFO  [main:Environment@100] - Clientenvironment:host.name=master

2014-06-10 16:51:53,581 [myid:] - INFO  [main:Environment@100] - Clientenvironment:java.version=1.7.0_60

2014-06-10 16:51:53,584 [myid:] - INFO  [main:Environment@100] - Clientenvironment:java.vendor=Oracle Corporation

2014-06-10 16:51:53,584 [myid:] - INFO  [main:Environment@100] - Clientenvironment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre

2014-06-10 16:51:53,584 [myid:] - INFO  [main:Environment@100] - Clientenvironment:java.class.path=/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../build/classes:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../build/lib/*.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/slf4j-log4j12-1.6.1.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/slf4j-api-1.6.1.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/netty-3.7.0.Final.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/log4j-1.2.16.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../lib/jline-0.9.94.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../zookeeper-3.4.6.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../src/java/lib/*.jar:/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin/../conf:

2014-06-10 16:51:53,584 [myid:] - INFO  [main:Environment@100] - Clientenvironment:java.library.path=/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib

2014-06-10 16:51:53,584 [myid:] - INFO  [main:Environment@100] - Client environment:java.io.tmpdir=/tmp

2014-06-10 16:51:53,585 [myid:] - INFO  [main:Environment@100] - Clientenvironment:java.compiler=<NA>

2014-06-10 16:51:53,585 [myid:] - INFO  [main:Environment@100] - Clientenvironment:os.name=Linux

2014-06-10 16:51:53,585 [myid:] - INFO  [main:Environment@100] - Clientenvironment:os.arch=amd64

2014-06-10 16:51:53,585 [myid:] - INFO  [main:Environment@100] - Clientenvironment:os.version=2.6.32-279.el6.x86_64

2014-06-10 16:51:53,585 [myid:] - INFO  [main:Environment@100] - Client environment:user.name=root

2014-06-10 16:51:53,586 [myid:] - INFO  [main:Environment@100] - Clientenvironment:user.home=/root

2014-06-10 16:51:53,586 [myid:] - INFO  [main:Environment@100] - Clientenvironment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop

2014-06-10 16:51:53,588 [myid:] - INFO  [main:ZooKeeper@438] - Initiating clientconnection, connectString=localhost:2181 sessionTimeout=30000watcher=org.apache.zookeeper.ZooKeeperMain$MyWatcher@590aeb1f

Welcome to ZooKeeper!

2014-06-10 16:51:53,627 [myid:] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@975] - Openingsocket connection to server localhost/127.0.0.1:2181. Will not attempt toauthenticate using SASL (unknown error)

2014-06-10 16:51:53,634 [myid:] - INFO  [main-SendThread(localhost:2181):ClientCnxn$SendThread@852]- Socket connection established to localhost/127.0.0.1:2181, initiating session

JLine support is enabled

2014-06-10 16:51:53,646 [myid:] - INFO [main-SendThread(localhost:2181):ClientCnxn$SendThread@1235] - Sessionestablishment complete on server localhost/127.0.0.1:2181, sessionid =0xe5467eb575d20003, negotiated timeout = 30000

WATCHER::

WatchedEvent state:SyncConnected type:None path:null

[zk: localhost:2181(CONNECTED) 0] ls /

[zookeeper]

[zk: localhost:2181(CONNECTED) 1]

出现这样的提示的话,那么zookeeper就启动成功了

# 在rs229上格式化Zookeeper

[root@rs229 sbin]# ../bin/hdfs zkfc -formatZK

14/06/10 16:55:41 WARN util.NativeCodeLoader: Unableto load native-hadoop library for your platform... using builtin-java classeswhere applicable

14/06/10 16:55:41 INFO tools.DFSZKFailoverController:Failover controller configured for NameNode NameNode atrs229/116.255.224.229:9000

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:host.name=master

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.version=1.7.0_60

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.vendor=Oracle Corporation

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.class.path=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop:…

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.library.path=/usr/local/adsit/yting/apache/hadoop/hadoop-1.1.2/lib

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.io.tmpdir=/tmp

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:java.compiler=<NA>

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:os.name=Linux

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:os.arch=amd64

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:os.version=2.6.32-279.el6.x86_64

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:user.name=root

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:user.home=/root

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Clientenvironment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin

14/06/10 16:55:41 INFO zookeeper.ZooKeeper:Initiating client connection, connectString=rs229:2181,rs227:2181,rs226:2181sessionTimeout=5000 watcher=org.apache.hadoop.ha.ActiveStandbyElector$WatcherWithClientRef@3a469fea

14/06/10 16:55:41 INFO zookeeper.ClientCnxn: Openingsocket connection to server master/116.255.224.229:2181. Will not attempt toauthenticate using SASL (unknown error)

14/06/10 16:55:41 INFO zookeeper.ClientCnxn: Socketconnection established to master/116.255.224.229:2181, initiating session

14/06/10 16:55:41 INFO zookeeper.ClientCnxn: Sessionestablishment complete on server master/116.255.224.229:2181, sessionid =0xe5467eb575d20004, negotiated timeout = 5000

14/06/10 16:55:41 INFOha.ActiveStandbyElector: Session connected.

14/06/10 16:55:41 INFOha.ActiveStandbyElector: Successfully created /hadoop-ha/mycluster in ZK.

14/06/10 16:55:41 INFO zookeeper.ZooKeeper: Session:0xe5467eb575d20004 closed

14/06/10 16:55:41 INFO zookeeper.ClientCnxn:EventThread shut down

# 验证zkfc是否格式化成功

[root@rs229 bin]# pwd

/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/bin

[root@rs229 bin]# ./zkCli.sh

[zk: localhost:2181(CONNECTED) 0] ls /

[hadoop-ha, zookeeper]

[zk: localhost:2181(CONNECTED) 2] ls /hadoop-ha

[mycluster]

可以看到使用ls命令后多了一个hadoop-ha,这样就成功了

# 完全分布式 启动Hadoop(切记顺序不能乱)

# 在 rs229,rs227,rs226 上分别启动 journalnode

# 启动rs229的journalNode

[root@rs229 sbin]# pwd

/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin

[root@rs229 sbin]# ./hadoop-daemon.sh start journalnode

[root@rs229 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs229.log(查看日志是否报错,不报错你就赢了一半了)

# 启动rs227的journalNode

[root@rs227 sbin]# pwd

/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin

[root@rs227 sbin]# ./hadoop-daemon.sh start journalnode

[root@rs227 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs227.log(查看日志是否报错,不报错你就赢了一半了)

[root@rs226 ~]# cd /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin/

[root@rs226 sbin]# ./hadoop-daemon.sh start journalnode

starting journalnode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs226.out

[root@rs226 sbin]# jps

16799 Jps

20960 QuorumPeerMain

16732 JournalNode

# 启动rs226的journalNode

[root@rs226 sbin]# pwd

/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/sbin

[root@rs226 sbin]# ./hadoop-daemon.sh start journalnode

[root@rs226 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-journalnode-rs226.log(查看日志是否报错,不报错你就赢了一半了)

# 在 rs229,rs227上分别格式化和启动namenode

从rs229和rs227中任选一个即可,这里选择的是229

# 格式化跟启动rs229上的namenode

[root@rs229 sbin]# ../bin/hdfs namenode -format

[root@rs229 sbin]# ./hadoop-daemon.sh start namenode

# 在这个过程中饿抱了一个错误java.net.BindException:

Problem binding to [rs229:9000]java.net.BindException: Address already in use; For more details see:  http://wiki.apache.org/hadoop/BindException,如何解决请看下面的 # ?那伊抹微笑搭建hadoop环境出现的问题(仅供参考)中的 Hadoop-2.X错误中的原因分析以及解决

# 将rs229上namenode的数据同步到rs227中去,需要在rs227上执行hadoop的命令

[root@rs227 sbin]# ../bin/hdfs namenode -bootstrapStandby

[root@rs227 sbin]# ./hadoop-daemon.sh start namenode

[root@rs227 sbin]# cd ../tmp/

[root@rs227 tmp]# ll(可以看到数据已经从rs229同步到rs227上去了)

total 8

drwxr-xr-x 3 root root 4096 Jun 10 17:13 dfs

drwxr-xr-x 3 root root 4096 Jun 10 17:09 journal

下面是namenode数据同步的重要部分的日志信息

14/06/10 17:13:33 INFO namenode.NameNode: registeredUNIX signal handlers for [TERM, HUP, INT]

14/06/10 17:13:34 WARN util.NativeCodeLoader: Unableto load native-hadoop library for your platform... using builtin-java classeswhere applicable

=====================================================

About to bootstrap Standby ID rs227from:

Nameservice ID: mycluster

Other Namenode ID: rs229

Other NN's HTTP address: rs229:50070

Other NN's IPC  address:rs229/116.255.224.229:9000

Namespace ID: 819891643

Block pool ID:BP-1742810392-116.255.224.229-1402391347726

Cluster ID: mycluster

Layout version: -47

=====================================================

14/06/10 17:13:34 INFO common.Storage: Storagedirectory /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/tmp/dfs/name hasbeen successfully formatted.

14/06/10 17:13:34 INFO namenode.TransferFsImage:Opening connection tohttp://rs229:50070/getimage?getimage=1&txid=0&storageInfo=-47:819891643:0:mycluster

14/06/10 17:13:34 INFO namenode.TransferFsImage:Transfer took 0.14s at 0.00 KB/s

14/06/10 17:13:34 INFO namenode.TransferFsImage:Downloaded file fsimage.ckpt_0000000000000000000 size 196 bytes.

14/06/10 17:13:35 INFO util.ExitUtil: Exiting withstatus 0

# 打开浏览器,访问rs229跟rs227的50070端口Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转) 

如果都能访问到,说明你namenode启动成功了,并且这两个namenode都是standby状态

# namenode(rs229)转换成active(这里不需要手动将namenode转换为active状态了,因为我们是交给Zookeeper管理,在后面会启动ZooKeeperFailoverController)

# 启动所有的 datanodes(在rs229上执行命令)

[root@rs229 sbin]# ./hadoop-daemons.sh start datanode(查看日志,没报错你就赢了)

rs198: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs198.out

rs197: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs197.out

rs196: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs196.out

rs195: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs195.out

rs227: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs227.out

rs226: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs226.out

rs229: starting datanode, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-datanode-rs229.out

[root@rs229 sbin]# jps

25627 Jps

24037 NameNode

25168 DataNode

23343 JournalNode

29367 QuorumPeerMain

# 实验一下手动切换namenode的状态(这里也不需要做,Zookeeper管理的,自动切换,下面会讲到)

# yarn启动

[root@rs229 sbin]# ./start-yarn.sh

starting yarn daemons

starting resourcemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-resourcemanager-rs229.out

rs229: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-rs229.out

rs196: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS196.out

rs197: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS197.out

rs226: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-rs226.out

rs198: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS198.out

rs227: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-rs227.out

rs195: starting nodemanager, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/yarn-root-nodemanager-RS195.out

# 访问rs229的8088端口查看ResourceManager的UI界面

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转) 

# 启动ZooKeeperFailoverController(在rs229,rs227上执行命令,日志重点地方已经使用红颜色的字体标出)

#在rs229上执行命令

[root@rs229 sbin]# ./hadoop-daemon.sh start zkfc

starting zkfc, logging to /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.out

[root@rs229 sbin]# tail -100fusr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.log

tail: cannot open`usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.log'for reading: No such file or directory

[root@rs229 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs229.log

2014-06-10 17:45:47,887 WARNorg.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop libraryfor your platform... using builtin-java classes where applicable

2014-06-10 17:45:47,889 INFOorg.apache.hadoop.hdfs.tools.DFSZKFailoverController: Failover controllerconfigured for NameNode NameNode at rs229/116.255.224.229:9000

2014-06-10 17:45:48,089 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT

2014-06-10 17:45:48,089 INFOorg.apache.zookeeper.ZooKeeper: Client environment:host.name=master

2014-06-10 17:45:48,089 INFOorg.apache.zookeeper.ZooKeeper: Client environment:java.version=1.7.0_60

2014-06-10 17:45:48,089 INFOorg.apache.zookeeper.ZooKeeper: Client environment:java.vendor=OracleCorporation

2014-06-10 17:45:48,089 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre

2014-06-10 17:45:48,090 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:java.class.path=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop…

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:java.library.path=/usr/local/adsit/yting/apache/hadoop/hadoop-1.1.2/lib

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp

2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper:Client environment:java.compiler=<NA>

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Client environment:os.name=Linux

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Client environment:os.arch=amd64

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:os.version=2.6.32-279.el6.x86_64

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Client environment:user.name=root

2014-06-10 17:45:48,091 INFO org.apache.zookeeper.ZooKeeper:Client environment:user.home=/root

2014-06-10 17:45:48,091 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0

2014-06-10 17:45:48,092 INFOorg.apache.zookeeper.ZooKeeper: Initiating client connection,connectString=rs229:2181,rs227:2181,rs226:2181 sessionTimeout=5000watcher=org.apache.hadoop.ha.ActiveStandbyElector$WatcherWithClientRef@4e9cba32

2014-06-10 17:45:48,121 INFOorg.apache.zookeeper.ClientCnxn: Opening socket connection to serverrs226/116.255.224.226:2181. Will not attempt to authenticate using SASL(unknown error)

2014-06-10 17:45:48,127 INFOorg.apache.zookeeper.ClientCnxn: Socket connection established tors226/116.255.224.226:2181, initiating session

2014-06-10 17:45:48,147 INFOorg.apache.zookeeper.ClientCnxn: Session establishment complete on serverrs226/116.255.224.226:2181, sessionid = 0xe2467eb575ce0000, negotiated timeout= 5000

2014-06-10 17:45:48,153 INFOorg.apache.hadoop.ha.ActiveStandbyElector: Session connected.

2014-06-10 17:45:48,236 INFOorg.apache.hadoop.ipc.Server: Starting Socket Reader #1 for port 8019

2014-06-10 17:45:48,273 INFOorg.apache.hadoop.ipc.Server: IPC Server Responder: starting

2014-06-10 17:45:48,273 INFOorg.apache.hadoop.ipc.Server: IPC Server listener on 8019: starting

2014-06-10 17:45:48,403 INFOorg.apache.hadoop.ha.HealthMonitor: Entering state SERVICE_HEALTHY

2014-06-10 17:45:48,403 INFOorg.apache.hadoop.ha.ZKFailoverController: Local service NameNode atrs229/116.255.224.229:9000 entered state: SERVICE_HEALTHY

2014-06-10 17:45:48,421 INFOorg.apache.hadoop.ha.ActiveStandbyElector: Checking for any old active whichneeds to be fenced...

2014-06-10 17:45:48,440 INFOorg.apache.hadoop.ha.ActiveStandbyElector: No old node to fence

2014-06-10 17:45:48,440 INFOorg.apache.hadoop.ha.ActiveStandbyElector: Writing znode/hadoop-ha/mycluster/ActiveBreadCrumb to indicate that the local node is themost recent active...

2014-06-10 17:45:48,447 INFOorg.apache.hadoop.ha.ZKFailoverController: Trying to make NameNode atrs229/116.255.224.229:9000 active...

2014-06-10 17:45:48,786 INFOorg.apache.hadoop.ha.ZKFailoverController: Successfully transitioned NameNodeat rs229/116.255.224.229:9000 to active state

[root@rs229 sbin]# jps

24037 NameNode

25168 DataNode

26012 NodeManager

25891 ResourceManager

23343 JournalNode

27026 DFSZKFailoverController

29367 QuorumPeerMain

27208 Jps

#在rs227上执行命令

[root@rs227 sbin]# ./hadoop-daemon.sh start zkfc

starting zkfc, logging to/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs227.out

[root@rs227 sbin]# tail -100f /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/logs/hadoop-root-zkfc-rs227.log

2014-06-10 17:46:39,078 WARNorg.apache.hadoop.util.NativeCodeLoader: Unable to load native-hadoop libraryfor your platform... using builtin-java classes where applicable

2014-06-10 17:46:39,080 INFOorg.apache.hadoop.hdfs.tools.DFSZKFailoverController: Failover controllerconfigured for NameNode NameNode at rs227/116.255.224.227:9000

2014-06-10 17:46:39,285 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT

2014-06-10 17:46:39,285 INFOorg.apache.zookeeper.ZooKeeper: Client environment:host.name=rs227

2014-06-10 17:46:39,285 INFO org.apache.zookeeper.ZooKeeper:Client environment:java.version=1.7.0_60

2014-06-10 17:46:39,285 INFOorg.apache.zookeeper.ZooKeeper: Client environment:java.vendor=OracleCorporation

2014-06-10 17:46:39,285 INFOorg.apache.zookeeper.ZooKeeper: Client environment:java.home=/usr/local/adsit/yting/jdk/jdk1.7.0_60/jre

2014-06-10 17:46:39,285 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:java.class.path=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop…

2014-06-10 17:46:39,286 INFO org.apache.zookeeper.ZooKeeper:Clientenvironment:java.library.path=/usr/local/adsit/yting/apache/hadoop/hadoop-1.1.2/lib

2014-06-10 17:46:39,286 INFOorg.apache.zookeeper.ZooKeeper: Client environment:java.io.tmpdir=/tmp

2014-06-10 17:46:39,286 INFO org.apache.zookeeper.ZooKeeper:Client environment:java.compiler=<NA>

2014-06-10 17:46:39,286 INFOorg.apache.zookeeper.ZooKeeper: Client environment:os.name=Linux

2014-06-10 17:46:39,287 INFOorg.apache.zookeeper.ZooKeeper: Client environment:os.arch=amd64

2014-06-10 17:46:39,287 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:os.version=2.6.32-279.el6.x86_64

2014-06-10 17:46:39,287 INFOorg.apache.zookeeper.ZooKeeper: Client environment:user.name=root

2014-06-10 17:46:39,287 INFOorg.apache.zookeeper.ZooKeeper: Client environment:user.home=/root

2014-06-10 17:46:39,287 INFOorg.apache.zookeeper.ZooKeeper: Clientenvironment:user.dir=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0

2014-06-10 17:46:39,288 INFOorg.apache.zookeeper.ZooKeeper: Initiating client connection,connectString=rs229:2181,rs227:2181,rs226:2181 sessionTimeout=5000watcher=org.apache.hadoop.ha.ActiveStandbyElector$WatcherWithClientRef@5f997454

2014-06-10 17:46:39,317 INFOorg.apache.zookeeper.ClientCnxn: Opening socket connection to server rs226/116.255.224.226:2181.Will not attempt to authenticate using SASL (unknown error)

2014-06-10 17:46:39,323 INFOorg.apache.zookeeper.ClientCnxn: Socket connection established tors226/116.255.224.226:2181, initiating session

2014-06-10 17:46:39,335 INFOorg.apache.zookeeper.ClientCnxn: Session establishment complete on serverrs226/116.255.224.226:2181, sessionid = 0xe2467eb575ce0001, negotiated timeout= 5000

2014-06-10 17:46:39,340 INFOorg.apache.hadoop.ha.ActiveStandbyElector: Session connected.

2014-06-10 17:46:39,409 INFOorg.apache.hadoop.ipc.Server: Starting Socket Reader #1 for port 8019

2014-06-10 17:46:39,445 INFOorg.apache.hadoop.ipc.Server: IPC Server Responder: starting

2014-06-10 17:46:39,445 INFOorg.apache.hadoop.ipc.Server: IPC Server listener on 8019: starting

2014-06-10 17:46:39,608 INFOorg.apache.hadoop.ha.HealthMonitor: Entering state SERVICE_HEALTHY

2014-06-10 17:46:39,608 INFOorg.apache.hadoop.ha.ZKFailoverController: Local service NameNode atrs227/116.255.224.227:9000 entered state: SERVICE_HEALTHY

2014-06-10 17:46:39,636 INFOorg.apache.hadoop.ha.ZKFailoverController: ZK Election indicated that NameNodeat rs227/116.255.224.227:9000 should become standby

2014-06-10 17:46:39,656 INFOorg.apache.hadoop.ha.ZKFailoverController: Successfully transitioned NameNodeat rs227/116.255.224.227:9000 to standby state

[root@rs227 sbin]# jps

17710 Jps

17338 NodeManager

16725 NameNode

17627 DFSZKFailoverController

27264 QuorumPeerMain

16495 JournalNode

17091 DataNode

# 打开浏览器,再访问rs229跟rs227的50070端口

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

发现rs229变成active状态了,而rs227还是standby状态

# 验证HDFS是否好用

[root@rs229 sbin]# ../bin/hadoop fs -put yarn-daemon.sh /yting

[root@rs229 sbin]# ../bin/hadoop fs -ls /yting

Found 1 items

-rw-r--r--   3root supergroup       4278 2014-06-1018:29 /yting/yarn-daemon.sh

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

# 验证YARN是否好用

[root@rs229 bin]# pwd

/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/bin

[root@rs229 bin]# ./hadoop jar ../share/hadoop/mapreduce/hadoop-mapreduce-examples-2.2.0.jar pi 10 100

…(不重要的部分就省略了,能出这个值就是对的,虚拟机可能会卡着不动,也可能会卡死,属于正常现象,内存消耗比较大)

Job Finished in 25.361 seconds

valueof Pi is 3.14800000000000000000

# 验证HA高可用性,是否自动故障转移

# 打开浏览器,访问rs229跟rs227的50070端口

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转) 
Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转) 

发现rs229从为active状态,而rs227为standby状态

在rs229上直接kill掉namenode进程

[root@rs229 bin]# jps

31742 ResourceManager

32287 DFSZKFailoverController

31356 NameNode

31007 JournalNode

29367 QuorumPeerMain

31862 NodeManager

2530 Jps

31592 DataNode

[root@rs229 bin]# kill -9 31356

[root@rs229 bin]# jps

31742 ResourceManager

32287 DFSZKFailoverController

2578 Jps

31007 JournalNode

29367 QuorumPeerMain

31862 NodeManager

31592 DataNode

进程已经被kill掉了

# 打开浏览器,再访问rs229跟rs227的50070端口

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

Hadoop-2.2.0 + Hbase-0.96.2 + Hive-0.13.1(转)

发现rs229无法访问,而rs227 从standby转换成active状态,成功了

这时候在rs229上使用hdfs跟mapreduce还是可以正常运行的,尽管rs229上的namenode进程已经被kill掉,这就是故障转移的好处啊!

# Hbase-0.96.2-hadoop2(启动双HMaster的配置,rs229是主HMaster,rs227是从HMaster)

# 解压Hbase-0.96.2-hadoop2-bin.tar.gz

[root@rs229 hbase-0.96.2-hadoop2]# pwd

/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2

[root@rs229 hbase-0.96.2-hadoop2]# tar -zxvf hbase-0.96.2-hadoop2-bin.tar.gz

[root@rs229 hbase]# ll

total 77516

drwxr-xr-x 12 root root     4096 May 21 16:38 hbase-0.94.19

drwxr-xr-x  7root root     4096 Jun 11 17:10hbase-0.96.2-hadoop2

-rw-r--r--  1root root 79367504 Jun 11 17:10 hbase-0.96.2-hadoop2-bin.tar.gz

# 修改hbase-env.sh 文件

[root@rs229 conf]# pwd

/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/conf

[root@master conf]# vi hbase-env.sh

将 JAVA_HOME 的注释去掉,并把路径修改正确

# The java implementation to use.  Java 1.6 required.

export JAVA_HOME=/usr/local/adsit/yting/jdk/jdk1.7.0_60

# Tell HBase whether it should manage it's owninstance of Zookeeper or not.

export HBASE_MANAGES_ZK=false

# 配置hbase-site.xml 文件

<configuration>

<property>

<name>hbase.rootdir</name>

<value>hdfs://mycluster/hbase</value><!--这里必须跟core-site.xml中的配置一样-->

</property>

<property>

<name>hbase.cluster.distributed</name>

<value>true</value>

</property>

<property>

<name>hbase.tmp.dir</name>

<value>/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/tmp</value>

</property>

<property>

<name>hbase.master</name>

<value>60000</value> # 这里是对的,只配置端口,为了配置多个HMaster

</property>

<property>

<name>hbase.zookeeper.quorum</name>

<value>rs229,rs227,rs226,rs198,rs197</value>

</property>

<property>

<name>hbase.zookeeper.property.clientPort</name>

<value>2181</value>

</property>

<property>

<name>hbase.zookeeper.property.dataDir</name>

<value>/usr/local/adsit/yting/apache/zookeeper/zookeeper-3.4.6/data</value>

</property>

</configuration>

# 配置regionservers

[root@rs229 conf]# cat regionservers

rs195

rs196

rs197

rs198

rs226

# 创建hdfs-site.xml的软连接

[root@rs229 conf]# pwd

/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/conf

[root@rs229 conf]# ln /usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/etc/hadoop/hdfs-site.xml hdfs-site.xml

# 配置profile文件,以便可以直接使用相关的命令(这里就不说了)

# jar包覆盖,这个版本hbase-0.96.2不需要覆盖jar包,跟hadoop-2.2.0的版本jar包是一样的,看下面

[root@rs229 lib]# ll | grep hadoop

-rw-r--r-- 1 root root    16778 Jun 11 18:15hadoop-annotations-2.2.0.jar

-rw-r--r-- 1 root root    49750 Jun 11 18:16 hadoop-auth-2.2.0.jar

-rw-r--r-- 1 root root     2559 Jun 11 18:16 hadoop-client-2.2.0.jar

-rw-r--r-- 1 root root  2735584 Jun 11 18:15 hadoop-common-2.2.0.jar

-rw-r--r-- 1 root root  5242252 Jun 11 18:15 hadoop-hdfs-2.2.0.jar

-rw-r--r-- 1 root root  1988460 Jun 11 18:16hadoop-hdfs-2.2.0-tests.jar

-rw-r--r-- 1 root root   482042 Jun 11 18:16hadoop-mapreduce-client-app-2.2.0.jar

-rw-r--r-- 1 root root   656365 Jun 11 18:15hadoop-mapreduce-client-common-2.2.0.jar

-rw-r--r-- 1 root root  1455001 Jun 11 18:15hadoop-mapreduce-client-core-2.2.0.jar

-rw-r--r-- 1 root root    35216 Jun 11 18:16hadoop-mapreduce-client-jobclient-2.2.0.jar

-rw-r--r-- 1 root root  1434852 Jun 11 18:15hadoop-mapreduce-client-jobclient-2.2.0-tests.jar

-rw-r--r-- 1 root root    21537 Jun 11 18:15hadoop-mapreduce-client-shuffle-2.2.0.jar

-rw-r--r-- 1 root root  1158936 Jun 11 18:15hadoop-yarn-api-2.2.0.jar

-rw-r--r-- 1 root root    94728 Jun 11 18:16hadoop-yarn-client-2.2.0.jar

-rw-r--r-- 1 root root  1301627 Jun 11 18:15hadoop-yarn-common-2.2.0.jar

-rw-r--r-- 1 root root   175554 Jun 11 18:16hadoop-yarn-server-common-2.2.0.jar

-rw-r--r-- 1 root root   467638 Jun 11 18:16hadoop-yarn-server-nodemanager-2.2.0.jar

# 启动hbase

[root@rs229 hbase-0.96.2-hadoop2]# ./bin/start-hbase.sh

starting master, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-master-rs229.out

rs197: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS197.out

rs227: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS227.out

rs195: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS195.out

rs226: starting regionserver, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-rs226.out

rs198: starting regionserver, logging to /usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-regionserver-RS198.out

[root@rs229 hbase-0.96.2-hadoop2]# jps

5131 Jps

4827 HRegionServer

4661 HMaster

6395 NodeManager

6272 DataNode

29849 QuorumPeerMain

# hbase shell 验证 1(查看hbase的版本跟状态)

hbase(main):003:0> list  # 刚刚创建的表

TABLE

user

yting

2 row(s) in 0.0800 seconds

=> ["user", "yting"]

hbase(main):004:0> version

0.96.2-hadoop2, r1581096, Mon Mar 24 16:03:18 PDT2014

hbase(main):005:0> status

5 servers, 0 dead, 0.8000 average load

hbase(main):006:0>

# hbase shell 验证 2(建表插入数据获取数据实时)

hbase(main):006:0> create 'yting_xmei1129','uid','info'

0 row(s) in 0.4706 seconds

=> Hbase::Table - yting_xmei1129

hbase(main):007:0> put 'yting_xmei1129','1314520','info:yousmile','forever'

0 row(s) in 0.1350 seconds

hbase(main):008:0> get 'yting_xmei1129','1314520'

COLUMN                                             CELL

info:yousmile                                     timestamp=1402569647483, value=forever

1 row(s) in 0.0530 seconds

hbase(main):009:0> scan 'yting_xmei1129'

ROW                                                COLUMN+CELL

1314520                                           column=info:yousmile, timestamp=1402569647483, value=forever

1 row(s) in 0.0500 seconds

hbase(main):010:0>

# 在rs227上启动HMaster

[root@rs227 bin]# ./hbase-daemon.sh start master

starting master, logging to/usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-master-rs227.out

[root@rs227 bin]# tail -100f /usr/local/adsit/yting/apache/hbase/hbase-0.96.2-hadoop2/bin/../logs/hbase-root-master-rs227.log

2014-07-03 15:43:47,798 INFO  [master:rs227:60000] mortbay.log: StartedSelectChannelConnector@0.0.0.0:60010

2014-07-03 15:43:47,897 INFO  [master:rs227:60000]zookeeper.RecoverableZooKeeper: Node /hbase/master already exists and this isnot a retry

2014-07-03 15:43:47,898 INFO  [master:rs227:60000]master.ActiveMasterManager: Adding ZNode for/hbase/backup-masters/rs227,60000,1402645426368 in backup master directory

2014-07-03 15:43:47,908 INFO  [master:rs227:60000] master.ActiveMasterManager:Another master is the active master, rs229,60000,1402645371520; waiting tobecome the next active master

# 验证HMaster自动切换

# rs227上的日志查看

2014-07-03 15:43:47,798 INFO  [master:rs227:60000] mortbay.log: StartedSelectChannelConnector@0.0.0.0:60010

2014-07-03 15:43:47,897 INFO  [master:rs227:60000] zookeeper.RecoverableZooKeeper: Node /hbase/master already exists and this is not a retry

2014-07-03 15:43:47,898 INFO  [master:rs227:60000]master.ActiveMasterManager: Adding ZNode for/hbase/backup-masters/rs227,60000,1402645426368 in backup master directory

2014-07-03 15:43:47,908 INFO  [master:rs227:60000] master.ActiveMasterManager:Another master is the active master, rs229,60000,1402645371520; waiting tobecome the next active master

这里说明zookeeper已经接管了,并且把rs227作为一个备份的Hbase了,并且这里提示

waiting to become the next active master(等待变成下一个活动的master),然后我们可以将rs229上的hmaster进程给kill掉,当然,也可以使用 ./hbase-daemon.sh stop master 来结束rs229上的hmaster进程

# rs229上的日志查看

2014-07-03 15:43:00,670 INFO  [master:rs229:60000]master.AssignmentManager: Found regions out on cluster or in RIT; presumingfailover

2014-07-03 15:43:00,772 DEBUG [master:rs229:60000]hbase.ZKNamespaceManager: Updating namespace cache from node default with data:\x0A\x07default

2014-07-03 15:43:00,775 DEBUG [master:rs229:60000]hbase.ZKNamespaceManager: Updating namespace cache from node hbase with data:\x0A\x05hbase

2014-07-03 15:43:00,796 INFO  [master:rs229:60000]zookeeper.RecoverableZooKeeper: Node /hbase/namespace/default already existsand this is not a retry

2014-07-03 15:43:00,803 INFO  [master:rs229:60000]zookeeper.RecoverableZooKeeper: Node /hbase/namespace/hbase already exists andthis is not a retry

2014-07-03 15:43:00,806 INFO  [master:rs229:60000] master.HMaster: Masterhas completed initialization

# kill掉rs229上的hmaster进程,看看rs227上的日志会有什么变化

[root@RS229 conf]# jps

27324 HRegionServer

6395 NodeManager

6272 DataNode

28517 HMaster

28797 Jps

29849 QuorumPeerMain

[root@RS229 conf]# kill -9 28517

[root@RS229conf]# jps

27324 HRegionServer

6395 NodeManager

6272 DataNode

28973 Jps

29849 QuorumPeerMain

[root@RS229 conf]#

# 下面是rs227上日志变化后的信息,所有信息全部复制下来了

2014-07-03 15:49:49,644 DEBUG [main-EventThread]master.ActiveMasterManager: No master available.Notifying waiting threads

2014-07-03 15:49:49,649 DEBUG [main-EventThread]master.ActiveMasterManager: A master is now available

2014-07-03 15:49:49,649 INFO  [master:rs227:60000]master.ActiveMasterManager: Deleting ZNode for/hbase/backup-masters/rs227,60000,1402645426368 from backup master directory

2014-07-03 15:49:49,655 INFO  [master:rs227:60000]master.ActiveMasterManager: Registered Active Master=rs227,60000,1402645426368

2014-07-03 15:49:49,662 INFO  [master:rs227:60000]Configuration.deprecation: fs.default.name is deprecated. Instead, usefs.defaultFS

2014-07-03 15:49:49,980 INFO  [master:rs227:60000]Configuration.deprecation: hadoop.native.lib is deprecated. Instead, useio.native.lib.available

2014-07-03 15:49:50,097 DEBUG [master:rs227:60000]util.FSTableDescriptors: Current tableInfoPath = hdfs://mycluster/hbase/data/hbase/meta/.tabledesc/.tableinfo.0000000001

2014-07-03 15:49:50,120 DEBUG [master:rs227:60000]util.FSTableDescriptors: TableInfo already exists.. Skipping creation

2014-07-03 15:49:50,178 INFO  [master:rs227:60000] fs.HFileSystem: Addedintercepting call to namenode#getBlockLocations so can do block reorderingusing class class org.apache.hadoop.hbase.fs.HFileSystem$ReorderWALBlocks

2014-07-03 15:49:50,187 INFO  [master:rs227:60000] master.SplitLogManager:Timeout=120000, unassigned timeout=180000, distributedLogReplay=false

2014-07-03 15:49:50,192 INFO  [master:rs227:60000] master.SplitLogManager:Found 0 orphan tasks and 0 rescan nodes

2014-07-03 15:49:50,223 INFO  [master:rs227:60000] zookeeper.ZooKeeper:Initiating client connection, connectString=rs227:2181,rs229:2181,rs226:2181sessionTimeout=90000 watcher=hconnection-0x4dbe91f3,quorum=rs227:2181,rs229:2181,rs226:2181, baseZNode=/hbase

2014-07-03 15:49:50,224 INFO  [master:rs227:60000]zookeeper.RecoverableZooKeeper: Process identifier=hconnection-0x4dbe91f3connecting to ZooKeeper ensemble=rs227:2181,rs229:2181,rs226:2181

2014-07-03 15:49:50,224 INFO  [master:rs227:60000-SendThread(rs226:2181)]zookeeper.ClientCnxn: Opening socket connection to serverrs226/116.255.224.226:2181. Will not attempt to authenticate using SASL(unknown error)

2014-07-03 15:49:50,225 INFO  [master:rs227:60000-SendThread(rs226:2181)]zookeeper.ClientCnxn: Socket connection established tors226/116.255.224.226:2181, initiating session

2014-07-03 15:49:50,229 INFO  [master:rs227:60000-SendThread(rs226:2181)]zookeeper.ClientCnxn: Session establishment complete on serverrs226/116.255.224.226:2181, sessionid = 0xe2467eb575ce0049, negotiated timeout= 40000

2014-07-03 15:49:50,255 DEBUG [master:rs227:60000]catalog.CatalogTracker: Starting catalog trackerorg.apache.hadoop.hbase.catalog.CatalogTracker@2cb8709b

2014-07-03 15:49:50,333 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs227,60020,1402643884463data: PBU锟斤拷

2014-07-03 15:49:50,334 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs226,60020,1402643884476data: PBU锟斤拷

2014-07-03 15:49:50,335 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs198,60020,1402643884392data: PBU锟斤拷

2014-07-03 15:49:50,336 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs229,60020,1402643884248data: PBU锟斤拷

2014-07-03 15:49:50,337 DEBUG [master:rs227:60000]zookeeper.RegionServerTracker: RS node: /hbase/rs/rs197,60020,1402643884579data: PBU锟斤拷

2014-07-03 15:49:50,339 INFO  [master:rs227:60000] master.HMaster: Serveractive/primary master=rs227,60000,1402645426368, sessionid=0xe3467eb575cc003c,setting cluster-up flag (Was=true)

2014-07-03 15:49:50,361 INFO  [master:rs227:60000]zookeeper.RecoverableZooKeeper: Node /hbase/online-snapshot/acquired alreadyexists and this is not a retry

2014-07-03 15:49:50,363 INFO  [master:rs227:60000]procedure.ZKProcedureUtil: Clearing all procedure znodes:/hbase/online-snapshot/acquired /hbase/online-snapshot/reached/hbase/online-snapshot/abort

2014-07-03 15:49:50,365 DEBUG [master:rs227:60000]procedure.ZKProcedureCoordinatorRpcs: Starting the controller for proceduremember:rs227,60000,1402645426368

2014-07-03 15:49:50,378 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor service name=MASTER_OPEN_REGION-rs227:60000,corePoolSize=5, maxPoolSize=5

2014-07-03 15:49:50,378 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor service name=MASTER_CLOSE_REGION-rs227:60000,corePoolSize=5, maxPoolSize=5

2014-07-03 15:49:50,378 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor servicename=MASTER_SERVER_OPERATIONS-rs227:60000, corePoolSize=5, maxPoolSize=5

2014-07-03 15:49:50,379 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor servicename=MASTER_META_SERVER_OPERATIONS-rs227:60000, corePoolSize=5, maxPoolSize=5

2014-07-03 15:49:50,379 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor service name=M_LOG_REPLAY_OPS-rs227:60000,corePoolSize=10, maxPoolSize=10

2014-07-03 15:49:50,379 DEBUG [master:rs227:60000]executor.ExecutorService: Starting executor servicename=MASTER_TABLE_OPERATIONS-rs227:60000, corePoolSize=1, maxPoolSize=1

2014-07-03 15:49:50,381 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.cleaner.TimeToLiveLogCleaner

2014-07-03 15:49:50,386 INFO  [master:rs227:60000] zookeeper.ZooKeeper:Initiating client connection, connectString=rs227:2181,rs229:2181,rs226:2181sessionTimeout=90000 watcher=replicationLogCleaner,quorum=rs227:2181,rs229:2181,rs226:2181, baseZNode=/hbase

2014-07-03 15:49:50,387 INFO  [master:rs227:60000]zookeeper.RecoverableZooKeeper: Process identifier=replicationLogCleanerconnecting to ZooKeeper ensemble=rs227:2181,rs229:2181,rs226:2181

2014-07-03 15:49:50,387 INFO  [master:rs227:60000-SendThread(rs227:2181)]zookeeper.ClientCnxn: Opening socket connection to serverrs227/116.255.224.227:2181. Will not attempt to authenticate using SASL(unknown error)

2014-07-03 15:49:50,388 INFO  [master:rs227:60000-SendThread(rs227:2181)]zookeeper.ClientCnxn: Socket connection established tors227/116.255.224.227:2181, initiating session

2014-07-03 15:49:50,391 INFO  [master:rs227:60000-SendThread(rs227:2181)]zookeeper.ClientCnxn: Session establishment complete on serverrs227/116.255.224.227:2181, sessionid = 0xe3467eb575cc003e, negotiated timeout= 40000

2014-07-03 15:49:50,400 INFO  [master:rs227:60000] zookeeper.RecoverableZooKeeper:Node /hbase/replication/rs already exists and this is not a retry

2014-07-03 15:49:50,400 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.replication.master.ReplicationLogCleaner

2014-07-03 15:49:50,404 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.snapshot.SnapshotLogCleaner

2014-07-03 15:49:50,406 DEBUG [master:rs227:60000]cleaner.CleanerChore: initialize cleaner=org.apache.hadoop.hbase.master.cleaner.HFileLinkCleaner

2014-07-03 15:49:50,408 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.snapshot.SnapshotHFileCleaner

2014-07-03 15:49:50,409 DEBUG [master:rs227:60000]cleaner.CleanerChore: initializecleaner=org.apache.hadoop.hbase.master.cleaner.TimeToLiveHFileCleaner

2014-07-03 15:49:50,409 INFO  [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 0, slept for 0ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms, intervalof 1500 ms.

2014-07-03 15:49:51,914 INFO  [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 0, slept for1505 ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms,interval of 1500 ms.

2014-07-03 15:49:53,361 INFO  [RpcServer.handler=4,port=60000]master.ServerManager: Registering server=rs197,60020,1402643884579

2014-07-03 15:49:53,362 INFO  [RpcServer.handler=2,port=60000] master.ServerManager:Registering server=rs227,60020,1402643884463

2014-07-03 15:49:53,362 INFO  [RpcServer.handler=0,port=60000]master.ServerManager: Registering server=rs198,60020,1402643884392

2014-07-03 15:49:53,361 INFO  [RpcServer.handler=3,port=60000] master.ServerManager:Registering server=rs229,60020,1402643884248

2014-07-03 15:49:53,362 INFO  [RpcServer.handler=1,port=60000]master.ServerManager: Registering server=rs226,60020,1402643884476

2014-07-03 15:49:53,369 INFO  [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 5, slept for2960 ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms,interval of 1500 ms.

2014-07-03 15:49:54,873 INFO  [master:rs227:60000] master.ServerManager:Waiting for region servers count to settle; currently checked in 5, slept for4464 ms, expecting minimum of 1, maximum of 2147483647, timeout of 4500 ms,interval of 1500 ms.

2014-07-03 15:49:54,923 INFO  [master:rs227:60000] master.ServerManager:Finished waiting for region servers count to settle; checked in 5, slept for4514 ms, expecting minimum of 1, maximum of 2147483647, master is running.

2014-07-03 15:49:54,928 INFO  [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs229,60020,1402643884248 belongs to anexisting region server

2014-07-03 15:49:54,928 INFO  [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs227,60020,1402643884463 belongs to anexisting region server

2014-07-03 15:49:54,928 INFO  [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs197,60020,1402643884579 belongs to anexisting region server

2014-07-03 15:49:54,928 INFO  [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs198,60020,1402643884392 belongs to anexisting region server

2014-07-03 15:49:54,928 INFO  [master:rs227:60000] master.MasterFileSystem:Log folder hdfs://mycluster/hbase/WALs/rs226,60020,1402643884476 belongs to anexisting region server

2014-07-03 15:49:55,011 INFO  [master:rs227:60000] master.RegionStates:Transitioned {1588230740 state=OFFLINE, ts=1402645794937, server=null} to{1588230740 state=OPEN, ts=1402645795011, server=rs229,60020,1402643884248}

2014-07-03 15:49:55,011 INFO  [master:rs227:60000] master.RegionStates:Onlined 1588230740 on rs229,60020,1402643884248

2014-07-03 15:49:55,011 INFO  [master:rs227:60000] master.ServerManager:AssignmentManager hasn't finished failover cleanup; waiting

2014-07-03 15:49:55,012 INFO  [master:rs227:60000] master.HMaster:hbase:meta assigned=0, rit=false, location=rs229,60020,1402643884248

2014-07-03 15:49:55,094 INFO  [master:rs227:60000]catalog.MetaMigrationConvertingToPB: META already up-to date with PBserialization

2014-07-03 15:49:55,118 INFO  [master:rs227:60000] master.RegionStates:Transitioned {cb2b12bd4e03b016c9e0a9c3b9c52098 state=OFFLINE, ts=1402645795118,server=null} to {cb2b12bd4e03b016c9e0a9c3b9c52098 state=OPEN, ts=1402645795118,server=rs197,60020,1402643884579}

2014-07-03 15:49:55,118 INFO  [master:rs227:60000] master.RegionStates:Onlined cb2b12bd4e03b016c9e0a9c3b9c52098 on rs197,60020,1402643884579

2014-07-03 15:49:55,119 DEBUG [master:rs227:60000]master.AssignmentManager: Found {ENCODED => cb2b12bd4e03b016c9e0a9c3b9c52098,NAME => 'hbase:namespace,,1402643891966.cb2b12bd4e03b016c9e0a9c3b9c52098.',STARTKEY => '', ENDKEY => ''} out on cluster

2014-07-03 15:49:55,119 INFO  [master:rs227:60000] master.AssignmentManager:Found regions out on cluster or in RIT; presuming failover

2014-07-03 15:49:55,220 DEBUG [master:rs227:60000]hbase.ZKNamespaceManager: Updating namespace cache from node default with data:\x0A\x07default

2014-07-03 15:49:55,223 DEBUG [master:rs227:60000]hbase.ZKNamespaceManager: Updating namespace cache from node hbase with data:\x0A\x05hbase

2014-07-03 15:49:55,242 INFO  [master:rs227:60000]zookeeper.RecoverableZooKeeper: Node /hbase/namespace/default already existsand this is not a retry

2014-07-03 15:49:55,249 INFO  [master:rs227:60000] zookeeper.RecoverableZooKeeper:Node /hbase/namespace/hbase already exists and this is not a retry

2014-07-03 15:49:55,251 INFO  [master:rs227:60000] master.HMaster:Master has completed initialization

只看红色标注的地方,意思就是说当我们kill掉rs229上的hmaster的时候,No master available. Notifying waiting threads. A master is now available(找不到master,唤醒等待的hmaster线程,然后找到了等待的hmaster(rs227)),然后zookeeper就接管并且将rs227上的hmaster从等待状态切换为激活状态了,然后就ok了。(当然也可以多开几个备用的hmaster)

# 安装Mysql 5.5.x

# yum安装mysql(其它方式也行了,这样方便点)

# yum 安装 mysql-server

[root@rs229 ~]# yum install MySQL-server-5.5.31-2.el6.i686.rpm

# yum 安装 mysql-client

[root@rs229 ~]# yum install MySQL-client-5.5.31-2.el6.i686.rpm

# 启动mysql服务

[root@rs229 lib]# service mysql start (注意这里是mysql,不是mysqld哦)

Starting MySQL.. SUCCESS!

[root@rs229 lib]#

# 配置mysql(设置账号密码)

[root@rs229 yum.repos.d]# mysql_secure_installation

NOTE: RUNNING ALL PARTS OF THIS SCRIPT IS RECOMMENDEDFOR ALL MySQL

SERVERSIN PRODUCTION USE!  PLEASE READ EACH STEPCAREFULLY!

In order to log into MySQL to secure it, we'll needthe current

password for the root user.  If you've just installed MySQL, and

you haven't set the root password yet, the passwordwill be blank,

so you should just press enter here.

Enter current password for root (enter for none):

OK, successfully used password, moving on...

Setting the root password ensures that nobody can loginto the MySQL

root user without the proper authorisation.

Set root password? [Y/n] y

New password:

Re-enter new password:

Password updated successfully!

Reloading privilege tables..

... Success!

By default, a MySQL installation has an anonymoususer, allowing anyone

to log into MySQL without having to have a useraccount created for

them.  This isintended only for testing, and to make the installation

go a bit smoother. You should remove them before moving into a

production environment.

Remove anonymous users? [Y/n] n

... skipping.

Normally, root should only be allowed to connect from'localhost'.  This

ensures that someone cannot guess at the rootpassword from the network.

Disallow root login remotely? [Y/n] n

... skipping.

By default, MySQL comes with a database named 'test'that anyone can

access.  Thisis also intended only for testing, and should be removed

before moving into a production environment.

Remove test database and access to it? [Y/n] n

... skipping.

Reloading the privilege tables will ensure that allchanges made so far

will take effect immediately.

Reload privilege tables now? [Y/n] y

... Success!

Cleaning up...

All done!  Ifyou've completed all of the above steps, your MySQL

installation should now be secure.

Thanks for using MySQL!

# 授权可以远程访问mysql

mysql> grant all on *.* to 'root'@'%'  identified by '1234567';(1234567是root远程登录的密码,可以修改成你自己好记的)

Query OK, 0 rows affected (0.00 sec)

mysql> flush privileges;

Query OK, 0 rows affected (0.00 sec)

然后可以远程登录了,不解释

# Hive-0.13.1

# 解压apache-hive-0.13.1-bin.tar.gz

[root@rs229 hive]# pwd

/usr/local/adsit/yting/apache/hive

[root@rs229 hive]# ll

total 52976

-rw-r--r-- 1 root root 54246778 Jun 11 15:23apache-hive-0.13.1-bin.tar.gz

[root@rs229 hive]# tar -zxvf apache-hive-0.13.1-bin.tar.gz

# 通过../conf目录下的模版文件复制出对应的文件

[root@rs229 hive]# ll

total 52980

drwxr-xr-x 8 root root     4096 Jul 04 16:52 apache-hive-0.13.1-bin

-rw-r--r-- 1 root root 54246778 Jun 11 15:23apache-hive-0.13.1-bin.tar.gz

[root@rs229 hive]# cd apache-hive-0.13.1-bin

[root@rs229 apache-hive-0.13.1-bin]# ll

total 304

drwxr-xr-x 3 root root      4096 Jul 04 16:52 bin

drwxr-xr-x 2 root root      4096 Jul 04 16:52 conf

drwxr-xr-x 4 root root      4096 Jul 04 16:52 examples

drwxr-xr-x 7 root root      4096 Jul 04 16:52 hcatalog

drwxr-xr-x 4 root root      4096 Jul 04 16:52 lib

-rw-rw-r-- 1 500 cimsrvr  23828 Jan 30 06:23LICENSE

-rw-rw-r-- 1 500 cimsrvr    277 May 13 08:00NOTICE

-rw-rw-r-- 1 500 cimsrvr   3838 May 23 14:19README.txt

-rw-rw-r-- 1 500 cimsrvr 253839 Jun  3 03:27RELEASE_NOTES.txt

drwxr-xr-x 3 root root      4096 Jul 04 16:52 scripts

[root@rs229 apache-hive-0.13.1-bin]# cd conf/

[root@rs229 conf]# ll

total 120

-rw-rw-r-- 1 500 cimsrvr 107221 Jun  3 03:27 hive-default.xml.template

-rw-rw-r-- 1 500 cimsrvr   2378 Jan 30 06:23 hive-env.sh.template

-rw-rw-r-- 1 500 cimsrvr   2662 May 13 08:00hive-exec-log4j.properties.template

-rw-rw-r-- 1 500 cimsrvr   3050 May 13 08:00hive-log4j.properties.template

[root@rs229 conf]# cp hive-env.sh.template hive-env.sh

[root@rs229 conf]# cp hive-default.xml.template hive-site.xml

[root@rs229 conf]# ll

total 232

-rw-rw-r-- 1 500 cimsrvr 107221 Jun  3 03:27hive-default.xml.template

-rw-r--r-- 1 root root      2378 Jul 04 16:54 hive-env.sh

-rw-rw-r-- 1 500 cimsrvr   2378 Jan 30 06:23hive-env.sh.template

-rw-rw-r-- 1 500 cimsrvr   2662 May 13 08:00hive-exec-log4j.properties.template

-rw-rw-r-- 1 500 cimsrvr   3050 May 13 08:00hive-log4j.properties.template

-rw-r--r-- 1 root root    107221 Jul 04 16:54 hive-site.xml

# 修改hive-env.sh配置文件

[root@rs229 conf]# vi hive-env.sh

# Set HADOOP_HOME to point to a specific hadoop install directory

HADOOP_HOME=/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0

# 修改hive-site.xml文件

<configuration>

<property>

<name>hive.metastore.warehouse.dir</name>

<value>hdfs://mycluster/user/hive/warehouse</value>

</property>

<property>

<name>hive.exec.scratchdir</name>

<value>hdfs://mycluster/user/hive/scratchdir</value>

</property>

<property>

<name>hive.querylog.location</name>

<value>/usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/logs</value>

</property>

<property>

<name>javax.jdo.option.ConnectionURL</name>

<value>jdbc:mysql://rs229:3306/hiveMeta?createDatabaseIfNotExist=true</value>

</property>

<property>

<name>javax.jdo.option.ConnectionDriverName</name>

<value>com.mysql.jdbc.Driver</value>

</property>

<property>

<name>javax.jdo.option.ConnectionUserName</name>

<value>root</value>

</property>

<property>

<name>javax.jdo.option.ConnectionPassword</name>

<value>yousmile</value>

</property>

<property>

<name>hive.aux.jars.path</name>                <value>file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/hive-hbase-handler-0.13.1.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/protobuf-java-2.5.0.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/hbase-client-0.96.2-hadoop2.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/hbase-common-0.96.2-hadoop2.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/zookeeper-3.4.5.jar,file:///usr/local/adsit/yting/apache/hive/apache-hive-0.13.1-bin/lib/guava-11.0.2.jar</value>

<!--这里是重点的地方,为了跟Hbase整合,所以千万别写错了,hive.aux.jars.path 的value中间不允许有空格,回车,换行什么的,全部写在一行上就行了,不然会出各种错-->

</property>

<property>

<name>hive.zookeeper.quorum</name>

<value>rs229,rs227,rs226,rs198,rs197</value>

<description>The list of zookeeper servers to talk to. This isonly needed for read/write locks.</description>

</property>

<!--

<property>

<name>hive.metastore.uris</name>

<value>thrift://rs229:9083</value>

</property>

-->

</configuration>

注意1 : <name>hive.aux.jars.path</name>配置中不要出现空格的符号,全部写在一行上,不然要报错

注意2 : 如果使用mysql的话需要在${HIVE_HOME}/lib 目录下加入mysql的jdbc链接jar包

注意3 : mysql必须授权远程登录

# 创建数据仓库目录

[root@rs229 bin]# ./hadoop fs -mkdir -p /user/hive/warehouse

# 将需要的jar包加入到$HIVE_HOME/lib下去

hive.aux.jars.path这个路径用到的jar复制到$HIVE_HOME/lib目录下去,对照配置然后去将Hadoop目录下相关的jar包拷到Hive目录下去

# Hive测试

# 进入Hive Shell

[root@rs229 conf]# hive

hive> show databases;

OK

default

Time taken: 0.629 seconds, Fetched: 2 row(s)

hive> create database yting_test_20140703;

# 创建数据库

hive> show databases;

OK

default

yting_test_20140703

# 建表

hive> use yting_test_20140703;

OK

Time taken: 0.023 seconds

hive> create external table yousmile(uid int, myname string, youname string) row format delimited fields terminated by ',' location '/user/hive/warehouse/yousmile';

这里是创建的一个外部表

# Load数据到表yting_test_20140703

[root@rs229 tdata]# pwd

/usr/local/yting/hive/data/tdata

[root@rs229 tdata]# cat/usr/local/yting/hive/data/tdata/testdata001.dat

1314520,yting,xmei

[root@rs229 tdata]#

hive> load data local inpath '/usr/local/yting/hive/data/tdata/testdata001.dat' overwrite into table yousmile;

Copying data fromfile:/usr/local/yting/hive/data/tdata/testdata001.dat

Copying file:file:/usr/local/yting/hive/data/tdata/testdata001.dat

Loading data to table yting_test_20140703.yousmile

rmr: DEPRECATED: Please use 'rm -r' instead.

Deleted hdfs://mycluster/user/hive/warehouse/yousmile

Table yting_test_20140703.yousmile stats:[numFiles=0, numRows=0, totalSize=0, rawDataSize=0]

OK

Time taken: 0.877 seconds

hive> show tables;

OK

yousmile

Time taken: 0.028 seconds, Fetched: 1 row(s)

# 查询数据是否加载成功

hive> select * from yousmile;

OK

1314520  yting       xmei

Time taken: 0.326 seconds, Fetched: 1 row(s)

hive>

可以查询出刚刚的数据,不解释,成功了!

# Hive跟Hbase的整合

# Hive To Hbase(Hive中的表数据导入到Hbase中去)

# 创建Hbase能识别的表(带分区,扩展知识用的,这里不使用)

hive> CREATE TABLE hbase_to_hbase_yousmile_20140704(key int, value string) partitioned by (day string) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,cf1:info") TBLPROPERTIES ("hbase.table.name" = "hive_to_hbase_yousmile_20140704");

# 创建Hbase能识别的表(不带分区)

hive> set hive.hbase.bulk=true;

# 创建数据库yting_yousmile_20140704

hive> create database yting_yousmile_20140704; # 创建数据库

hive> use yting_yousmile_20140704;

OK

Time taken: 0.023 seconds

OK

Time taken: 0.254 seconds

# 使用数据库yting_yousmile_20140704

hive> use yting_yousmile_20140704; # 使用该数据库

OK

Time taken: 0.023 seconds

# 创建本地表,用来存储数据,然后插入到Hbase用的,相当于一张中间表了

hive> create table hive_to_hbase_middle_yousmile_20140704(uid int, info string) row format delimited fields terminated by ',';

OK

Time taken: 0.056 seconds

hive> show tables;

OK

hive_to_hbase_middle_yousmile_20140704

hive_to_hbase_yousmile_20140704

Time taken: 0.023 seconds, Fetched: 2 row(s)

hive>

# Load本地数据(不是HDFS上的数据,Linux上的数据)到Hive的中间表hive_to_hbase_middle_yousmile_20140704去

hive> load data local inpath '/usr/local/yting/hive/data/tdata/hbase_test001.dat' overwrite into table hive_to_hbase_middle_yousmile_20140704;

Copying data fromfile:/usr/local/yting/hive/data/tdata/hbase_test001.dat

Copying file:file:/usr/local/yting/hive/data/tdata/hbase_test001.dat

Loading data to table yting_yousmile_20140704.hive_to_hbase_middle_yousmile_20140704

rmr: DEPRECATED: Please use 'rm -r' instead.

Deletedhdfs://mycluster/user/hive/warehouse/yting_yousmile_20140704.db/hive_to_hbase_middle_yousmile_20140704

Table yting_yousmile_20140704.hive_to_hbase_middle_yousmile_20140704stats: [numFiles=1, numRows=0, totalSize=38, rawDataSize=0]

OK

Time taken: 0.647 seconds

# 查看中间表hive_to_hbase_middle_yousmile_20140704是否Load数据成功

hive> select * from hive_to_hbase_middle_yousmile_20140704;

OK

1     the

2     you

3     smile

4     until

5     forever

Time taken: 0.272 seconds, Fetched: 5 row(s)

hive>

# 中间表的数据hive_to_hbase_middle_yousmile_20140704插入与Hbase关联的表hive_to_hbase_yousmile_20140704,也就是插入到Hbase中去

这里稍微花了点时间,耐心的等一下

hive> insert overwrite table hive_to_hbase_yousmile_20140704 select * from hive_to_hbase_middle_yousmile_20140704;

Total jobs = 1

Launching Job 1 out of 1

Number of reduce tasks is set to 0 since there's noreduce operator

Starting Job = job_1404453897041_0002, Tracking URL =http://rs229:8088/proxy/application_1404453897041_0002/

Kill Command =/usr/local/adsit/yting/apache/hadoop/hadoop-2.2.0/bin/hadoop job  -kill job_1404453897041_0002

Hadoop job information for Stage-0: number ofmappers: 1; number of reducers: 0

2014-07-04 17:49:37,780 Stage-0 map = 0%,  reduce = 0%

2014-07-04 17:50:05,963 Stage-0 map = 100%,  reduce = 0%, Cumulative CPU 3.59 sec

MapReduce Total cumulative CPU time: 3 seconds 590msec

Ended Job = job_1404453897041_0002

MapReduce Jobs Launched:

Job 0: Map: 1  Cumulative CPU: 3.59 sec   HDFSRead: 312 HDFS Write: 0 SUCCESS

Total MapReduce CPU Time Spent: 3 seconds 590 msec

OK

Time taken: 105.236 seconds

hive>

# 查看与Hbase关联的表hive_to_hbase_yousmile_20140704有数据了没有

hive> select * from hive_to_hbase_yousmile_20140704;

OK

1     the

2     you

3     smile

4     until

5     forever

Time taken: 0.109 seconds, Fetched: 5 row(s)

hive>

可以看到已经插入数据了

# 在Hbase Shell下查看刚刚在Hive下的数据是否已经被插入了

[root@rs229 ~]# hbase shell

hbase(main):002:0> list

TABLE

hive_hbase_combiner

hive_to_hbase_yousmile_20140704

yting_xmei1129

3 row(s) in 0.0260 seconds

=> ["hive_hbase_combiner","hive_to_hbase_yousmile_20140704", "yting_xmei1129"]

hbase(main):003:0> scan 'hive_to_hbase_yousmile_20140704'

ROW                                                 COLUMN+CELL

1                                                 column=cf1:info, timestamp=1404467409828, value=the

2                                                 column=cf1:info, timestamp=1404467409828, value=you

3                                                 column=cf1:info, timestamp=1404467409828, value=smile

4                                                 column=cf1:info, timestamp=1404467409828, value=until

5                                                  column=cf1:info, timestamp=1404467409828, value=forever

5 row(s) in 0.0740 seconds

hbase(main):004:0>

可以看到数据成功插入到Hbase,到这里,Hive To Hbase已经完成了

# 注意

注意:与 Hbase 整合的带分区的表存在一个问题:select * from table 查询不到数据,而 select key, value from table 可以查到数据

# Hbase To Hive(Hbase中的表数据导入到Hive)

# Hbase Shell下创建一张表

RowKey是gid string

列族是:info:time    info:address

hbase(main):005:0> create 'hbase_to_hive_yousmile_20140704','gid','info'

0 row(s) in 0.4420 seconds

=> Hbase::Table - hbase_to_hive_yousmile_20140704

hbase(main):006:0> put 'hbase_to_hive_yousmile_20140704','3344520','info:time','20140704'

0 row(s) in 0.0760 seconds

hbase(main):008:0> put 'hbase_to_hive_yousmile_20140704','3344520','info:address','beijing'

0 row(s) in 0.0760 seconds

hbase(main):009:0> scan 'hbase_to_hive_yousmile_20140704'

ROW                                             COLUMN+CELL

3344520                                        column=info:address, timestamp=1404634322078, value=beijing

3344520                                        column=info:time, timestamp=1404469280507, value=20140704

1 row(s) in 0.0200 seconds

hbase(main):010:0>

# Hive下创建表连接Hbase中的表

hive> create external table hbase_to_hive_yousmile_20140704 (key string, gid map<string,string>) STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' WITH SERDEPROPERTIES ("hbase.columns.mapping" = "info:") TBLPROPERTIES ("hbase.table.name" = "hbase_to_hive_yousmile_20140704");

OK

Time taken: 0.222 seconds

hive> show tables;

OK

hbase_to_hive_yousmile_20140704

hive_to_hbase_middle_yousmile_20140704

hive_to_hbase_yousmile_20140704

Time taken: 0.027 seconds, Fetched: 3 row(s)

hive>

# 查询Hbase表中的数据

hive> select * from hbase_to_hive_yousmile_20140704;

OK

3344520  {"address":"beijing","time":"20140704"}

Time taken: 0.156 seconds, Fetched: 1 row(s)

hive>

可以看出在Hive下能查询出Hbase表中的数据了

# Hadoop,Hive,Hbase整合完毕

终于整合完毕了,这个整合弄到了一半的时候没时间弄了,中间耽搁了好久,公司有点小忙,所以细心的话可以看出中间的时间间隔是隔了一定的时间了,不过今天总算是整理完了,这正是一个历史性的时刻

# Hive连接Hbase的优化

注:Hive 连接 Hbase 的优化,在 $HADOOP_HOME/conf 下的 hbase-site.xml 文件中增加如下配置

<property>
   <name>hbase.client.scanner.caching</name>
   <value>10000</value>
 </property>

或者在执行 hive 语句之前执行 hive> set hbase.client.scanner.caching=10000;

# 结束感言

终于搞完了,虽然期间各种错,中间也停留了一段时间,不过也总算整合完毕了,虽然有点小小的幸福,但是这都是值得的,该跑步去了,身体重要啊!且码且珍惜 、、、
上一篇:初步谈谈 C# 多线程、异步编程与并发服务器


下一篇:用adb取出在手机中安装的apk