1. Cluster planning
2. Server planning
3. Installation file preparation
4. HA system environment preparation
4.1. Operating system installation. This environment uses a CentOS minimal install; the OS version is CentOS Linux release 7.8.2003 (Core).
mkdir /soft
Upload the installation files to the /soft directory (hadoop-2.7.7-centos7.tar.gz, jdk-8u151-linux-x64.tar.gz, zookeeper-3.4.14.tar.gz).
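If the files sit on a local workstation, they can be pushed to the node with scp; the source directory below is illustrative:
scp /path/to/hadoop-2.7.7-centos7.tar.gz /path/to/jdk-8u151-linux-x64.tar.gz /path/to/zookeeper-3.4.14.tar.gz root@192.168.32.11:/soft/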
4.2. Disable the firewall
systemctl stop firewalld.service && systemctl disable firewalld.service
4.3. Disable SELinux
sed -i 's/^SELINUX=enforcing/SELINUX=disabled/' /etc/selinux/config
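The change in /etc/selinux/config only applies after a reboot; setenforce switches the running system to permissive mode right away, and getenforce confirms it:
setenforce 0
getenforce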
4.4. Install Java
tar -zxvf jdk-8u151-linux-x64.tar.gz -C /usr/local/
Update the environment variables (single quotes keep the $ variables from being expanded before /etc/profile is sourced):
echo 'export ZOOKEEPER_HOME=/opt/hadoop/zookeeper' >> /etc/profile
echo 'export HADOOP_HOME=/opt/hadoop/hadoop' >> /etc/profile
echo 'export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH' >> /etc/profile
echo 'export JAVA_HOME=/usr/local/jdk1.8.0_151' >> /etc/profile
echo 'export JRE_HOME=/usr/local/jdk1.8.0_151/jre' >> /etc/profile
echo 'export CLASS_PATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar:$JRE_HOME/lib' >> /etc/profile
echo 'export PATH=/opt/hadoop/zookeeper/bin:$JAVA_HOME/bin:$JRE_HOME/bin:$PATH' >> /etc/profile
source /etc/profile
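A quick check that the new profile is picked up:
java -version
echo $JAVA_HOME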
4.5. Add the hadoop user
groupadd hadoop && useradd -g hadoop -d /home/hadoop hadoop && echo 'hadoop' | passwd hadoop --stdin
4.6. Create the Hadoop installation directories and set their ownership
mkdir -p /opt/hadoop && chown -R hadoop:hadoop /opt/hadoop
mkdir -p /opt/data/hadoop/hdfs && chown -R hadoop:hadoop /opt/data/hadoop/hdfs
mkdir -p /opt/data/hadoop/tmp && chown -R hadoop:hadoop /opt/data/hadoop/tmp
4.7. Edit the hosts file
echo "192.168.32.11 node1" >>/etc/hosts
echo "192.168.32.12 node2" >>/etc/hosts
echo "192.168.32.13 node3" >>/etc/hosts
echo "192.168.32.14 node4" >>/etc/hosts
4.8. Configure passwordless SSH login (run on every relevant node)
ssh-keygen -t rsa
[root@node1 ~]# ssh-copy-id node1
[root@node1 ~]# ssh-copy-id node2
[root@node1 ~]# ssh-copy-id node3
[root@node1 ~]# ssh-copy-id node4
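Passwordless login can be verified by running a remote command; it should return without prompting for a password:
[root@node1 ~]# ssh node2 hostname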
5. Hadoop HA installation
5.1. ZooKeeper installation
[root@node1 ~]# cd /soft/
tar -zxvf zookeeper-3.4.14.tar.gz -C /opt/hadoop/
cd /opt/hadoop/ && mv zookeeper-3.4.14/ zookeeper
Configure zoo.cfg and create the related directories
[root@node1 ~]# cd /opt/hadoop/zookeeper/conf
[root@node1 conf]# cp zoo_sample.cfg zoo.cfg
[root@node1 conf]# vim zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/hadoop/zookeeper/data
dataLogDir=/opt/hadoop/zookeeper/dataLog
clientPort=2181
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
maxClientCnxns=60
[root@node1 ~]# mkdir -p /opt/hadoop/zookeeper/data
[root@node1 ~]# mkdir -p /opt/hadoop/zookeeper/dataLog
Create the myid file
[root@node1 conf]# cd /opt/hadoop/zookeeper/data
[root@node1 data]# touch myid && echo 1 > myid
Copy the entire zookeeper directory to the same path on the other nodes
[root@node1 hadoop]# scp -r zookeeper/ node2:$PWD
[root@node1 hadoop]# scp -r zookeeper/ node3:$PWD
[root@node1 hadoop]# scp -r zookeeper/ node4:$PWD
Edit the myid file on the other nodes; the number must match that host's server.N entry in zoo.cfg (e.g. server.1=node1).
[root@node3 data]# sed -i "s/1/3/g" /opt/hadoop/zookeeper/data/myid
[root@node2 data]# sed -i "s/1/2/g" /opt/hadoop/zookeeper/data/myid
[root@node4 data]# sed -i "s/1/4/g" /opt/hadoop/zookeeper/data/myid
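A quick sanity check on each node that the myid value matches its server.N entry in zoo.cfg:
cat /opt/hadoop/zookeeper/data/myid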
Start ZooKeeper on all three nodes and check its status
[root@node3 hadoop]# zkServer.sh start
ZooKeeper JMX enabled by default
Using config: /opt/hadoop/zookeeper/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@node3 hadoop]# jps
1949 QuorumPeerMain
1983 Jps
[root@node3 hadoop]# zkServer.sh status
ZooKeeper JMX enabled by default
Using config: /opt/hadoop/zookeeper/bin/../conf/zoo.cfg
Mode: leader
[root@node3 hadoop]#
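Beyond zkServer.sh status, the ensemble can also be exercised with the client shell that ships in the ZooKeeper bin directory (already on the PATH); running ls / at its prompt should at least list the built-in zookeeper znode:
[root@node1 hadoop]# zkCli.sh -server node1:2181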
5.2. HDFS component installation
[root@node1 conf]# cd /soft
[root@node1 soft]# ls
hadoop-2.7.7-centos7.tar.gz jdk-8u151-linux-x64.tar.gz zookeeper-3.4.14.tar.gz
[root@node1 soft]# tar -zxvf hadoop-2.7.7-centos7.tar.gz -C /opt/hadoop/
[root@node1 hadoop]# mv hadoop-2.7.7/ hadoop
Edit the configuration files to add the JAVA_HOME environment variable.
Edit hadoop-env.sh:
export JAVA_HOME=/usr/local/jdk1.8.0_151
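hadoop-env.sh sits in the Hadoop configuration directory; assuming the layout above and the stock file (whose default line is export JAVA_HOME=${JAVA_HOME}), appending a concrete export at the end is enough:
echo 'export JAVA_HOME=/usr/local/jdk1.8.0_151' >> /opt/hadoop/hadoop/etc/hadoop/hadoop-env.sh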
Edit hdfs-site.xml:
<configuration>
<property>
<!-- Define a logical nameservice name for the NameNode cluster -->
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<!-- NameNodes contained in the nameservice; give each NameNode an ID -->
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<!-- RPC address and port of NameNode nn1; RPC is used to talk to the DataNodes -->
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>node1:8020</value>
</property>
<property>
<!-- RPC address and port of NameNode nn2; RPC is used to talk to the DataNodes -->
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>node2:8020</value>
</property>
<property>
<!-- HTTP address and port of NameNode nn1 (web UI) -->
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>node1:50070</value>
</property>
<property>
<!-- HTTP address and port of NameNode nn2 (web UI) -->
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>node2:50070</value>
</property>
<property>
<!-- JournalNode list the NameNodes use to share edit logs -->
<!-- Shared storage location for the NameNode edits metadata -->
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node1:8485;node2:8485;node3:8485/mycluster</value>
</property>
<property>
<!-- Directory on each JournalNode for storing edits -->
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoop/hadoop/tmp/data/dfs/jn</value>
</property>
<property>
<!-- Proxy class clients use to reach the currently active NameNode -->
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<!-- Fencing methods; multiple methods are separated by newlines, one method per line -->
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- The sshfence method requires passwordless SSH -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- Timeout for communication with the JournalNode cluster -->
<property>
<name>dfs.qjournal.start-segment.timeout.ms</name>
<value>60000</value>
</property>
<!-- Number of replicas -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- NameNode data directory -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/data/hadoop/hdfs/nn</value>
</property>
<!-- DataNode data directory -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/data/hadoop/hdfs/dn</value>
</property>
<!-- Enable automatic failover when a NameNode fails -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Enable WebHDFS -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<!-- Connect timeout for the sshfence mechanism -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>ha.failover-controller.cli-check.rpc-timeout.ms</name>
<value>60000</value>
</property>
</configuration>
Edit core-site.xml:
<configuration>
<property>
<!-- HDFS address; with HA this points at the nameservice -->
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<!-- Base directory for Hadoop temporary files -->
<name>hadoop.tmp.dir</name>
<value>/opt/data/hadoop/tmp</value>
</property>
<!-- ZooKeeper quorum addresses -->
<property>
<name>ha.zookeeper.quorum</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
<!-- Session timeout for Hadoop connections to ZooKeeper -->
<property>
<name>ha.zookeeper.session-timeout.ms</name>
<value>30000</value>
<description>ms</description>
</property>
<property>
<name>fs.trash.interval</name>
<value>1440</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hive.hosts</name>
<value>*</value>
</property>
</configuration>
Edit yarn-site.xml:
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>106800</value>
</property>
<property>
<!-- Enable ResourceManager HA -->
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<!-- Cluster ID for the ResourceManager HA cluster -->
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-cluster</value>
</property>
<property>
<!-- Logical IDs of the ResourceManager HA nodes -->
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<!-- Host of the first ResourceManager -->
<name>yarn.resourcemanager.hostname.rm1</name>
<value>node2</value>
</property>
<property>
<!-- Host of the second ResourceManager -->
<name>yarn.resourcemanager.hostname.rm2</name>
<value>node3</value>
</property>
<property>
<!-- ZooKeeper nodes used by ResourceManager HA -->
<name>yarn.resourcemanager.zk-address</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
<property>
<!-- Enable ResourceManager state recovery -->
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store the ResourceManager state in the ZooKeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://node1:19888/jobhistory/logs/</value>
</property>
</configuration>
Edit mapred-site.xml:
<configuration>
<!-- Use YARN as the MapReduce framework -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Address and port of the MapReduce JobHistory server -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>node1:10020</value>
</property>
<!-- Web address of the MapReduce JobHistory server -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node1:19888</value>
</property>
</configuration>
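The configured Hadoop directory also has to exist on every node before the daemons below are started. A minimal sketch, assuming the same /opt/hadoop layout on all hosts; the DataNode host list written to etc/hadoop/slaves is illustrative and should match the actual server plan:
[root@node1 hadoop]# echo "node2" > /opt/hadoop/hadoop/etc/hadoop/slaves
[root@node1 hadoop]# echo "node3" >> /opt/hadoop/hadoop/etc/hadoop/slaves
[root@node1 hadoop]# echo "node4" >> /opt/hadoop/hadoop/etc/hadoop/slaves
[root@node1 hadoop]# scp -r /opt/hadoop/hadoop node2:/opt/hadoop/
[root@node1 hadoop]# scp -r /opt/hadoop/hadoop node3:/opt/hadoop/
[root@node1 hadoop]# scp -r /opt/hadoop/hadoop node4:/opt/hadoop/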
6. Starting Hadoop
6.1. Start the JournalNodes
[root@node1 hadoop]# hadoop-daemon.sh start journalnode
[root@node2 hadoop]# hadoop-daemon.sh start journalnode
[root@node3 hadoop]# hadoop-daemon.sh start journalnode
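Before moving on, it is worth confirming that a JournalNode process is actually running on each of the three nodes:
[root@node1 hadoop]# jps | grep JournalNode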
6.2. Format and start the first NameNode
[root@node1 hadoop]# hdfs namenode -format
[root@node1 hadoop]# hadoop-daemon.sh start namenode
6.3. Bootstrap and start the second NameNode
[root@node2 hadoop]# hdfs namenode -bootstrapStandby
[root@node2 hadoop]# hadoop-daemon.sh start namenode
6.4. Transition nn1 (node1) to the active state
[root@node1 hadoop]# hdfs haadmin -transitionToActive --forcemanual nn1
6.5. Check the NameNode service states
[root@node1 hadoop]# hdfs haadmin -getServiceState nn1
active
[root@node1 hadoop]# hdfs haadmin -getServiceState nn2
standby
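The same states are exposed by the NameNode web UIs configured in hdfs-site.xml; assuming curl is available, a quick reachability check (a browser at http://node1:50070 and http://node2:50070 works just as well):
[root@node1 hadoop]# curl -s -o /dev/null -w "%{http_code}\n" http://node1:50070
[root@node1 hadoop]# curl -s -o /dev/null -w "%{http_code}\n" http://node2:50070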
6.6. Register the HA cluster in ZooKeeper
[root@node1 hadoop]# hdfs zkfc -formatZK
6.7. Start the DataNodes (start-dfs.sh also brings up the remaining HDFS daemons, including the ZKFCs)
[root@node1 hadoop]# start-dfs.sh
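Once start-dfs.sh returns, jps on each node should show the expected DataNode and DFSZKFailoverController processes, and a cluster-wide report can be pulled from the active NameNode:
[root@node1 hadoop]# hdfs dfsadmin -report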
6.8. Start YARN
[root@node3 hadoop]# start-yarn.sh
[root@node2 hadoop]# start-yarn.sh
[root@node1 hadoop]# start-yarn.sh
6.9. Check the YARN ResourceManager states
[root@node1 hadoop]# yarn rmadmin -getServiceState rm1
active
[root@node1 hadoop]# yarn rmadmin -getServiceState rm2
standby
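As a final smoke test, a small example job can be submitted to the HA cluster; the examples jar path below is the standard location inside a Hadoop 2.7.7 distribution and may need adjusting for this particular build:
[root@node1 hadoop]# hdfs dfs -ls /
[root@node1 hadoop]# hadoop jar /opt/hadoop/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.7.jar pi 2 10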