Setting Up a Fully Distributed Hadoop Environment
Required resources: https://pan.baidu.com/s/1g6A-m86ApIEMDhTFSqmlCw
Extraction code: e8fq
Environment Preparation
Virtual Machine Setup
1. Clone three virtual machines
In VMware, open the "Virtual Machine" menu at the top left and choose "Clone" from the "Manage" submenu.
2. Set a static IP on each cloned virtual machine
In the desktop network settings, open the wired connection's settings and change the IPv4 configuration to a static address.
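The same result can be achieved from the shell. A minimal sketch for hadoop101, assuming the interface is named ens33 and the gateway is 192.168.5.2 (check your VMware NAT settings; use 192.168.5.102 and 192.168.5.103 on the other two machines):
[root@hadoop101 ~]# vim /etc/sysconfig/network-scripts/ifcfg-ens33
# assumed values; adjust interface name, gateway, and DNS to your network
BOOTPROTO=static
ONBOOT=yes
IPADDR=192.168.5.101
PREFIX=24
GATEWAY=192.168.5.2
DNS1=192.168.5.2
[root@hadoop101 ~]# systemctl restart network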
3. Change the hostname
[root@hadoop101 ~]# vim /etc/hostname
Set the hostnames to hadoop101, hadoop102, and hadoop103 respectively (lowercase, matching the /etc/hosts entries configured below).
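On CentOS 7 the hostname can also be set directly, without editing the file; a sketch for hadoop101:
[root@hadoop101 ~]# hostnamectl set-hostname hadoop101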
4. Disable the firewall
[root@hadoop101 ~]# systemctl stop firewalld.service
[root@hadoop101 ~]# systemctl disable firewalld.service
[root@hadoop102 ~]# systemctl stop firewalld.service
[root@hadoop102 ~]# systemctl disable firewalld.service
[root@hadoop103 ~]# systemctl stop firewalld.service
[root@hadoop103 ~]# systemctl disable firewalld.service
All three machines are configured the same way.
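Optionally confirm on each node that the firewall is down; firewall-cmd --state should report not running:
[root@hadoop101 ~]# firewall-cmd --state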
Configure Passwordless SSH Login
[root@hadoop101 ~]# vim /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.5.101 hadoop101
192.168.5.102 hadoop102
192.168.5.103 hadoop103
[root@hadoop101 ~]# ssh-keygen -t rsa
[root@hadoop101 ~]# ssh-copy-id hadoop101
[root@hadoop101 ~]# ssh-copy-id hadoop102
[root@hadoop101 ~]# ssh-copy-id hadoop103
Repeat ssh-keygen and the three ssh-copy-id commands on the other two machines, so every node can reach every other node without a password.
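Before repeating those steps, the edited hosts file must also exist on hadoop102 and hadoop103 so they can resolve the cluster names; a sketch:
[root@hadoop101 ~]# scp /etc/hosts hadoop102:/etc/hosts
[root@hadoop101 ~]# scp /etc/hosts hadoop103:/etc/hosts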
Installing the JDK
1. Remove the preinstalled OpenJDK packages; perform the same removal on the other two machines
[root@hadoop101 ~]# rpm -qa | grep jdk
java-1.8.0-openjdk-1.8.0.161-2.b14.el7.x86_64
copy-jdk-configs-3.3-2.el7.noarch
java-1.8.0-openjdk-headless-1.8.0.161-2.b14.el7.x86_64
[root@hadoop101 ~]# rpm -e --nodeps java-1.8.0-openjdk-1.8.0.161-2.b14.el7.x86_64
[root@hadoop101 ~]# rpm -e --nodeps copy-jdk-configs-3.3-2.el7.noarch
[root@hadoop101 ~]# rpm -e --nodeps java-1.8.0-openjdk-headless-1.8.0.161-2.b14.el7.x86_64
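As a sketch, the query and removal can be combined into a single line; it removes every installed package whose name contains jdk, so review the rpm -qa output first:
[root@hadoop101 ~]# rpm -qa | grep -i jdk | xargs -n1 rpm -e --nodeps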
2. Create a directory for installation packages
[root@hadoop101 ~]# mkdir /opt/software
[root@hadoop101 ~]# cd /opt/software/
3. Upload the JDK archive, extract it, and add the environment variables
[root@hadoop101 software]# mkdir /opt/module
[root@hadoop101 software]# tar -zxvf jdk-8u211-linux-x64.tar.gz -C /opt/module/
/opt/module/jdk1.8.0_211
[root@hadoop101 jdk1.8.0_211]# vim /etc/profile
Append the JDK path at the end of the profile:
#JAVA_HOME
export JAVA_HOME=/opt/module/jdk1.8.0_211
export PATH=$PATH:$JAVA_HOME/bin
4. Source the profile and verify the Java version
[root@hadoop101 jdk1.8.0_211]# source /etc/profile
[root@hadoop101 jdk1.8.0_211]# java -version
5. Copy the JDK directory to the other two machines, distribute the profile, and source it on each
[root@hadoop101 module]# scp -r /opt/module/jdk1.8.0_211/ hadoop102:/opt/module/
[root@hadoop101 module]# scp -r /opt/module/jdk1.8.0_211/ hadoop103:/opt/module/
[root@hadoop101 module]# scp /etc/profile hadoop102:/etc/
[root@hadoop101 module]# scp /etc/profile hadoop103:/etc/
[root@hadoop102 module]# source /etc/profile
[root@hadoop102 module]# java -version
[root@hadoop103 ~]# source /etc/profile
[root@hadoop103 ~]# java -version
Installing ZooKeeper 3.5.7
1. Upload the archive to the software directory and extract it
[root@hadoop101 module]# cd /opt/software/
[root@hadoop101 software]# tar -zxvf apache-zookeeper-3.5.7-bin.tar.gz -C /opt/module/
2. Distribute it to the other nodes
[root@hadoop101 software]# cd /opt/module/
[root@hadoop101 module]# scp -r apache-zookeeper-3.5.7-bin/ hadoop102:/opt/module/
[root@hadoop101 module]# scp -r apache-zookeeper-3.5.7-bin/ hadoop103:/opt/module/
3. Create a zkData directory under the ZooKeeper directory
[root@hadoop101 module]# cd apache-zookeeper-3.5.7-bin/
[root@hadoop101 apache-zookeeper-3.5.7-bin]# mkdir zkData
4. Create a myid file under zkData, write this node's ID (1), and save it
[root@hadoop101 apache-zookeeper-3.5.7-bin]# cd zkData/
[root@hadoop101 zkData]# vim myid
1
5. Distribute the zkData directory
[root@hadoop101 zkData]# cd ..
[root@hadoop101 apache-zookeeper-3.5.7-bin]# scp -r zkData/ hadoop102:/opt/module/apache-zookeeper-3.5.7-bin/
[root@hadoop101 apache-zookeeper-3.5.7-bin]# scp -r zkData/ hadoop103:/opt/module/apache-zookeeper-3.5.7-bin/
6. Configure zoo.cfg
[root@hadoop101 apache-zookeeper-3.5.7-bin]# cd conf/
[root@hadoop101 conf]# mv zoo_sample.cfg zoo.cfg
[root@hadoop101 conf]# vim zoo.cfg
Change the data storage path:
dataDir=/opt/module/apache-zookeeper-3.5.7-bin/zkData
Append the cluster configuration at the end of the file; in each server.X entry, X is that node's myid, 2888 is the follower-to-leader port, and 3888 is the leader-election port:
server.1=hadoop101:2888:3888
server.2=hadoop102:2888:3888
server.3=hadoop103:2888:3888
Distribute zoo.cfg:
[root@hadoop101 conf]# scp zoo.cfg hadoop102:/opt/module/apache-zookeeper-3.5.7-bin/conf/
[root@hadoop101 conf]# scp zoo.cfg hadoop103:/opt/module/apache-zookeeper-3.5.7-bin/conf/
7. Change myid on the other two machines to 2 and 3 respectively
[root@hadoop102 apache-zookeeper-3.5.7-bin]# vim zkData/myid
2
[root@hadoop103 apache-zookeeper-3.5.7-bin]# vim zkData/myid
3
8. Start the cluster
[root@hadoop101 ~]# /opt/module/apache-zookeeper-3.5.7-bin/bin/zkServer.sh start
[root@hadoop102 ~]# /opt/module/apache-zookeeper-3.5.7-bin/bin/zkServer.sh start
[root@hadoop103 ~]# /opt/module/apache-zookeeper-3.5.7-bin/bin/zkServer.sh start
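One node should now be the leader and the other two followers, which can be verified on each machine with the bundled status command:
[root@hadoop101 ~]# /opt/module/apache-zookeeper-3.5.7-bin/bin/zkServer.sh status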
Installing Hadoop 3.1.3
HDFS HA Setup
1. Upload the archive to the software directory and extract it
[root@hadoop101 module]# cd /opt/software/
[root@hadoop101 software]# tar -zxvf hadoop-3.1.3.tar.gz -C /opt/module/
2. Distribute the hadoop directory to the other nodes
[root@hadoop101 software]# cd /opt/module/
[root@hadoop101 module]# scp -r hadoop-3.1.3/ hadoop102:/opt/module/
[root@hadoop101 module]# scp -r hadoop-3.1.3/ hadoop103:/opt/module/
3. Configure the Hadoop environment variables by appending the Hadoop paths at the end of the profile; repeat on the other two machines
[root@hadoop101 hadoop-3.1.3]# vim /etc/profile
#HADOOP_HOME
export HADOOP_HOME=/opt/module/hadoop-3.1.3
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
[root@hadoop101 hadoop-3.1.3]# source /etc/profile
[root@hadoop101 hadoop-3.1.3]# hadoop version
Hadoop 3.1.3
Source code repository https://gitbox.apache.org/repos/asf/hadoop.git -r ba631c436b806728f8ec2f54ab1e289526c90579
Compiled by ztang on 2019-09-12T02:47Z
Compiled with protoc 2.5.0
From source with checksum ec785077c385118ac91aadde5ec9799
This command was run using /opt/module/hadoop-3.1.3/share/hadoop/common/hadoop-common-3.1.3.jar
4. Configure the nameservice by editing hdfs-site.xml
[root@hadoop101 hadoop-3.1.3]# cd etc/hadoop/
[root@hadoop101 hadoop]# vim hdfs-site.xml
<configuration>
<!-- Replication factor -->
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<!-- Nameservice ID -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- Multiple NameNodes -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2,nn3</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop101:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop102:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn3</name>
<value>hadoop103:8020</value>
</property>
<!-- HTTP listen address for each NameNode -->
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop101:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop102:9870</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn3</name>
<value>hadoop103:9870</value>
</property>
<!-- JournalNode quorum, used by the NameNodes to share edit logs -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop101:8485;hadoop102:8485;hadoop103:8485/mycluster</value>
</property>
<!-- Java class HDFS clients use to find the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method: only one NameNode may serve requests at a time -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- sshfence requires passwordless SSH -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Disable permission checking -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
5. Edit core-site.xml
[root@hadoop101 hadoop]# vim core-site.xml
<configuration>
<!-- Default filesystem -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- JournalNode edits storage path -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/module/hadoop-3.1.3/JN/data</value>
</property>
<!-- Hadoop runtime temporary files -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-3.1.3/tmp</value>
</property>
</configuration>
6. Enable automatic failover in hdfs-site.xml
[root@hadoop101 hadoop]# vim hdfs-site.xml
<!-- Enable automatic failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
7. Add the ZooKeeper quorum to core-site.xml
<!-- ZooKeeper quorum address -->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop101:2181,hadoop102:2181,hadoop103:2181</value>
</property>
ResourceManager HA Setup
1. Edit yarn-site.xml
[root@hadoop101 hadoop]# vim yarn-site.xml
<!-- YARN HA configuration -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster1</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop101</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop103</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop101:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop103:8088</value>
</property>
<property>
<name>hadoop.zk.address</name>
<value>hadoop101:2181,hadoop102:2181,hadoop103:2181</value>
</property>
<!-- Enable automatic recovery -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Store ResourceManager state in the ZooKeeper cluster -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
Starting the Cluster
1. Configure workers (called slaves in older versions)
[root@hadoop101 hadoop]# vim workers
hadoop101
hadoop102
hadoop103
2. Distribute the configuration files
[root@hadoop101 hadoop]# cd ..
[root@hadoop101 etc]# scp -r hadoop/ hadoop102:/opt/module/hadoop-3.1.3/etc/
[root@hadoop101 etc]# scp -r hadoop/ hadoop103:/opt/module/hadoop-3.1.3/etc/
3. Start the JournalNode service on each machine
[root@hadoop101 hadoop-3.1.3]# sbin/hadoop-daemon.sh start journalnode
[root@hadoop102 hadoop-3.1.3]# sbin/hadoop-daemon.sh start journalnode
[root@hadoop103 hadoop-3.1.3]# sbin/hadoop-daemon.sh start journalnode
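hadoop-daemon.sh still works in Hadoop 3.x but prints a deprecation warning; the equivalent current form is:
[root@hadoop101 hadoop-3.1.3]# bin/hdfs --daemon start journalnode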
4. Format the NameNode on nn1 (the JournalNodes started in the previous step must be running, since the format writes to the shared edits directory)
[root@hadoop101 hadoop-3.1.3]# bin/hdfs namenode -format
5. Declare the startup users at the top of start-dfs.sh and stop-dfs.sh; without these variables, Hadoop 3.x refuses to launch the daemons as root
[root@hadoop101 hadoop-3.1.3]# vim sbin/start-dfs.sh
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
[root@hadoop101 hadoop-3.1.3]# vim sbin/stop-dfs.sh
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
6. Declare the startup users at the top of start-yarn.sh and stop-yarn.sh
[root@hadoop101 hadoop-3.1.3]# vim sbin/start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
[root@hadoop101 hadoop-3.1.3]# vim sbin/stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
7. Edit hadoop-env.sh on each machine, uncommenting and setting JAVA_HOME
[root@hadoop101 hadoop-3.1.3]# vim etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_211
[root@hadoop102 hadoop-3.1.3]# vim etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_211
[root@hadoop103 hadoop-3.1.3]# vim etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_211
8. Distribute the modified scripts in sbin/
[root@hadoop101 hadoop-3.1.3]# scp -r sbin/ hadoop102:/opt/module/hadoop-3.1.3/
[root@hadoop101 hadoop-3.1.3]# scp -r sbin/ hadoop103:/opt/module/hadoop-3.1.3/
9. Start the NameNode on nn1, then bootstrap nn2 and nn3 as standbys and start their NameNodes
[root@hadoop101 hadoop-3.1.3]# sbin/hadoop-daemon.sh start namenode
[root@hadoop102 hadoop-3.1.3]# bin/hdfs namenode -bootstrapStandby
[root@hadoop103 hadoop-3.1.3]# bin/hdfs namenode -bootstrapStandby
[root@hadoop102 hadoop-3.1.3]# sbin/hadoop-daemon.sh start namenode
[root@hadoop103 hadoop-3.1.3]# sbin/hadoop-daemon.sh start namenode
10. Stop all HDFS services
[root@hadoop101 hadoop-3.1.3]# sbin/stop-all.sh
11. Initialize the HA state in ZooKeeper
[root@hadoop101 hadoop-3.1.3]# bin/hdfs zkfc -formatZK
12. Start all cluster services
[root@hadoop101 hadoop-3.1.3]# sbin/start-all.sh
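Once everything is up, the HA state can be verified; a sketch (which NameNode and ResourceManager become active depends on the ZooKeeper election):
[root@hadoop101 hadoop-3.1.3]# bin/hdfs haadmin -getServiceState nn1
[root@hadoop101 hadoop-3.1.3]# bin/yarn rmadmin -getServiceState rm1
[root@hadoop101 hadoop-3.1.3]# jps
jps on each node should list NameNode, DataNode, JournalNode, DFSZKFailoverController, QuorumPeerMain, and NodeManager, plus ResourceManager on hadoop101 and hadoop103. The NameNode web UIs listen on port 9870 and the ResourceManager UIs on port 8088, per the addresses configured above.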