1. Set the hostnames (run the matching command on each of the three machines)
# on the first node
hostnamectl set-hostname hadoop102
# on the second node
hostnamectl set-hostname hadoop103
# on the third node
hostnamectl set-hostname hadoop104
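To confirm the change took effect, you can check on each machine (an optional verification, not part of the original steps):
# should print the name you just set
hostname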
2. Update /etc/hosts (on all three machines)
vim /etc/hosts
10.206.16.3 hadoop102
10.206.16.5 hadoop103
10.206.16.14 hadoop104
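A quick optional check that the mappings work is to ping each peer by name, for example from hadoop102:
ping -c 1 hadoop103
ping -c 1 hadoop104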
3. Install the required packages
# epel-release adds the Extra Packages for Enterprise Linux repository
yum install -y epel-release
# vim is a text editor; needed on a minimal install
yum install -y vim
# net-tools provides ifconfig and other network commands; needed on a minimal install
yum install -y net-tools
# rsync is the file-distribution tool used by the xsync script later
yum install -y rsync
4. Disable the firewall (not needed on Tencent Cloud; open the ports in the console security group instead)
# check firewall status
systemctl status firewalld
# stop the firewall
systemctl stop firewalld
# disable firewall autostart at boot
systemctl disable firewalld
5. Uninstall the bundled JDK (not needed on a minimal install or on Tencent Cloud images)
# remove every installed Java package, ignoring dependencies
rpm -qa | grep -i java | xargs -n1 rpm -e --nodeps
6. Install the JDK (on hadoop102)
mkdir -p /opt/software
mkdir -p /opt/module
cd /opt/software
# download OpenJDK 8
wget https://download.java.net/openjdk/jdk8u41/ri/openjdk-8u41-b04-linux-x64-14_jan_2020.tar.gz
# extract
tar -zxvf openjdk-8u41-b04-linux-x64-14_jan_2020.tar.gz -C /opt/module/
# edit the environment variables
vim /etc/profile
# append at the end of the file:
export JAVA_HOME=/opt/module/java-se-8u41-ri
export PATH=$PATH:$JAVA_HOME/bin
# reload
source /etc/profile
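As an optional check that the JDK is on the PATH (this build should report version 1.8.0_41):
java -version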
7. Install Hadoop 3.3.1 (on hadoop102)
cd /opt/software
# download Hadoop 3.3.1
wget --no-check-certificate https://downloads.apache.org/hadoop/common/hadoop-3.3.1/hadoop-3.3.1.tar.gz
# extract
tar -zxvf /opt/software/hadoop-3.3.1.tar.gz -C /opt/module/
# edit the environment variables
vim /etc/profile
# append at the end of the file:
export HADOOP_HOME=/opt/module/hadoop-3.3.1
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
# reload
source /etc/profile
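You can verify the installation the same way (should report Hadoop 3.3.1):
hadoop version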
8. Configure passwordless SSH login
ssh-keygen -t rsa
# press Enter three times; this generates the id_rsa private key and id_rsa.pub public key in the user's home directory
# copy the key to hadoop102: type yes, then enter that host's password
ssh-copy-id hadoop102
# copy the key to hadoop103: type yes, then enter that host's password
ssh-copy-id hadoop103
# copy the key to hadoop104: type yes, then enter that host's password
ssh-copy-id hadoop104
Repeat the same steps on hadoop103 and hadoop104 so that every node can log in to the others without a password.
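Once all three nodes are configured, a quick loop verifies that no password prompt appears (optional check):
# each iteration should print the remote hostname without asking for a password
for host in hadoop102 hadoop103 hadoop104; do ssh $host hostname; done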
9. The xsync distribution script
vim xsync
#!/bin/bash
#1. check the argument count
if [ $# -lt 1 ]
then
    echo "Not Enough Arguments!"
    exit
fi
#2. loop over every machine in the cluster
for host in hadoop102 hadoop103 hadoop104
do
    echo ==================== $host ====================
    #3. send each argument in turn
    for file in "$@"
    do
        #4. check that the file exists
        if [ -e "$file" ]
        then
            #5. resolve the parent directory (following symlinks)
            pdir=$(cd -P "$(dirname "$file")"; pwd)
            #6. get the file name
            fname=$(basename "$file")
            ssh $host "mkdir -p $pdir"
            rsync -av "$pdir/$fname" $host:"$pdir"
        else
            echo "$file does not exist!"
        fi
    done
done
# make the script executable
chmod +x xsync
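As a usage sketch (the file name here is just an example, assuming you are working as root as the later daemon settings imply), this pushes a file to the same path on every node:
touch /root/test.txt
./xsync /root/test.txt
# /root/test.txt now exists on hadoop102, hadoop103, and hadoop104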
10. Cluster layout rules
Do not put the NameNode and the SecondaryNameNode on the same server.
The ResourceManager is also resource-hungry; do not put it on the same server as the NameNode or the SecondaryNameNode.
|      | hadoop102 | hadoop103 | hadoop104 |
|------|-----------|-----------|-----------|
| HDFS | NameNode<br>DataNode | DataNode | SecondaryNameNode<br>DataNode |
| YARN | NodeManager | ResourceManager<br>NodeManager | NodeManager |
11. Core configuration file
cd /opt/module/hadoop-3.3.1/etc/hadoop
vim core-site.xml
<configuration>
<!-- NameNode address -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop102:8020</value>
</property>
<!-- Hadoop data storage directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-3.3.1/data</value>
</property>
</configuration>
12. HDFS configuration file
vim hdfs-site.xml
<configuration>
<!-- NameNode web UI address -->
<property>
<name>dfs.namenode.http-address</name>
<value>hadoop102:9870</value>
</property>
<!-- SecondaryNameNode web UI address -->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop104:9868</value>
</property>
</configuration>
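With the environment variables from step 7 loaded, you can optionally confirm that Hadoop picks up these values:
hdfs getconf -confKey fs.defaultFS
hdfs getconf -confKey dfs.namenode.http-address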
13. YARN configuration file
vim yarn-site.xml
<configuration>
<!-- have MapReduce use the shuffle auxiliary service -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- ResourceManager host -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop103</value>
</property>
<!-- classpath for YARN applications (normally the output of the 'hadoop classpath' command) -->
<property>
<name>yarn.application.classpath</name>
<value>/opt/module/hadoop-3.3.1/etc/hadoop:/opt/module/hadoop-3.3.1/share/hadoop/common/lib/*:/opt/module/hadoop-3.3.1/share/hadoop/common/*:/opt/module/hadoop-3.3.1/share/hadoop/hdfs:/opt/module/hadoop-3.3.1/share/hadoop/hdfs/lib/*:/opt/module/hadoop-3.3.1/share/hadoop/hdfs/*:/opt/module/hadoop-3.3.1/share/hadoop/mapreduce/*:/opt/module/hadoop-3.3.1/share/hadoop/yarn:/opt/module/hadoop-3.3.1/share/hadoop/yarn/lib/*:/opt/module/hadoop-3.3.1/share/hadoop/yarn/*</value>
</property>
<!-- enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- log aggregation server URL -->
<property>
<name>yarn.log.server.url</name>
<value>http://hadoop102:19888/jobhistory/logs</value>
</property>
<!-- keep aggregated logs for 7 days (604800 seconds) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
14. MapReduce configuration file
vim mapred-site.xml
<configuration>
<!-- run MapReduce jobs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- JobHistory server address -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop102:10020</value>
</property>
<!-- JobHistory server web UI address -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop102:19888</value>
</property>
</configuration>
15. Configure workers
vim workers
# delete the original localhost entry and replace it with:
hadoop102
hadoop103
hadoop104
16. Add the following variables at the top of both start-dfs.sh and stop-dfs.sh (in the sbin directory of the Hadoop installation); Hadoop 3 refuses to start the daemons as root unless these users are declared
vim /opt/module/hadoop-3.3.1/sbin/start-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim /opt/module/hadoop-3.3.1/sbin/stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
17. Add the following variables at the top of both start-yarn.sh and stop-yarn.sh (also in sbin)
vim /opt/module/hadoop-3.3.1/sbin/start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
vim /opt/module/hadoop-3.3.1/sbin/stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
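If you would rather script steps 16 and 17 than open each file by hand, the same lines can be injected with sed (a sketch equivalent to the manual edits above; assumes GNU sed, which CentOS provides, and inserts the lines right after the shebang):
cd /opt/module/hadoop-3.3.1/sbin
sed -i '2i HDFS_DATANODE_USER=root\nHDFS_DATANODE_SECURE_USER=hdfs\nHDFS_NAMENODE_USER=root\nHDFS_SECONDARYNAMENODE_USER=root' start-dfs.sh stop-dfs.sh
sed -i '2i YARN_RESOURCEMANAGER_USER=root\nHADOOP_SECURE_DN_USER=yarn\nYARN_NODEMANAGER_USER=root' start-yarn.sh stop-yarn.sh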
18. Set the JAVA_HOME path in hadoop-env.sh
vim /opt/module/hadoop-3.3.1/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/opt/module/java-se-8u41-ri
19. Distribute the configuration to hadoop103 and hadoop104
./xsync /etc/profile
./xsync /opt/module/hadoop-3.3.1/
./xsync /opt/module/java-se-8u41-ri/
# run on hadoop103 and hadoop104
source /etc/profile
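To confirm that hadoop103 and hadoop104 received everything (optional check; ssh runs a non-login shell, so /etc/profile has to be sourced explicitly):
ssh hadoop103 "source /etc/profile && java -version && hadoop version"
ssh hadoop104 "source /etc/profile && java -version && hadoop version"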
20. Cluster start script
vim startHadoop.sh
#!/bin/bash
echo " =================== 启动 hadoop集群 ==================="
echo " --------------- 启动 hdfs ---------------"
ssh hadoop102 "/opt/module/hadoop-3.3.1/sbin/start-dfs.sh"
echo " --------------- 启动 yarn ---------------"
ssh hadoop103 "/opt/module/hadoop-3.3.1/sbin/start-yarn.sh"
echo " --------------- 启动 historyserver ---------------"
ssh hadoop102 "/opt/module/hadoop-3.3.1/bin/mapred --daemon start historyserver"
chmod +x startHadoop.sh
21. Cluster stop script
vim stopHadoop.sh
#!/bin/bash
echo " =================== 关闭 hadoop集群 ==================="
echo " --------------- 关闭 historyserver ---------------"
ssh hadoop102 "/opt/module/hadoop-3.3.1/bin/mapred --daemon stop historyserver"
echo " --------------- 关闭 yarn ---------------"
ssh hadoop103 "/opt/module/hadoop-3.3.1/sbin/stop-yarn.sh"
echo " --------------- 关闭 hdfs ---------------"
ssh hadoop102 "/opt/module/hadoop-3.3.1/sbin/stop-dfs.sh"
chmod +x stopHadoop.sh
22. Start the cluster and test it
# the first start requires formatting the NameNode
hdfs namenode -format
# run the start script
./startHadoop.sh
# once everything is up, the NameNode web UI is reachable at http://hadoop102:9870
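To see which daemons ended up on which node, a jps loop helps (optional check; jps ships with the JDK, and /etc/profile must be sourced in the non-login ssh shell):
for host in hadoop102 hadoop103 hadoop104; do echo ==== $host ====; ssh $host "source /etc/profile && jps"; done
# the processes should match the layout table in step 10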
23. Word count
hdfs dfs -mkdir /software
hdfs dfs -mkdir /wcinput
vim user.txt
zou zou zou
hhh hhh
abcd
yuan yuan yuan
123 123 123 123
# upload the file
hdfs dfs -put user.txt /wcinput
# run the job
hadoop jar /opt/module/hadoop-3.3.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.1.jar wordcount /wcinput /wcoutput
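When the job finishes, the per-word counts land in /wcoutput; view them with:
hdfs dfs -cat /wcoutput/part-r-00000
# expected output for the user.txt above:
# 123     4
# abcd    1
# hhh     2
# yuan    3
# zou     3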