[System initialization]
sudo systemctl stop firewalld
sudo systemctl disable firewalld
sudo setenforce 0
sudo vi /etc/sysconfig/selinux
SELINUX=disabled
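The edit to /etc/sysconfig/selinux only takes full effect after a reboot; a quick sanity check with the standard SELinux tooling:
getenforce   # Permissive right after setenforce 0, Disabled after a reboot with SELINUX=disabled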
sudo tar -zxf docker-18.06.3-ce.tgz -C /usr/local/
sudo cp /usr/local/docker/* /usr/bin/
sudo mkdir -p /home/docker
[Fix Docker auto-start]
Add the unit file Docker needs to start via systemd
sudo vi /usr/lib/systemd/system/docker.service
[Unit]
Description=Docker Application Container Engine
Documentation=https://docs.docker.com
After=network-online.target firewalld.service
Wants=network-online.target
[Service]
Type=notify
ExecStart=/usr/bin/dockerd --graph /home/docker
ExecReload=/bin/kill -s HUP $MAINPID
LimitNOFILE=infinity
LimitNPROC=infinity
TimeoutStartSec=0
Delegate=yes
KillMode=process
Restart=on-failure
StartLimitBurst=3
StartLimitInterval=60s
[Install]
WantedBy=multi-user.target
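Because the unit file was created by hand, systemd must re-read its unit files before the service can be started; this is the standard step:
sudo systemctl daemon-reload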
sudo groupadd docker
Add your user to the docker group
sudo usermod -aG docker $USER
Add the logged-in user to the docker group (gpasswd -a is equivalent to usermod -aG)
sudo gpasswd -a $USER docker
Refresh the group membership in the current shell
newgrp docker
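A quick check that the group change took effect in the current session:
id   # the docker group should appear in the group list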
Start Docker via systemctl
sudo systemctl start docker
Enable Docker to start at boot
sudo systemctl enable docker
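To confirm the daemon is running and using /home/docker as its data root (standard docker/systemctl commands):
systemctl status docker
docker info | grep -i 'root dir'   # should show /home/docker because of --graph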
Confirm whether the database should be switched to MariaDB
[Database: needs confirmation]
Copy the SQL scripts into the database container
[Improved: use docker cp for the SQL scripts]
Use docker cp to copy dataexa-insight-microservice-poc.sql into the root directory of the container
docker cp /home/dataexa/insight-deploy/resources/mirrors/mysql/dataexa-insight-microservice-poc.sql mysql:/
docker exec -it mysql bash
mysql -uroot -pDataExa5528280
create database `dataexa-insight-microservice-poc`
default character set utf8 default collate utf8_general_ci;
show databases;
use `dataexa-insight-microservice-poc`;
Use source with the full path to run the SQL script
source /dataexa-insight-microservice-poc.sql
show tables;
Use docker cp to copy xxl-job-poc.sql into the container's root directory (run this from the host)
docker cp /home/dataexa/insight-deploy/resources/xxl/xxl-job-poc.sql mysql:/
source /xxl-job-poc.sql
Detach from the Docker container without stopping it
Keyboard: press Ctrl+P followed by Ctrl+Q; when "read escape sequence" appears, you have detached cleanly
[Corrected unzip command]
unzip /home/dataexa/insight-deploy/resources/html/platform.zip -d /home/dataexa/insight-microservice/workspace/nginx_workspace/html/
unzip must be installed first
unzip [the offline unzip package was missing]
sudo rpm -ivh /home/dataexa/insight-deploy/resources/tools_package/offline_installer/centos/unzip-6.0-20.el7.x86_64.rpm
Verify that unzip installed successfully
which unzip
/usr/bin/unzip
[When copy-pasting, the space before -v was lost]
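For reference, a purely hypothetical docker run fragment showing the space that must precede -v (image name and paths are placeholders, not from this deployment):
docker run -d --name nginx -v /host/nginx/nginx.conf:/etc/nginx/nginx.conf nginx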
[After editing nginx.conf]
docker restart nginx
Before restarting, detach from the container with Ctrl+P, Ctrl+Q
Restart the elasticsearch container
docker restart elasticsearch
Install bzip2 offline
sudo rpm -ivh insight-deploy/resources/tools_package/offline_installer/centos/bzip2-1.0.6-13.el7.x86_64.rpm
Check that bzip2 is installed
which bzip2
/usr/bin/bzip2
The python3.5 environment used by conda is now shipped as a .zip instead of a .tar.gz
[Corrected command]
unzip /home/dataexa/insight-deploy/resources/conda/python/python3.5.zip -d /home/dataexa/anaconda3/envs
Additionally create these paths [Python monitoring paths]
mkdir -p /home/dataexa/insight-microservice/workspace/python_service_workspace/container_monitor
mkdir -p /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5
vim addendum [to be fixed]: vim was not usable
sudo cp /home/dataexa/insight-deploy/resources/tools_package/offline_installer/ubuntu/vim.zip /usr/local/
cd /usr/local
sudo unzip vim.zip
sudo ln -s /usr/local/vim/bin/vim /usr/local/bin/vim
Check that vim is installed
sudo which vim
python [corrections]
dlv5 change [added the nohup python launch]
nohup python /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5/insight-v5service/tornado_service.py >> /home/dataexa/insight-microservice/workspace/python_service_workspace/dlv5/insight-v5service/nohup.out 2>&1 &
insight-tagging [create the directory first; it did not exist before the cp]
mkdir -p /home/dataexa/insight-microservice/workspace/python_service_workspace/data_labeling
cd /home/dataexa/insight-microservice/workspace/python_service_workspace/data_labeling/insight-tagging/
Before starting, clear the cached bytecode
find . -name "__pycache__" | xargs rm -rf
Start command
nohup python tornado_service.py >/dev/null 2>&1 &
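To confirm both tornado services stayed up after the nohup launches, a simple process check:
ps -ef | grep tornado_service | grep -v grep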
Create the Spark working directory before extracting
mkdir -p /home/dataexa/insight-microservice/workspace/spark_workspace
Create the Hadoop working directory before extracting
mkdir -p /home/dataexa/insight-microservice/workspace/hadoop_workspace/
Create the yarn-conf working directory before extracting
mkdir -p /home/dataexa/insight-microservice/workspace/hadoop_workspace/yarn-conf/
Hadoop deployment [environment variables corrected]: set both the JDK and Hadoop environment variables under the dataexa user
vi ~/.bashrc
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/home/dataexa/insight-deploy/resources/hadoop/hadoop-2.9.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH
If formatting the filesystem with hdfs namenode -format fails:
cd /tmp
move the hadoop-related files (the ones with hadoop in their names) to another directory as a backup,
then re-format the filesystem (a sketch follows)
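A minimal sketch of this backup-and-reformat step (the backup path is arbitrary):
mkdir -p ~/hadoop_tmp_backup
mv /tmp/*hadoop* ~/hadoop_tmp_backup/   # move the old hadoop files aside
hdfs namenode -format                   # then re-format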
If you see: 20/05/21 07:23:26 INFO ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8032. Already tried 0 time(s); retry policy is RetryUpT
sbin/start-yarn.sh # starting YARN resolves this
If you see: org.apache.hadoop.mapred.FileAlreadyExistsException: Output directory hdfs://localhost:9000/user/dataexa/output already exists
hdfs dfs -rm -r output
then re-run the job.
Full Hadoop deployment
[JDK and Hadoop environment variables moved under the dataexa user]
Extract the JDK
tar xf /home/dataexa/insight-deploy/resources/jdk/jdk-8u231-linux-x64.tar.gz -C /home/dataexa/insight-deploy/resources/jdk
Add the environment variables
vi ~/.bashrc
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH
export HADOOP_HOME=/home/dataexa/insight-deploy/resources/hadoop/hadoop-2.9.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
Apply the dataexa user environment
source ~/.bashrc
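To confirm the variables are in effect for the dataexa user (standard version checks):
java -version
hadoop version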
Enter the Hadoop directory
cd /home/dataexa/insight-deploy/resources/hadoop/hadoop-2.9.0
vi etc/hadoop/hadoop-env.sh
export JAVA_HOME=/home/dataexa/insight-deploy/resources/jdk/jdk1.8.0_231
export PATH=$JAVA_HOME/bin:$PATH
In the Hadoop installation directory, create an input directory
mkdir input
Copy the input files into the input directory
cp etc/hadoop/*.xml input
Run the Hadoop job
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.0.jar grep input output 'dfs[a-z.]+'
The job above uses the examples jar bundled with Hadoop; it searches the input files for strings matching 'dfs[a-z.]+' and counts them.
Check the result
cat output/*
Add the host address and mapping
sudo vi /etc/hosts
192.168.1.237 localhost
Edit the configuration files
vi etc/hadoop/core-site.xml
vi etc/hadoop/hdfs-site.xml
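The exact property values are site-specific; a minimal pseudo-distributed sketch, consistent with the hdfs://localhost:9000 address used elsewhere in these notes and with the standard Hadoop single-node guide, would be:
core-site.xml:
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
hdfs-site.xml:
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>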
Set up passwordless SSH login to the local machine
ssh-keygen -t rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
Test the SSH login
ssh dataexa@192.168.1.237
Format the filesystem
hdfs namenode -format
Start the NameNode and DataNode daemons
sbin/start-dfs.sh
Verify the daemons are running
jps
Visit the NameNode web UI (http://localhost:50070/ by default in Hadoop 2.x)
Create a working directory in HDFS
hdfs dfs -mkdir -p /user/test
Copy the input files into the HDFS directory
hdfs dfs -put etc/hadoop /user/test/input
Verify by listing the files
hadoop fs -ls /user/test/input
Run the Hadoop job
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.0.jar grep /user/test/input output 'dfs[a-z.]+'
Check the result
hdfs dfs -cat output/*
Or copy the output from HDFS to the local filesystem and view it
bin/hdfs dfs -get output output
cat output/*
Stop the daemons
sbin/stop-dfs.sh
Run a YARN job
Edit the configuration files
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vi etc/hadoop/mapred-site.xml
vi etc/hadoop/yarn-site.xml
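Likewise, the minimal single-node settings from the standard Hadoop guide (adjust to the actual site):
mapred-site.xml:
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
yarn-site.xml:
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>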
Start the ResourceManager and NodeManager daemons
sbin/start-yarn.sh
Visit the ResourceManager web UI (http://localhost:8088/ by default)
# Run the Hadoop job
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.9.0.jar grep /user/test/input output 'dfs[a-z.]+'
If this step fails, run sbin/stop-yarn.sh and then sbin/start-yarn.sh
Check the result
hdfs dfs -cat output/*
Stop the daemons
sbin/stop-yarn.sh
Start the JobHistory daemon
sbin/mr-jobhistory-daemon.sh start historyserver
Visit the Job History Server web UI; if it shows data, it is working
http://localhost:19888/