背景
Linux 环境中有一个很重要的服务需要持续运行,因此需要在发现服务停止后能够自动将其重新拉起。
下面是我在项目中用来监控 ClickHouse 服务的脚本,每隔 10 秒检查一次服务进程是否存在。
Shell 脚本的后台启动命令:nohup sh checkprocess.sh &
#!/bin/bash
# Watchdog script: restarts the monitored process when it has exited
# unexpectedly.
#
# File that restart events are appended to. The path is relative, so the log
# is created in whatever directory the script is started from.
log_file='restart_sh.log'
#######################################
# Print the current local date and time as "YYYY-MM-DD HH:MM:SS".
# Arguments: none
# Outputs:   one timestamp line on stdout
# Returns:   the exit status of date(1)
#######################################
TIMESTAMP() {
  # Call date directly: wrapping it in `echo $(...)` only adds a subshell and
  # an unquoted expansion, and hides date's exit status.
  date "+%Y-%m-%d %H:%M:%S"
}
#######################################
# Restart the monitored service when no matching process is found.
# Globals:
#   restart_process (read) - pattern matched against full command lines
#                            (pgrep -f treats it as an extended regex)
#   start_file      (read) - shell command line that starts the service
#   log_file        (read) - file that restart events are appended to
# Arguments: none
# Outputs:
#   Status lines on stdout; restart events are also appended to log_file.
#   Failures are written to stderr (start failures also go to log_file).
# Returns:
#   0 if the service is already running or the start command succeeded;
#   non-zero if the configuration is incomplete, the process check itself
#   failed, or the start command returned a non-zero status.
#######################################
restart_process_if_die() {
  if [ -z "${restart_process:-}" ] || [ -z "${start_file:-}" ] \
    || [ -z "${log_file:-}" ]; then
    echo "$(TIMESTAMP) restart_process, start_file and log_file must all be set" >&2
    return 2
  fi

  # pgrep exits 0 on a match and 1 on no match; anything else means the check
  # itself failed (bad pattern, pgrep missing, ...). Do not mistake such an
  # error for "service is down", otherwise a second instance could be started.
  local status=0
  pgrep -f -- "$restart_process" >/dev/null 2>&1 || status=$?
  case "$status" in
    0)
      echo "$(TIMESTAMP) $restart_process is running, no need to restart"
      return 0
      ;;
    1)
      ;;
    *)
      echo "$(TIMESTAMP) cannot check $restart_process (pgrep exit code $status), skip restart" >&2
      return "$status"
      ;;
  esac

  echo "$(TIMESTAMP) $restart_process got down, now I will restart it" | tee -a "$log_file"
  echo "Now I am in $PWD"
  printf '%s\n' "$start_file"

  # start_file holds a whole command line in a single string, hence eval.
  # It is configuration set by this script, never external input.
  if eval "$start_file"; then
    echo "$(TIMESTAMP) $restart_process restart successfully" | tee -a "$log_file"
    return 0
  else
    status=$?
    # Report the failure instead of claiming success unconditionally.
    echo "$(TIMESTAMP) $restart_process restart failed (exit code $status)" \
      | tee -a "$log_file" >&2
    return "$status"
  fi
}
# Pattern that restart_process_if_die searches for in the process list.
# NOTE(review): the start command below runs ".../clickhouse server" (with a
# space); the pattern presumably matches through the "clickhouse-server" path
# components of the -C/-P arguments -- confirm against the live process list.
restart_process='clickhouse-server'

# Complete command line used to start the service (config file via -C,
# pid file via -P, started with --daemon).
start_file='/usr/hdp/2.2.0.0-2041/clickhouse/bin/clickhouse server -C /usr/hdp/2.2.0.0-2041/clickhouse/conf/clickhouse-server/config.xml -P /usr/hdp/2.2.0.0-2041/clickhouse/clickhouse-server.pid --daemon'

# Supervise forever: check the service, report, wait 10 seconds, repeat.
while true; do
  restart_process_if_die
  echo "$(TIMESTAMP) now I will sleep 10S"
  sleep 10
done