# Check OpenStack server
#!/bin/bash
# Shared settings for the health checks below.
#
# /etc/hosts is expected to carry marker comments; the host lists are taken
# from the first column of the lines found between those markers:
#   "# For management"            .. "# For openstack management" -> IP_LIST
#   "# For openstack management"  .. "# For ceph management"      -> VIP
# CONTROLER_IP_LIST is column 4 of every nova-scheduler row printed by
# `nova service-list` (de-duplicated).

# Load the OpenStack admin credentials used by the CLI clients.
. /root/admin-openrc.sh

# SSH command prefix; callers append "@<host> <remote command>".
SSH_USERNAME=jitstack
SSH_KEY=/home/jitstack/.ssh/id_rsa
SSH_PARAMETER="ssh -i ${SSH_KEY} -o StrictHostKeyChecking=no ${SSH_USERNAME}"

VIP=$(
  awk '/# For openstack management/,/# For ceph management/ { print }' < /etc/hosts |
    grep -Ev '# For ceph management|# For openstack management' |
    awk '{print $1}'
)

CONTROLER_IP_LIST=$(
  nova service-list |
    awk -F '|' '/nova-scheduler/ {print $4}' |
    sort -u
)

IP_LIST=$(
  awk '/# For management/, /# For openstack management/ { print }' < /etc/hosts |
    grep -Ev '# For management|# For openstack management' |
    awk '{print $1}'
)
# Remove the libvirt network named "default" from every host in IP_LIST.
# Globals:  IP_LIST (read), SSH_PARAMETER (read)
# Outputs:  nothing; output of the destroy/undefine commands is discarded.
# NOTE: this changes the remote hosts (net-destroy + net-undefine).
SHUTDOWN_VIRTBR() {
  local node net_name
  for node in ${IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    net_name=$(${SSH_PARAMETER}@${node} sudo virsh net-list | awk '/default/ {print $1}')
    # Nothing to do unless the listing shows exactly one "default" entry.
    [[ ${net_name} == default ]] || continue
    ${SSH_PARAMETER}@${node} sudo virsh net-destroy default > /dev/null 2>&1
    ${SSH_PARAMETER}@${node} sudo virsh net-undefine default > /dev/null 2>&1
  done
}
# Flag hosts whose kernel log contains "error" or "failed" (case-insensitive).
# Globals:  IP_LIST (read), SSH_PARAMETER (read),
#           HARDWARE_WARNING_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "HARDWARE WARNING <hosts>" or "HARDWARE OK".
# Note:     a host whose ssh/dmesg call produces no output is treated as healthy.
CHECK_HARDWARE() {
  local host dmesg_hits
  # Start from a clean list so a repeated call never reports stale hosts.
  HARDWARE_WARNING_LIST=()
  for host in ${IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    # stderr is silenced on the ssh side so connection noise stays hidden.
    dmesg_hits=$(${SSH_PARAMETER}@"${host}" sudo dmesg -T 2>/dev/null | grep -Ei 'error|failed')
    if [[ -n ${dmesg_hits} ]]; then
      HARDWARE_WARNING_LIST+=("${host}")
    fi
  done
  if (( ${#HARDWARE_WARNING_LIST[@]} > 0 )); then
    echo -e "HARDWARE \033[33mWARNING\033[0m ${HARDWARE_WARNING_LIST[*]}"
  else
    echo -e "HARDWARE \033[32mOK \033[0m"
  fi
}
# Flag hosts for which `chronyc -n sources` prints nothing after its first
# three lines (presumably the header lines - confirm against chronyc output).
# Globals:  IP_LIST (read), SSH_PARAMETER (read),
#           NTP_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "NTP ERROR <hosts>" or "NTP OK".
CHECK_NTP() {
  local host sources
  # Start from a clean list so a repeated call never reports stale hosts.
  NTP_ERROR_LIST=()
  for host in ${IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    sources=$(${SSH_PARAMETER}@"${host}" sudo chronyc -n sources 2>/dev/null | awk 'NR>3 { print }')
    # No source rows (or an unreachable host) counts as an error.
    if [[ -z ${sources} ]]; then
      NTP_ERROR_LIST+=("${host}")
    fi
  done
  if (( ${#NTP_ERROR_LIST[@]} > 0 )); then
    echo -e "NTP \033[31mERROR \033[0m ${NTP_ERROR_LIST[*]}"
  else
    echo -e "NTP \033[32mOK \033[0m"
  fi
}
# Flag hosts whose vmstat field 15 is <= 30.
# (Field 15 is presumably the idle-CPU "id" column of procps vmstat - confirm
# on the target hosts.)
# Globals:  IP_LIST (read), SSH_PARAMETER (read),
#           CPU_WARNING_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "CPU WARNING <hosts>" or "CPU OK".
CHECK_CPU() {
  local host idle
  # Start from a clean list so a repeated call never reports stale hosts.
  CPU_WARNING_LIST=()
  for host in ${IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    idle=$(${SSH_PARAMETER}@"${host}" sudo vmstat 2>/dev/null | awk 'NR>2 {print $15}')
    # A missing or non-numeric reading (e.g. unreachable host) is flagged too;
    # 10# forces base 10 so a leading zero is not read as octal.
    if [[ ! ${idle} =~ ^[0-9]+$ ]] || (( 10#${idle} <= 30 )); then
      CPU_WARNING_LIST+=("${host}")
    fi
  done
  if (( ${#CPU_WARNING_LIST[@]} > 0 )); then
    echo -e "CPU \033[33mWARNING\033[0m ${CPU_WARNING_LIST[*]}"
  else
    echo -e "CPU \033[32mOK \033[0m"
  fi
}
# Flag hosts whose `free -g` second line, field 3, is <= 20.
# NOTE(review): in procps `free` that field is the *used* column of the Mem:
# row, so this warns when little memory is in use. That looks inverted -
# confirm whether free/available <= 20 GB was intended. Behaviour is kept.
# Globals:  IP_LIST (read), SSH_PARAMETER (read),
#           MEM_WARNING_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "MEM WARNING <hosts>" or "MEM OK".
CHECK_MEM() {
  local host mem_value
  # Start from a clean list so a repeated call never reports stale hosts.
  MEM_WARNING_LIST=()
  for host in ${IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    mem_value=$(${SSH_PARAMETER}@"${host}" sudo free -g 2>/dev/null | awk 'NR==2 {print $3}')
    # A missing or non-numeric reading (e.g. unreachable host) is flagged too;
    # 10# forces base 10 so a leading zero is not read as octal.
    if [[ ! ${mem_value} =~ ^[0-9]+$ ]] || (( 10#${mem_value} <= 20 )); then
      MEM_WARNING_LIST+=("${host}")
    fi
  done
  if (( ${#MEM_WARNING_LIST[@]} > 0 )); then
    echo -e "MEM \033[33mWARNING\033[0m ${MEM_WARNING_LIST[*]}"
  else
    echo -e "MEM \033[32mOK \033[0m"
  fi
}
# Flag hosts that have at least one filesystem at >= 80 in field 6 of
# `df -Th` (the percentage column, with the "%" stripped).
# Globals:  IP_LIST (read), SSH_PARAMETER (read),
#           DISK_WARNING_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "DISK WARNING <hosts>" or "DISK OK".
CHECK_DISK() {
  local host usage_list pct host_is_full
  # Start from a clean list so a repeated call never reports stale hosts.
  DISK_WARNING_LIST=()
  for host in ${IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    usage_list=$(${SSH_PARAMETER}@"${host}" sudo df -Th 2>/dev/null | awk 'NR>1 {print $6}' | sed 's/%//g')
    # The flag is per host: a full disk on one host must not mark the
    # hosts checked after it.
    host_is_full=0
    for pct in ${usage_list}; do
      # Skip values that are not plain numbers (e.g. "-").
      [[ ${pct} =~ ^[0-9]+$ ]] || continue
      if (( 10#${pct} >= 80 )); then
        host_is_full=1
        break
      fi
    done
    if (( host_is_full )); then
      DISK_WARNING_LIST+=("${host}")
    fi
  done
  if (( ${#DISK_WARNING_LIST[@]} > 0 )); then
    echo -e "DISK \033[33mWARNING\033[0m ${DISK_WARNING_LIST[*]}"
  else
    echo -e "DISK \033[32mOK \033[0m"
  fi
}
# Ping every management address and keepalived VIP with fping and report the
# ones fping calls "unreachable".
#
# Targets are built as <prefix><suffix> for every combination of:
#   prefix: first field of each `ip route` line after the first, cut at "0/"
#           (presumably turns "a.b.c.0/NN" into "a.b.c." - confirm)
#   suffix: last dot-separated field of the /etc/hosts addresses between the
#           "# For management" and "# For openstack management" markers
# plus the first field of each line of the virtual_ipaddress..track_script
# section of /etc/keepalived/keepalived.conf.
#
# Globals:  NETWORK_ERROR_LIST (written: unreachable addresses, one per line)
# Outputs:  one coloured status line: "NETWORK ERROR <addrs>" or "NETWORK OK".
# Note:     fping output is read straight from the pipe; the fixed-name file
#           /tmp/fping.log is no longer written.
CHECK_NETWORK() {
  local all_vips net_prefixes host_suffixes prefix suffix
  local -a targets=()

  all_vips=$(awk '/virtual_ipaddress/, /track_script/ {print $1}' /etc/keepalived/keepalived.conf |
    grep -Ev 'virtual_ipaddress|}|track_script|^$')
  net_prefixes=$(ip route | awk 'NR>1 {print $1}' | awk -F '0/' '{print $1}')
  host_suffixes=$(awk '/# For management/, /# For openstack management/ { print }' /etc/hosts |
    grep -Ev '# For management|# For openstack management' |
    awk '{print $1}' |
    awk -F '.' '{print $NF}')

  for prefix in ${net_prefixes}; do
    for suffix in ${host_suffixes}; do
      targets+=("${prefix}${suffix}")
    done
  done

  # all_vips is a whitespace-separated list and is split on purpose.
  NETWORK_ERROR_LIST=$(sudo fping "${targets[@]}" ${all_vips} 2>/dev/null | awk '/unreachable/ {print $1}')

  if [[ -n ${NETWORK_ERROR_LIST} ]]; then
    # Left unquoted on purpose so the addresses are joined on one line.
    echo -e "NETWORK \033[31mERROR \033[0m" ${NETWORK_ERROR_LIST}
  else
    echo -e "NETWORK \033[32mOK \033[0m"
  fi
}
# Flag controllers on which `netstat -tnlp` shows no line containing
# "memcached".
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           MEMCACHE_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "MEMCACHE ERROR <hosts>" or "MEMCACHE OK".
CHECK_MEMCACHE() {
  local node listener
  # Start from a clean list so a repeated call never reports stale hosts.
  MEMCACHE_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    listener=$(${SSH_PARAMETER}@"${node}" sudo netstat -tnlp 2>/dev/null | grep memcached)
    if [[ -z ${listener} ]]; then
      MEMCACHE_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#MEMCACHE_ERROR_LIST[@]} > 0 )); then
    echo -e "MEMCACHE \033[31mERROR \033[0m ${MEMCACHE_ERROR_LIST[*]}"
  else
    echo -e "MEMCACHE \033[32mOK \033[0m"
  fi
}
# Check MySQL replication threads on every controller.
# A controller is flagged when `show slave status\G` reports replication data
# and either Slave_IO_Running or Slave_SQL_Running is not exactly "Yes".
# Controllers that return neither field are skipped.
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           MYSQL_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "MYSQL ERROR <hosts>" or "MYSQL OK".
CHECK_MYSQL_SLAVE() {
  local node slave_status io_running sql_running
  # Start from a clean list so a repeated call never reports stale hosts.
  MYSQL_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # Query once and parse both fields from the same snapshot.
    # SSH_PARAMETER is a command string and must word-split.
    slave_status=$(${SSH_PARAMETER}@"${node}" 'sudo mysql -e "show slave status\G;"')
    # Match the field name exactly: a bare /Slave_SQL_Running/ would also pick
    # up the "Slave_SQL_Running_State" row and make a healthy replica look
    # broken.
    io_running=$(awk -F ': ' '$1 ~ /^[[:space:]]*Slave_IO_Running$/ {print $2}' <<< "${slave_status}")
    sql_running=$(awk -F ': ' '$1 ~ /^[[:space:]]*Slave_SQL_Running$/ {print $2}' <<< "${slave_status}")

    # No replication info at all: nothing to judge on this node.
    if [[ -z ${io_running} && -z ${sql_running} ]]; then
      continue
    fi
    if [[ ${io_running} != "Yes" || ${sql_running} != "Yes" ]]; then
      MYSQL_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#MYSQL_ERROR_LIST[@]} > 0 )); then
    echo -e "MYSQL \033[31mERROR \033[0m ${MYSQL_ERROR_LIST[*]}"
  else
    echo -e "MYSQL \033[32mOK \033[0m"
  fi
}
# Check RabbitMQ on every controller: healthy when `rabbitmqctl cluster_status`
# prints a line containing "running_nodes". On failure the service is
# restarted once and probed again; the controller is flagged only if the
# second probe fails too.
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           RABBITMQ_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "RABBITMQ ERROR <hosts>" or "RABBITMQ OK".
# NOTE: may restart rabbitmq-server on the remote host.
CHECK_RABBITMQ() {
  local node cluster_state
  # Start from a clean list so a repeated call never reports stale hosts.
  RABBITMQ_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    cluster_state=$(${SSH_PARAMETER}@"${node}" sudo rabbitmqctl cluster_status 2>/dev/null | grep running_nodes)
    if [[ -n ${cluster_state} ]]; then
      continue
    fi
    # One recovery attempt, then probe again.
    ${SSH_PARAMETER}@"${node}" sudo systemctl restart rabbitmq-server 2>/dev/null
    cluster_state=$(${SSH_PARAMETER}@"${node}" sudo rabbitmqctl cluster_status 2>/dev/null | grep running_nodes)
    if [[ -z ${cluster_state} ]]; then
      RABBITMQ_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#RABBITMQ_ERROR_LIST[@]} > 0 )); then
    echo -e "RABBITMQ \033[31mERROR \033[0m ${RABBITMQ_ERROR_LIST[*]}"
  else
    echo -e "RABBITMQ \033[32mOK \033[0m"
  fi
}
# Check Redis on every controller: healthy when `redis-cli info` prints a line
# containing "Replication". On failure the service is restarted once and
# probed again; the controller is flagged only if the second probe fails too.
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           REDIS_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "REDIS ERROR <hosts>" or "REDIS OK".
# NOTE: may restart redis on the remote host.
CHECK_REDIS() {
  local node info_hit
  # Start from a clean list so a repeated call never reports stale hosts.
  REDIS_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    info_hit=$(${SSH_PARAMETER}@"${node}" sudo redis-cli info 2>/dev/null | grep Replication)
    if [[ -n ${info_hit} ]]; then
      continue
    fi
    # One recovery attempt, then probe again.
    ${SSH_PARAMETER}@"${node}" sudo systemctl restart redis 2>/dev/null
    info_hit=$(${SSH_PARAMETER}@"${node}" sudo redis-cli info 2>/dev/null | grep Replication)
    if [[ -z ${info_hit} ]]; then
      REDIS_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#REDIS_ERROR_LIST[@]} > 0 )); then
    echo -e "REDIS \033[31mERROR \033[0m ${REDIS_ERROR_LIST[*]}"
  else
    echo -e "REDIS \033[32mOK \033[0m"
  fi
}
# Keystone smoke test: `openstack user list` must print at least one line
# containing "admin".
# Globals:  VIP (read; printed next to the error)
# Outputs:  one coloured status line: "KEYSTONE OK" or "KEYSTONE ERROR <VIP>".
CHECK_KEYSTONE() {
  local admin_rows
  admin_rows=$(openstack user list | grep admin)
  if [[ -n ${admin_rows} ]]; then
    printf '%b\n' "KEYSTONE \033[32mOK \033[0m"
    return
  fi
  printf '%b\n' "KEYSTONE \033[31mERROR \033[0m ${VIP}"
}
# Glance smoke test: `glance image-list` must print at least one line
# containing "Name".
# Globals:  VIP (read; printed next to the error)
# Outputs:  one coloured status line: "GLANCE OK" or "GLANCE ERROR <VIP>".
CHECK_GLANCE() {
  local header_row
  header_row=$(glance image-list | grep Name)
  if [[ -n ${header_row} ]]; then
    printf '%b\n' "GLANCE \033[32mOK \033[0m"
    return
  fi
  printf '%b\n' "GLANCE \033[31mERROR \033[0m ${VIP}"
}
# Report Nova services that are down.
# Takes column 4 of every `nova service-list` row containing "down" (header
# row excluded); presumably that column is the host name - confirm against
# the CLI output. A failing `nova service-list` is reported as an error
# against the VIP instead of being mistaken for "nothing is down".
# Globals:  VIP (read), NOVA_ERROR_LIST (written: matching column values)
# Outputs:  one coloured status line: "NOVA ERROR <...>" or "NOVA OK".
CHECK_NOVA() {
  local service_table
  if ! service_table=$(nova service-list); then
    # The API call itself failed, so empty output must not read as healthy.
    NOVA_ERROR_LIST=${VIP}
    echo -e "NOVA \033[31mERROR \033[0m ${VIP}"
    return
  fi
  NOVA_ERROR_LIST=$(grep -v Binary <<< "${service_table}" | awk -F '|' '/down/ {print $4}' | sort -u)
  if [[ -n ${NOVA_ERROR_LIST} ]]; then
    echo -e "NOVA \033[31mERROR \033[0m ${NOVA_ERROR_LIST}"
  else
    echo -e "NOVA \033[32mOK \033[0m"
  fi
}
# Report Neutron agents that are not alive.
# Takes column 4 of every `neutron agent-list` row containing "XXX";
# presumably "XXX" is the dead-agent marker and column 4 the host - confirm
# against the CLI output. A failing `neutron agent-list` is reported as an
# error against the VIP instead of being mistaken for "all agents alive".
# Globals:  VIP (read), NEUTRON_ERROR_LIST (written: matching column values)
# Outputs:  one coloured status line: "NEUTRON ERROR <...>" or "NEUTRON OK".
CHECK_NEUTRON() {
  local agent_table
  if ! agent_table=$(neutron agent-list); then
    # The API call itself failed, so empty output must not read as healthy.
    NEUTRON_ERROR_LIST=${VIP}
    echo -e "NEUTRON \033[31mERROR \033[0m ${VIP}"
    return
  fi
  NEUTRON_ERROR_LIST=$(awk -F '|' '/XXX/ {print $4}' <<< "${agent_table}" | sort -u)
  if [[ -n ${NEUTRON_ERROR_LIST} ]]; then
    echo -e "NEUTRON \033[31mERROR \033[0m ${NEUTRON_ERROR_LIST}"
  else
    echo -e "NEUTRON \033[32mOK \033[0m"
  fi
}
# Report Cinder services that are down.
# Takes column 3 of every `cinder service-list` row containing "down" and
# keeps the part before "@" (presumably host@backend - confirm against the
# CLI output). A failing `cinder service-list` is reported as an error
# against the VIP instead of being mistaken for "nothing is down".
# Globals:  VIP (read), CINDER_ERROR_LIST (written: matching column values)
# Outputs:  one coloured status line: "CINDER ERROR <...>" or "CINDER OK".
CHECK_CINDER() {
  local service_table
  if ! service_table=$(cinder service-list); then
    # The API call itself failed, so empty output must not read as healthy.
    CINDER_ERROR_LIST=${VIP}
    echo -e "CINDER \033[31mERROR \033[0m ${VIP}"
    return
  fi
  CINDER_ERROR_LIST=$(awk -F '|' '/down/ {print $3}' <<< "${service_table}" | awk -F '@' '{print $1}' | sort -u)
  if [[ -n ${CINDER_ERROR_LIST} ]]; then
    echo -e "CINDER \033[31mERROR \033[0m ${CINDER_ERROR_LIST}"
  else
    echo -e "CINDER \033[32mOK \033[0m"
  fi
}
# Check nginx on every controller: healthy when the page served on
# localhost:80 contains "nginx". On failure the service is restarted once and
# probed again; the controller is flagged only if the second probe fails too.
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           NGINX_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "NGINX ERROR <hosts>" or "NGINX OK".
# NOTE: may restart nginx on the remote host.
CHECK_NGINX() {
  local node page_hit
  # Start from a clean list so a repeated call never reports stale hosts.
  NGINX_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    page_hit=$(${SSH_PARAMETER}@"${node}" sudo curl -s --max-time 10 localhost:80 2>/dev/null | grep nginx)
    if [[ -n ${page_hit} ]]; then
      continue
    fi
    # One recovery attempt, then probe again.
    ${SSH_PARAMETER}@"${node}" sudo systemctl restart nginx 2>/dev/null
    page_hit=$(${SSH_PARAMETER}@"${node}" sudo curl -s --max-time 10 localhost:80 2>/dev/null | grep nginx)
    if [[ -z ${page_hit} ]]; then
      NGINX_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#NGINX_ERROR_LIST[@]} > 0 )); then
    echo -e "NGINX \033[31mERROR \033[0m ${NGINX_ERROR_LIST[*]}"
  else
    echo -e "NGINX \033[32mOK \033[0m"
  fi
}
# Check Tomcat on every controller: healthy when a HEAD request to
# localhost:8080/jitstack/ returns HTTP 200. On failure the service is
# restarted once and probed again; the controller is flagged only if the
# second probe fails too.
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           TOMCAT_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "TOMCAT ERROR <hosts>" or "TOMCAT OK".
# NOTE: may restart tomcat on the remote host.
CHECK_TOMCAT() {
  local node http_code
  # Start from a clean list so a repeated call never reports stale hosts.
  TOMCAT_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    # -w prints only the status code; the response itself goes to /dev/null.
    http_code=$(${SSH_PARAMETER}@"${node}" sudo curl -I -s --max-time 10 -o /dev/null -w '%{http_code}' localhost:8080/jitstack/)
    if [[ ${http_code} == 200 ]]; then
      continue
    fi
    # One recovery attempt, then probe again.
    ${SSH_PARAMETER}@"${node}" sudo systemctl restart tomcat 2>/dev/null
    http_code=$(${SSH_PARAMETER}@"${node}" sudo curl -I -s --max-time 10 -o /dev/null -w '%{http_code}' localhost:8080/jitstack/)
    if [[ ${http_code} != 200 ]]; then
      TOMCAT_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#TOMCAT_ERROR_LIST[@]} > 0 )); then
    echo -e "TOMCAT \033[31mERROR \033[0m ${TOMCAT_ERROR_LIST[*]}"
  else
    echo -e "TOMCAT \033[32mOK \033[0m"
  fi
}
# Check apache2 on every controller: healthy when the response from
# localhost:5000 contains "openstack". On failure the service is restarted
# once and probed again; the controller is flagged only if the second probe
# fails too.
# Globals:  CONTROLER_IP_LIST (read), SSH_PARAMETER (read),
#           APACHE2_ERROR_LIST (reset, then filled with flagged hosts)
# Outputs:  one coloured status line: "APACHE2 ERROR <hosts>" or "APACHE2 OK".
# NOTE: may restart apache2 on the remote host.
CHECK_APACHE2() {
  local node page_hit
  # Start from a clean list so a repeated call never reports stale hosts.
  APACHE2_ERROR_LIST=()
  for node in ${CONTROLER_IP_LIST}; do
    # SSH_PARAMETER is a command string and must word-split.
    page_hit=$(${SSH_PARAMETER}@"${node}" sudo curl -s --max-time 10 localhost:5000 2>/dev/null | grep openstack)
    if [[ -n ${page_hit} ]]; then
      continue
    fi
    # One recovery attempt, then probe again.
    ${SSH_PARAMETER}@"${node}" sudo systemctl restart apache2 2>/dev/null
    page_hit=$(${SSH_PARAMETER}@"${node}" sudo curl -s --max-time 10 localhost:5000 2>/dev/null | grep openstack)
    if [[ -z ${page_hit} ]]; then
      APACHE2_ERROR_LIST+=("${node}")
    fi
  done
  if (( ${#APACHE2_ERROR_LIST[@]} > 0 )); then
    echo -e "APACHE2 \033[31mERROR \033[0m ${APACHE2_ERROR_LIST[*]}"
  else
    echo -e "APACHE2 \033[32mOK \033[0m"
  fi
}
# Print two summary lines about the Ceph cluster:
#   "OSDs Total: .. UP: .. IN: .. Down: .."   parsed from `ceph osd stat`
#   "CEPH Total: ..GB Used: ..GB Available: ..GB" parsed from `ceph osd df`
# Globals:  size (written)
# Outputs:  the two lines above on stdout.
CHECK_CEPH()
{
# Field 6 of the `ceph osd pool ls detail` line(s) matching "vms"; used below
# as the divisor for the capacity figures (presumably the pool's replica
# count - confirm against the pool detail format). If it comes back empty or
# zero the divisions in the last awk call have no valid divisor.
size=$(ceph osd pool ls detail |awk '/vms/ {print $6}')
# perl removes everything from the start of the line through "osdmap" and the
# next ":"; awk then reads fields 1, 3 and 5 as total/up/in and prints
# Down = total - up. (Assumes a "N osds: U up, I in" style line - confirm for
# the Ceph release in use.)
ceph osd stat|grep -w osds|perl -pe 's/.*osdmap.*?://'| awk '// {{total_osd=$1}; {up_osd=$3}; {in_osd=$5}} END {printf "OSDs ";printf("Total: %s UP: %s IN: %s Down: %s\n"),total_osd,up_osd,in_osd,total_osd-up_osd}'
#rados df | perl -pe 's/total /total_/'| awk -v rs=$size '/total_/ {if($1=="total_space"){total=$2}; if($1=="total_used"){used=$2}; if($1=="total_avail"){avail=$2}} END {printf "CEPH ";printf("Total: %sGB Used: %sGB Available: %sGB\n"),total/rs,used/rs,avail/rs}'
# Fields 2-4 of the TOTAL row are divided by $size; awk uses only the leading
# number of each field, and the result is always labelled "GB" whatever unit
# suffix the field carried.
ceph osd df | awk -v rs=$size '/TOTAL/ {print "CEPH Total: "$2/rs"GB","Used: "$3/rs"GB","Available: "$4/rs"GB"}'
}
# Print resource-usage summary lines:
#   VMS   - instance count per value of column 4 of `nova list`
#   CPUS  - hypervisor vcpus vs. the sum of column 8 of `nova flavor-list`
#   RAMS  - hypervisor memory_mb vs. the sum of column 4 of `nova flavor-list`
#   DISKS - Ceph TOTAL size / pool size vs. the sum of the cinder "Size" column
# NOTE(review): the CPUS/RAMS sums run over the flavor table, i.e. over every
# defined flavor, not over running instances - confirm this is intended.
# Outputs:  the four summary lines on stdout (a newline follows the VMS line).
CHECK_VMS() {
  local all_ph_cpus all_ph_rams pool_size total_space

  # Rows 4..NR-1 of the table are the data rows; count them per column-4 value.
  nova list |
    awk '{line[NR]=$0} END {for(i=4;i<NR;i++) print line[i]}' |
    awk -F '|' '{print $4}' |
    sed 's/ //g' |
    awk '{a[$1]++;b++} END {printf "VMS Total: "b" ";for(i in a) printf i ": " a[i]" " ;}'
  echo ""

  all_ph_cpus=$(nova hypervisor-stats | grep -w vcpus | awk -F '|' '{print $3}' | sed 's/ //g')
  nova flavor-list |
    awk '{line[NR]=$0} END {for(i=4;i<NR;i++) print line[i]}' |
    awk -F '|' '{print $8}' |
    sed 's/ //g' |
    awk -v all_ph_cpus="${all_ph_cpus}" '{vcpus+=$1} END {printf("CPUS CPUS: %s VCPUS: %s RATIO: %s\n"),all_ph_cpus,vcpus,vcpus/all_ph_cpus}'

  all_ph_rams=$(nova hypervisor-stats | grep -w memory_mb | awk -F '|' '{print $3}' | sed 's/ //g')
  nova flavor-list |
    awk '{line[NR]=$0} END {for(i=4;i<NR;i++) print line[i]}' |
    awk -F '|' '{print $4}' |
    sed 's/ //g' |
    awk -v all_ph_rams="${all_ph_rams}" '{vrams+=$1} END {printf("RAMS RAMS: %sGB VRAMS: %sGB RATIO: %s\n"),all_ph_rams/1024,vrams/1024,vrams/all_ph_rams}'

  # Field 6 of the pool-detail line(s) matching "vms" (presumably the replica
  # count - confirm) and the TOTAL size from `ceph osd df` with "G" removed.
  pool_size=$(ceph osd pool ls detail | awk '/vms/ {print $6}')
  total_space=$(ceph osd df | awk '/TOTAL/ {print $2}' | sed 's/G//g')

  # `cinder list` is run once: piping it into a second `cinder list` only
  # doubled the API call and threw the first result away.
  # The Size column is located by its header, then summed.
  cinder list |
    grep -v '\-\-\-\-' |
    awk -F '|' 'NR==1{for(i=1;i<=NF;i++)if($i~/Size/)n=i}NR>1{print $n}' |
    sed 's/ //g' |
    awk -v sz="${pool_size}" -v total_space="${total_space}" '{vdisks+=$1} END {printf("DISKS DISKS: %sGB VDISKS: %sGB RATIO: %s\n"),total_space/sz,vdisks,vdisks*sz/total_space}'
}
# Entry point: first remove the libvirt "default" network on all hosts, then
# run the service checks and the resource-usage reports in a fixed order.
SHUTDOWN_VIRTBR

echo -e "\033[35mStart Check Services ...\033[0m"
for service_check in \
  CHECK_HARDWARE \
  CHECK_NTP \
  CHECK_CPU \
  CHECK_MEM \
  CHECK_DISK \
  CHECK_NETWORK \
  CHECK_MYSQL_SLAVE \
  CHECK_RABBITMQ \
  CHECK_MEMCACHE \
  CHECK_REDIS \
  CHECK_TOMCAT \
  CHECK_NGINX \
  CHECK_APACHE2 \
  CHECK_KEYSTONE \
  CHECK_GLANCE \
  CHECK_NOVA \
  CHECK_NEUTRON \
  CHECK_CINDER
do
  "${service_check}"
done

echo ""
echo -e "\033[35mStart Check Resources Usage ...\033[0m"
for service_check in CHECK_CEPH CHECK_VMS; do
  "${service_check}"
done