对系统日志和数据库日志的检查很重要,出现问题时及时地通知系统管理员更为重要。本脚本是用 Python 编写的监控脚本,主要是为 zabbix 监控的自定义 key 而准备的。当然,大家也可以在返回值方面做修改,可以再写个发邮件的模块,做个定时任务,有问题时自动发邮件(之前写过一个发邮件的类,大家可以参考:http://wangwei007.blog.51cto.com/68019/978743)。在 zabbix 中自定义 key 来检测系统日志和数据库日志:
UnsafeUserParameters=1
UserParameter=check.sys_error,/usr/local/zabbix/bin/chk_err_log.py syslog
UserParameter=check.mysql_error,/usr/local/zabbix/bin/chk_err_log.py mysqllog
本脚本适合一台服务器多实例的 mysql 错误日志检测,也适用于单个实例的检测,可根据自己的需求做修改。
- #!/usr/bin/env python
- #encoding=utf-8
- import os, sys
def chk_err(log_file, tmp_file, type, print_list, port):
    """Append ``port`` to ``print_list`` when ``log_file`` has new "error" lines.

    The line number of the last matching line reported by a previous run is
    kept in ``tmp_file``; only matches beyond that line count as new.

    Args:
        log_file: Path of the log to scan. It is interpolated into a shell
            command, so shell constructs such as ``$(hostname)`` are expanded.
        tmp_file: State file holding the line number of the last reported match.
        type: Unused; kept so existing callers keep working.
        print_list: List that ``port`` is appended to when new errors exist.
        port: Value appended to ``print_list`` to identify the affected log.

    Raises:
        ValueError: If the line count or a grep line-number prefix is not a
            number.
    """
    def save_line_number(line_number):
        # Write the state directly instead of spawning an ``echo`` shell.
        with open(tmp_file, "w") as state:
            state.write("%s\n" % line_number)

    # NOTE(review): log_file is pasted unquoted into a shell command line, so
    # only trusted paths may be passed in.
    cur_num = int(os.popen("sudo grep '' %s | wc -l" % log_file).read().strip())

    old_num = 0
    if os.path.exists(tmp_file):
        with open(tmp_file) as state:
            saved = state.read().strip()
        # An empty or corrupt state file means "nothing reported yet".
        old_num = int(saved) if saved.isdigit() else 0
        if cur_num < old_num:
            # The log now has fewer lines than the saved position, so start
            # again from the top.
            old_num = 0
            save_line_number(0)
    else:
        save_line_number(0)

    # ``grep -n`` prefixes every match with "<line number>:".
    matches = os.popen("sudo grep -ni 'error' %s" % log_file).readlines()
    line_numbers = [int(match.partition(":")[0]) for match in matches]
    if any(number > old_num for number in line_numbers):
        save_line_number(line_numbers[-1])
        print_list.append(port)
def _store_line_number(tmp_file, line_number):
    """Write ``line_number`` followed by a newline to ``tmp_file``."""
    with open(tmp_file, "w") as state:
        state.write("%s\n" % line_number)


def _syslog_has_new_errors(log_file, tmp_file):
    """Return True when ``log_file`` has "error" lines past the saved position.

    Lines mentioning ``snmpd`` or ``sftp`` are ignored. The line number of the
    last match is saved in ``tmp_file`` whenever new errors are found.
    """
    cur_num = int(os.popen("sudo grep '' %s | wc -l" % log_file).read().strip())

    old_num = 0
    if os.path.exists(tmp_file):
        with open(tmp_file) as state:
            saved = state.read().strip()
        # An empty or corrupt state file means "nothing reported yet".
        old_num = int(saved) if saved.isdigit() else 0
        if cur_num < old_num:
            # The log now has fewer lines than the saved position, so start
            # again from the top.
            old_num = 0
            _store_line_number(tmp_file, 0)
    else:
        _store_line_number(tmp_file, 0)

    # ``grep -n`` prefixes every match with "<line number>:".
    matches = os.popen(
        "sudo grep -ni 'error' %s|grep -v snmpd|grep -v sftp" % log_file
    ).readlines()
    line_numbers = [int(match.partition(":")[0]) for match in matches]
    if not any(number > old_num for number in line_numbers):
        return False
    _store_line_number(tmp_file, line_numbers[-1])
    return True


def _mysqld_instances(ps_lines):
    """Return ``(port, datadir)`` pairs parsed from mysqld ``ps`` lines.

    Only the exact ``--port=`` and ``--datadir=`` options are read, so options
    that merely contain "port" (for example ``--report-host``) are not mistaken
    for the port. Processes lacking either option, or with port "0", are
    skipped.
    """
    instances = []
    for line in ps_lines:
        # Reset per process so values never leak from the previous line.
        port = "0"
        datadir = None
        for option in line.split("--"):
            name, separator, value = option.partition("=")
            if not separator:
                continue
            name = name.strip()
            if name == "datadir":
                datadir = value.strip()
            elif name == "port":
                port = value.strip()
        if port == "0" or datadir is None:
            continue
        instances.append((port, datadir))
    return instances


def chk_err_log(type):
    """Check the system log or the MySQL error logs for new "error" lines.

    Args:
        type: ``"syslog"`` to scan ``/var/log/messages``, or ``"mysqllog"`` to
            scan the error log of every running mysqld instance.

    Returns:
        For ``"syslog"``: ``"1"`` when new errors were found, else ``"0"``.
        For ``"mysqllog"``: the list of ports whose log has new errors,
        ``"0"`` when there are none, or a message string when no mysqld
        process is running.
        For any other value: ``None``.
        If anything raises, the exception object is returned instead of being
        propagated.
    """
    try:
        homedir = "/home/zabbix"
        if not os.path.exists(homedir):
            os.mkdir(homedir)

        if type == "syslog":
            tmp_file = "%s/.syslog_num" % homedir
            if _syslog_has_new_errors("/var/log/messages", tmp_file):
                return "1"
            return "0"
        elif type == "mysqllog":
            psinfo = os.popen(
                "ps auxww|grep mysqld|grep -v root|grep -v grep"
            ).readlines()
            if not psinfo:
                return "No mysqld running in this server now"

            print_list = []
            for port, datadir in _mysqld_instances(psinfo):
                if port == "3306":
                    # $(hostname) is expanded by the shell that runs grep.
                    log_file = "%s/$(hostname).err" % datadir
                else:
                    log_file = "%s/mysql.err" % datadir
                tmp_file = "%s/.mysqllog_%s" % (homedir, port)
                chk_err(log_file, tmp_file, type, print_list, port)
            if not print_list:
                return "0"
            return print_list
    except Exception as e:
        # The caller prints whatever is returned, so hand the error back
        # rather than letting a traceback escape.
        return e
if __name__ == "__main__":
    # The check type ("syslog" or "mysqllog") is the only argument; exit with
    # a usage message instead of an IndexError traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: %s syslog|mysqllog" % sys.argv[0])
    # print(...) with a single argument prints the same on Python 2 and 3.
    print(chk_err_log(sys.argv[1]))
本文转自 lover007 51CTO博客,原文链接:http://blog.51cto.com/wangwei007/1106735,如需转载请自行联系原作者