之前有一篇文章讲到,使用 update percent 监控 agent 的数据提交状况,可以有效地发现 agent 的故障;而使用 unreachable 检测时,会因为 unreachable poller 进程繁忙(busy)而造成误报(可以通过增大 StartPollersUnreachable 和 UnreachablePeriod 来缓解)。下面附一个 Python 小程序,用来计算各 host 的 update percent(当某个 host 的更新数为 0 时,普通的内连接会把该 host 丢掉而无法报警,这里使用 RIGHT JOIN + IFNULL 解决)。
代码如下:
#!/usr/bin/python
# -*- coding: utf8 -*-
# get zabbix agent update percent
import os
import smtplib
import sys
from email.MIMEText import MIMEText

import MySQLdb
# Python 2 only: sys.setdefaultencoding() is not reachable on the sys module
# after interpreter start-up, so the module is reloaded first to get it back.
# Setting utf-8 makes implicit str <-> unicode conversions use utf-8 instead
# of ascii.  NOTE(review): this hack does not exist on Python 3.
reload (sys)
sys.setdefaultencoding( 'utf-8' )
def SendMail(sub, content, to_list=None):
    """Send an HTML e-mail through the configured SMTP server.

    The SMTP settings are read from the module-level names ``mail_host``,
    ``mail_user`` and ``mail_pass``; they must be assigned before this
    function is called.

    Args:
        sub: Subject line of the message.
        content: HTML body of the message (utf-8).
        to_list: List of recipient addresses.  Defaults to
            ``["ericni.ni@vipshop.com"]`` when omitted.

    Returns:
        True when the message was handed to the SMTP server, False when
        connecting, logging in or sending failed (the error is printed).
    """
    # Build the default per call instead of sharing one mutable list object
    # across all calls.
    if to_list is None:
        to_list = ["ericni.ni@vipshop.com"]

    me = mail_user + "<" + mail_user + ">"
    msg = MIMEText(content, 'html', 'utf-8')
    msg['Subject'] = sub
    msg['From'] = me
    # Address lists in mail headers are comma separated; ";" is not a valid
    # separator and breaks the header for more than one recipient.
    msg['To'] = ", ".join(to_list)

    s = smtplib.SMTP()
    try:
        s.connect(mail_host)
        s.login(mail_user, mail_pass)
        s.sendmail(me, to_list, msg.as_string())
        return True
    except Exception as e:
        # Best effort: report the problem and let the caller decide.
        print(str(e))
        return False
    finally:
        # Release the socket on both the success and the failure path.
        s.close()


def getAll(sql):
    """Run ``sql`` against the MySQL database and return all result rows.

    Args:
        sql: The SQL statement to execute.

    Returns:
        The rows returned by ``cursor.fetchall()``.  If executing the
        statement fails, the error is printed and an empty tuple is
        returned, so callers can treat failure as "no rows".
    """
    # Placeholder connection parameters; MySQLdb.connect takes them
    # positionally as host, user, password, database.
    db = MySQLdb.connect('xxx', 'xxx', 'xxx', 'xxx')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            result = cursor.fetchall()
        except Exception as e:
            print("failed info %s" % (str(e)))
            # Keep ``result`` bound so the code below cannot hit a NameError.
            result = ()
        finally:
            cursor.close()
    finally:
        # Always give the connection back, even when the query failed.
        db.close()

    print(result)
    print(type(result))
    return result
# Rows whose update percent is at or below this value are printed in red.
_CRITICAL_PERCENT = 50

# Static head of the report mail: page style plus the table header row.
_REPORT_HEAD = """
<html> <head> <meta http-equiv="Content-Type" content="text/html; charset=utf-8"> <title>Zabbix agent update percent</title> <style type="text/css">
body { font-size: 14px; color: #333;background-color: #fff;}
td { border: 1px solid #C1DAD7;padding: 4px 10px; line-height: 24px;}
table {border-collapse: collapse; width: 96%;}
.divtd {color:#E28E40;}
.divcss5{ color:#F00;}
</style>
</head> <body> <table style="border-collapse: collapse; width: 60%;">
<tbody>
<tr>
<td><div class="divtd">domain</div></td>
<td><div class="divtd">ip</div></td>
<td><div class="divtd">percent</div></td>
</tr>
"""

# Closes the table and the document opened by _REPORT_HEAD.
_REPORT_TAIL = " </tbody> </table> </body> </html> "


def _build_report_html(rows):
    """Render ``rows`` of (hostname, ip, percent) as the HTML report body."""
    parts = [_REPORT_HEAD]
    for row in rows:
        percent = float(row[2])
        if percent <= _CRITICAL_PERCENT:
            percent_cell = (
                """<td><div style="color: #F80000;">%.2f</div></td>""" % percent
            )
        else:
            percent_cell = "<td><div>%.2f</div></td>" % percent
        parts.append("<tr>")
        parts.append("<td><div>%s</div></td>" % (row[0],))
        parts.append("<td><div>%s</div></td>" % (row[1],))
        parts.append(percent_cell)
        parts.append("</tr>")
    parts.append(_REPORT_TAIL)
    return "".join(parts)


def getReport(allInfo):
    """Mail an HTML table of the hosts contained in ``allInfo``.

    Args:
        allInfo: Sequence of rows; each row provides the host name at
            index 0, the IP address at index 1 and the update percent at
            index 2.

    Returns:
        None.  When ``allInfo`` is empty (or None) nothing is rendered and
        no mail is sent, so a healthy run does not produce an empty alert.
    """
    print(allInfo)
    print(type(allInfo))
    if not allInfo:
        return

    mailcontent = _build_report_html(allInfo)
    print(mailcontent)
    SendMail("Zabbix host update percent", mailcontent.encode('utf-8'))
if __name__ == "__main__":
    # SMTP settings (placeholders).  They are assigned at module level on
    # purpose: SendMail reads mail_host / mail_user / mail_pass as globals.
    mail_host = "xxx"
    mail_user = "xxxx"
    mail_pass = "xxxx"
    # NOTE(review): mail_port is defined but SendMail never uses it.
    mail_port = "xxxx"

    # Per enabled host (hosts.status=0), over its enabled items with
    # delay < 900:
    #   inner a = number of those items whose lastclock is within the last
    #             1800 seconds,
    #   inner b = number of those items in total.
    # RIGHT JOIN + IFNULL keeps hosts with no recently updated item at all
    # (they get 0 percent instead of disappearing from the result).
    # Only hosts below 80 percent are returned, lowest percent first.
    sql = """
select b.hostname ,c.ip,a.update_percent as uppercent from (
select b.hostid,ROUND(IFNULL(a.aa,0)*100/b.bb,2) as update_percent from (select hostid,count(*) as aa from items where lastclock > UNIX_TIMESTAMP()-1800 and delay < 900 and hostid in (select hostid from hosts where status=0) and status = 0 group by hostid ) a RIGHT JOIN (select hostid,count(*) as bb from items where delay < 900 and status = 0 and hostid in (select hostid from hosts where status=0) group by hostid) b ON a.hostid=b.hostid)a,(select hostid,lower(host) as hostname from hosts where status=0)b, (select hostid,ip from interface where type='1')c where a.hostid=b.hostid and b.hostid=c.hostid having(a.update_percent) < 80 order by uppercent; """
    allInfo = getAll(sql)
    getReport(allInfo)
|
产生的报警邮件如下:
本文转自菜菜光 51CTO博客,原文链接:http://blog.51cto.com/caiguangguang/1345789,如需转载请自行联系原作者