-
jstat无法使用的问题
因为java进程是以tomcat用户启动的,所以用其他用户(例如zabbix)直接执行jstat命令时,会报not found错误。
解决方法,网上很多都不行,以下可以
简单的说,就是sudo -u tomcat /usr/java/jdk/bin/jstat -gcutil 22631
2.vim /etc/sudoers
增加这两行
1
2
|
zabbix ALL=(tomcat) NOPASSWD: /bin/netstat , /usr/bin/omreport , /usr/java/jdk/bin/jstat , /usr/bin/python
tomcat ALL=(ALL) NOPASSWD: /bin/kill , /bin/chown , /usr/java/jdk/bin/jstat , /usr/bin/python , /bin/netstat , /usr/bin/omreport
|
就是zabbix必须sudo成tomcat
3.jstat的输出变化
java1.8取消了永久区
1
2
3
|
sudo -u tomcat /usr/java/jdk/bin/jstat -gc 22631
S0C S1C S0U S1U EC EU OC OU MC MU CCSC CCSU YGC YGCT FGC FGCT GCT
19968.0 20992.0 0.0 4810.3 88576.0 42080.7 393216.0 32415.5 43952.0 42767.3 5296.0 5074.8 25 0.428 2 0.150 0.578 |
与1.7的不同,去掉了Perm
MC:元空间(Metaspace,1.8中取代永久区)容量,单位KB
MU:元空间已使用大小,单位KB
CCSC:压缩类空间(Compressed Class Space)容量,单位KB
CCSU:压缩类空间已使用大小,单位KB
4.zabbix agentd配置变化
1
2
3
|
cat /usr/local/zabbix/etc/zabbix_agentd.conf.d/discovery_java_status.conf
UserParameter=java.discovery,/usr/bin/python /usr/local/zabbix/bin/java_discovery.py
UserParameter=java.discovery_status[*],/usr/bin/python /usr/local/zabbix/bin/jstat_status.py $1 $2
|
cd /usr/local/zabbix/bin
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
|
cat java_discovery.py
#!/usr/bin/python
"""Zabbix low-level discovery of Java (Tomcat) applications on this host.

Prints a JSON document of the form ``{"data": [{"{#JAVA_NAME}": name}, ...]}``
on stdout.  Names are read from ``java_names.txt`` (relative to the current
working directory) when that file exists; otherwise from the ``*_tomcat``
entries under ``/opt/xx``, provided the logstash/elasticsearch directories
checked below are absent.  When neither source applies the list is empty.
"""
import subprocess
import os
import socket
import json
import glob

java_names_file = 'java_names.txt'
javas = []
# Raw output lines of whichever discovery command ran.  Stays empty when no
# source is available so the script still prints valid JSON instead of
# failing on an undefined variable.
found = []

if os.path.isfile(java_names_file):
    # The file name has to be interpolated with % (java_names_file): a Python
    # variable is not expanded inside the ''' ''' quoted shell command.
    args = '''awk -F':' '{print $1':'$2}' %s''' % (java_names_file)
    found = subprocess.Popen(
        args, shell=True, stdout=subprocess.PIPE, universal_newlines=True
    ).communicate()[0].splitlines()
elif (glob.glob('/opt/xx/*_tomcat')
        and not os.path.isdir('/opt/logs/logstash')
        and not os.path.isdir('/opt/app/elasticsearch/config')):
    found = subprocess.Popen(
        'cd /opt/xx && ls *_tomcat|grep _tomcat',
        shell=True, stdout=subprocess.PIPE, universal_newlines=True
    ).communicate()[0].splitlines()

for java in found:
    # Drop the line terminator and any leading/trailing ':' left by the
    # listing command; skip lines that end up empty.
    name = java.strip('\n').strip(':')
    if name:
        javas.append({'{#JAVA_NAME}': name})

print(json.dumps({'data': javas}, indent=4, separators=(',', ':')))
|
自己做修改,修改命令 cd /opt/xx && ls *_tomcat|grep _tomcat
这里说明一下,tomcat应用最好起统一的名字,比如xx_tomcat等,这样写起脚本来方便。
cd /usr/local/zabbix/bin
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
|
cat jstat_status.py
#!/usr/bin/python import subprocess
import sys
import os
__maintainer__ = "John Wang"
jps = '/usr/java/jdk/bin/jps'
jstat = '/usr/java/jdk/bin/jstat'
zabbix_sender = "/usr/local/zabbix/bin/zabbix_sender"
zabbix_conf = "/usr/local/zabbix/etc/zabbix_agentd.conf" send_to_zabbix = 1 ip=os.popen( "ifconfig|grep 'inet '|grep -v '127.0'|xargs|awk -F '[ :]' '{print $3}'" ).readline().rstrip()
serverip= "你的zabbix服务端ip"
# Example discovery entry: "{#JAVA_NAME}":"tomcat_web_1"
def usage():
    """Display program usage on stdout, then exit with status 1."""
    # Joined with single spaces so the output is identical to the Python 2
    # ``print a, b, c`` statement while also running on Python 3.
    print(" ".join(["\nUsage : ", sys.argv[0], " java_name alive|all"]))
    print("Modes : \n\talive : Return pid of running processs\n\tall : Send jstat stats as well")
    sys.exit(1)
class Jprocess:
    """One monitored Java process, identified by name, and its jstat metrics.

    ``pdict`` holds the process name (``jpname``), its PID (``pid``, ``""``
    until found) and the raw jstat columns keyed by their header names.
    ``zdict`` holds the values that are sent to Zabbix, keyed by metric name.

    The methods rely on the module-level settings ``jstat``,
    ``zabbix_sender``, ``zabbix_conf``, ``send_to_zabbix``, ``serverip`` and
    ``ip``; ``send_to_zabbix()`` additionally expects a module-level open
    file object ``FNULL`` to which zabbix_sender output is redirected.
    """

    def __init__(self, arg):
        """Create the record for the process whose command line contains *arg*."""
        self.pdict = {
            "jpname": arg,
            # Present from the start so the other methods can be called
            # safely before chk_proc().
            "pid": "",
        }

        # The Perm_* entries are never filled in (JDK 1.8 jstat has no
        # permanent generation columns) and therefore stay 0.
        self.zdict = {
            "Heap_used": 0,
            "Heap_ratio": 0,
            "Heap_max": 0,
            "Perm_used": 0,
            "Perm_ratio": 0,
            "Perm_max": 0,
            "S0_used": 0,
            "S0_ratio": 0,
            "S0_max": 0,
            "S1_used": 0,
            "S1_ratio": 0,
            "S1_max": 0,
            "Eden_used": 0,
            "Eden_ratio": 0,
            "Eden_max": 0,
            "Old_used": 0,
            "Old_ratio": 0,
            "Old_max": 0,
            "YGC": 0,
            "YGCT": 0,
            "YGCT_avg": 0,
            "FGC": 0,
            "FGCT": 0,
            "FGCT_avg": 0,
            "GCT": 0,
            "GCT_avg": 0,
        }

    def chk_proc(self):
        """Find the PID of the process, store it in ``pdict['pid']`` and return it.

        Returns the PID as a string, or ``""`` when no matching process runs.
        """
        # Example: ps -ef|grep java|grep tomcat_web_1|awk '{print $2}'
        # NOTE(review): the name is interpolated into a shell command without
        # quoting; it must only ever come from trusted discovery data.
        pidarg = '''ps -ef|grep java|grep %s|grep -v grep|awk '{print $2}' ''' % (self.pdict['jpname'])
        pidout = subprocess.Popen(pidarg, shell=True, stdout=subprocess.PIPE,
                                  universal_newlines=True)
        # Only the first matching line is used; communicate() also waits for
        # the pipeline so no child process is left behind.
        pid = pidout.communicate()[0].split('\n', 1)[0]
        self.pdict['pid'] = pid
        return pid

    def get_jstats(self):
        """Merge the ``-gc``, ``-gccapacity`` and ``-gcutil`` columns into ``pdict``.

        Returns ``False`` when no PID is known, otherwise ``None``.
        """
        if self.pdict['pid'] == "":
            return False
        for option in ("-gc", "-gccapacity", "-gcutil"):
            self.pdict.update(self.fill_jstats(option))

    def fill_jstats(self, opts):
        """Run ``sudo -u tomcat jstat <opts> <pid>`` and return ``{column: value}``.

        Raises ``ValueError`` when jstat prints no header/data pair, e.g.
        because sudo refused the command.
        """
        jstatout = subprocess.Popen(
            ['sudo', '-u', 'tomcat', jstat, opts, self.pdict['pid']],
            stdout=subprocess.PIPE, universal_newlines=True)
        stdout, stderr = jstatout.communicate()
        if '\n' not in stdout:
            raise ValueError("jstat %s returned no data for pid %s"
                             % (opts, self.pdict['pid']))
        legend, data = stdout.split('\n', 1)
        return dict(zip(legend.split(), data.split()))

    @staticmethod
    def _average(total, count):
        """Return total/count formatted with 3 decimals, or '0' when count is 0."""
        if not count:
            return '0'
        return format(total / count, '0.3f')

    def compute_jstats(self):
        """Derive the Zabbix values in ``zdict`` from the raw columns in ``pdict``.

        Returns ``False`` when no PID is known, otherwise ``None``.
        """
        if self.pdict['pid'] == "":
            return False

        raw = self.pdict
        # (zdict prefix, jstat column prefix): <col>U = used, <col>C =
        # capacity, <col> = utilisation percentage.  Sizes are multiplied by
        # 1024 (jstat reports kB, so the results are presumably bytes).
        for area, column in (('S0', 'S0'), ('S1', 'S1'), ('Old', 'O'), ('Eden', 'E')):
            self.zdict[area + '_used'] = format(float(raw[column + 'U']) * 1024, '0.2f')
            self.zdict[area + '_max'] = format(float(raw[column + 'C']) * 1024, '0.2f')
            self.zdict[area + '_ratio'] = format(float(raw[column]), '0.2f')

        # Perm_used / Perm_max / Perm_ratio (columns PU, PC, P) are not
        # computed: JDK 1.8 jstat no longer prints them.

        heap_used = sum(float(raw[col]) for col in ('EU', 'S0U', 'S1U', 'OU'))
        heap_max = sum(float(raw[col]) for col in ('EC', 'S0C', 'S1C', 'OC'))
        self.zdict['Heap_used'] = format(heap_used * 1024, '0.2f')
        self.zdict['Heap_max'] = format(heap_max * 1024, '0.2f')
        self.zdict['Heap_ratio'] = format(
            float(self.zdict['Heap_used']) / float(self.zdict['Heap_max']) * 100, '0.2f')

        self.zdict['YGC'] = raw['YGC']
        self.zdict['FGC'] = raw['FGC']
        self.zdict['YGCT'] = format(float(raw['YGCT']), '0.3f')
        self.zdict['FGCT'] = format(float(raw['FGCT']), '0.3f')
        self.zdict['GCT'] = format(float(raw['GCT']), '0.3f')

        young_count = float(raw['YGC'])
        full_count = float(raw['FGC'])
        self.zdict['YGCT_avg'] = self._average(float(raw['YGCT']), young_count)
        self.zdict['FGCT_avg'] = self._average(float(raw['FGCT']), full_count)
        self.zdict['GCT_avg'] = self._average(float(raw['GCT']), young_count + full_count)

    def send_to_zabbix(self, metric):
        """Push ``zdict[metric]`` to Zabbix with zabbix_sender.

        The item key is ``java.discovery_status[<jpname>,<metric>]``, e.g.
        ``java.discovery_status[tomcat_web_1,Perm_used]``.  When no PID is
        known or the module-level ``send_to_zabbix`` switch is not > 0, the
        command is only printed.
        """
        key = "java.discovery_status[" + self.pdict['jpname'] + "," + metric + "]"
        # str() because zdict values may still be the integer 0.  -z/-s are
        # given explicitly: zabbix_sender needs the server address and a host
        # name matching the one configured in the Zabbix frontend.
        command = [zabbix_sender, "-c", zabbix_conf, "-k", key,
                   "-o", str(self.zdict[metric]), "-z", serverip, "-s", ip]
        if self.pdict['pid'] != "" and send_to_zabbix > 0:
            try:
                subprocess.call(command, stdout=FNULL, stderr=FNULL, shell=False)
            except OSError as detail:
                print(" ".join(["Something went wrong while exectuting zabbix_sender : ",
                                str(detail)]))
        else:
            print("Simulation: the following command would be execucted :\n"
                  + " ".join(command + ["\n"]))
accepted_modes = ['alive', 'all']

if len(sys.argv) == 3 and sys.argv[2] in accepted_modes:
    java_name = sys.argv[1]
    mode = sys.argv[2]
else:
    # usage() prints the help text and exits with status 1.
    usage()

# Check if process is running / Get PID
jproc = Jprocess(java_name)
pid = jproc.chk_proc()

if pid != "":
    if mode == 'all':
        jproc.get_jstats()
        jproc.compute_jstats()
        # FNULL is a module-level name on purpose: Jprocess.send_to_zabbix()
        # redirects zabbix_sender output to it.  The with-block closes it
        # even when a send raises.
        with open(os.devnull, 'w') as FNULL:
            for key in jproc.zdict:
                jproc.send_to_zabbix(key)
    # Both modes report the PID of a running process, as the usage text
    # promises for 'alive'.
    print(pid)
else:
    print(0)
|
1
2
3
4
5
6
7
8
9
|
说明,跟参考文章的不同点:
1. 'sudo' 改成 'sudo' , '-u' , 'tomcat'
2. 注释perm段,因为jdk1.8 jstat输出没有了
3. send_to_zabbix 提示TypeError: cannot concatenate 'str' and 'int' objects
self.zdict[metric]换成str(self.zdict[metric])
4. [zabbix_sender, "-c" , zabbix_conf, "-k" , key, "-o" , str(self.zdict[metric])], stdout=FNULL,stderr=FNULL, shell=False)
这行改为 [zabbix_sender, "-c" , zabbix_conf, "-k" , key, "-o" , str(self.zdict[metric]), "-z" , serverip, "-s" , ip], stdout=FNULL,stderr=FNULL, shell=False)
原因在于zabbix_sender发送数据时需要用-s指定主机名(并用-z指定服务端),具体参考上面ttlsa的链接
5.重启zabbix_agentd
6.模板的话
http://john88wang.blog.51cto.com/2165294/1708302 里面有,讲解也很详细。
7.zabbix_get和zabbix_sender 测试
zabbix_get
/usr/local/zabbix/bin/zabbix_get -s $ip -p 10050 -k java.discovery_status[你的应用名字,all]
zabbix_sender
/usr/local/zabbix/bin/zabbix_sender -c /usr/local/zabbix/etc/zabbix_agentd.conf -k "java.discovery_status[你的应用名字,GCT_avg]" -o '0.021' -s xx -z serverip
xx表示为zabbix_agentd里面的hostname,这里要注意一下
如果你这个hostname与web界面不一样,那么/var/log/zabbix_agentd.log(log是排错的重要手段)会报错,no active checks on server [xx:10051]: host [xx] not found
最好改成一致
hostname=你的客户端ip,我的web界面都是ip(太low了,需要改成ip+服务,看起来方便)
8.模板我改成了30分钟,正常30分钟出图
首先看最新数据,有没有应用集,有那么你就成功了。
坑太多了,如果有问题,欢迎提出来一起讨论。
后期,这个脚本还是要修改的,增加第三步jstat输出的四个变量
MC:元空间(Metaspace,1.8中取代永久区)容量,单位KB
MU:元空间已使用大小,单位KB
CCSC:压缩类空间(Compressed Class Space)容量,单位KB
CCSU:压缩类空间已使用大小,单位KB
最后提醒,一定要确认zabbix是否能获取数据
sudo -u zabbix /usr/bin/python /usr/local/zabbix/bin/jstat_status.py xx_tomcat all
因为碰到有几台机器不能出图,所以做一下最后的check
1.cat /etc/hosts
ip hostname
2.cat /etc/sudoers
zabbix ALL=(tomcat) NOPASSWD:/bin/netstat,/usr/bin/omreport,/usr/java/jdk/bin/jstat,/usr/bin/python
tomcat ALL=(ALL) NOPASSWD:/bin/kill,/bin/chown,/usr/java/jdk/bin/jstat,/usr/bin/python,/bin/netstat,/usr/bin/omreport
3.sudo -u zabbix /usr/bin/python /usr/local/zabbix/bin/jstat_status.py xx_tomcat all
没有数据,很有可能是zabbix权限问题。排查的第一点。终于完成了。
最后更新 2017.1.4
-
步骤一定要按评论里面一步步操作
-
sudo -u zabbix /usr/local/zabbix/bin/zabbix_sender -c /usr/local/zabbix/etc/zabbix_agentd.conf -k "java.discovery_status[xx_tomcat,GCT_avg]" -o '0.022' -s xx -z xx
当你尝试zabbix_sender的时候,我在线上,有几台没有出图。
原因在于,xx_tomcat是可以的,但是如果是这样xx-xx-xx_tomcat是会fail的
如果你用strace跟踪zabbix_sender
最终只有2和0的区别,其他没显示,strace跟踪不行。
3.如果要调试,请参考
gdb 调试zabbix_server解决zabbix_sender不成功的问题
此文章总结了根本原因
1、监控项类型一定要是Zabbix trapper(Zabbix采集器)类型;
2、allowed ip(允许的主机)要填写运行zabbix_sender的机器的ip地址,如果有多个,使用逗号分隔;
3、zabbix_sender命令中的-s参数hostname要和server的web界面上一致;
终于完了。
本文转自 liqius 51CTO博客,原文链接:http://blog.51cto.com/szgb17/1812342,如需转载请自行联系原作者