线上用 LVS 做负载均衡，最近发现有几台机器在某些时间段经常被踢出，于是写了个 Python 脚本，用来做简单的端口和 ping 检测。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
|
import os,socket,time
import threading
def check_port(host, port):
    """Return 1 if a TCP connection to host:port succeeds, else 0.

    Every address that ``socket.getaddrinfo`` yields for the host is tried
    in turn with a 2-second timeout; the first successful connect wins.
    Socket errors are printed rather than raised.

    Args:
        host: Host name or IP address to probe.
        port: Port number or service name (int or str).

    Returns:
        1 when some address accepted the connection, 0 otherwise
        (including when the host or port cannot be resolved).
    """
    try:
        candidates = socket.getaddrinfo(
            host, port, socket.AF_UNSPEC, socket.SOCK_STREAM)
    except socket.error as msg:
        # A resolution failure (socket.gaierror) must not propagate: the
        # caller runs inside an endless monitoring loop, so report the
        # target as unreachable instead of crashing.
        print(str(msg))
        return 0

    for af, socktype, proto, _canonname, sa in candidates:
        try:
            s = socket.socket(af, socktype, proto)
        except socket.error as msg:
            print(str(msg))
            continue
        try:
            s.settimeout(2)
            s.connect(sa)
        except socket.error as msg:
            print(str(msg))
            s.close()
            continue
        # Connected: the probe only needs the handshake, so close right away.
        s.close()
        return 1
    return 0
def ping_server(ip):
    """Ping ``ip`` 50 times and append the summary lines to its log file.

    Runs a shell pipeline that keeps only the ping output lines matching
    "received" or "mdev", prefixes each with a timestamp (taken when the
    command is built) via sed, and appends them to
    ``<logs_dir>/<ip>_ping.log``.

    Args:
        ip: Address or host name handed to ``ping``.

    Reads the module-level ``logs_dir`` assigned by the script entry point.
    """
    cmd = """
ping -c 50 %s|egrep "received|mdev" | sed "s,^,%s : ," >> %s/%s_ping.log
""" % (ip, time.ctime(time.time()), logs_dir, ip)
    print(cmd)
    # Close the pipe explicitly: this waits for the pipeline to finish and
    # releases the file object deterministically instead of leaving the
    # cleanup to garbage collection.
    os.popen(cmd).close()
def check_network():
    """Start one background ping thread per host returned by get_hosts().

    All threads are created first and then started; the function returns
    immediately without joining them.
    """
    thread_pool = []
    for ip in get_hosts():
        print(ip)
        thread_pool.append(threading.Thread(target=ping_server, args=(ip,)))

    for th in thread_pool:
        # The attribute is spelled "daemon"; the previous "deamon" spelling
        # only created an unused attribute, so the threads were never
        # actually marked as daemon threads.
        th.daemon = True
        th.start()
def get_hosts(hosts_file='/etc/hosts'):
    """Return the first field of every ``hadoop-`` entry in a hosts file.

    Args:
        hosts_file: Path of the hosts file to scan; defaults to /etc/hosts.

    Returns:
        A list with the first whitespace-separated field (the address
        column in a standard hosts file) of each line that mentions
        ``hadoop-``, in file order. The list is also printed.
    """
    hosts_list = []
    # "with" guarantees the file is closed even if reading fails.
    with open(hosts_file) as f:
        for line in f:
            # Drop "#" comments so a commented-out entry is not reported
            # as a live host.
            entry = line.split('#', 1)[0]
            if 'hadoop-' in entry:
                hosts_list.append(entry.split()[0])
    print(hosts_list)
    return hosts_list
def get_hosts_ports(hosts_file='/etc/hosts', port=10050):
    """Return ``"<address>:<port>"`` for every ``hadoop-`` hosts-file entry.

    Args:
        hosts_file: Path of the hosts file to scan; defaults to /etc/hosts.
        port: Port appended to each address; defaults to 10050.

    Returns:
        A list of ``"<first field>:<port>"`` strings, one per line that
        mentions ``hadoop-``, in file order. The list is also printed.
    """
    hosts_ports_list = []
    # "with" guarantees the file is closed even if reading fails.
    with open(hosts_file) as f:
        for line in f:
            # Drop "#" comments so a commented-out entry is not checked.
            entry = line.split('#', 1)[0]
            if 'hadoop-' in entry:
                host = entry.split()[0]
                hosts_ports_list.append('%s:%s' % (host, port))
    print(hosts_ports_list)
    return hosts_ports_list
def check_server_port():
    """Probe every host:port from get_hosts_ports() and log the outcome.

    Appends one line per target to ``<logs_dir>/check_port.log``; every
    line of a run carries the same timestamp, taken when the run starts.
    Reads the module-level ``logs_dir`` assigned by the script entry point.
    """
    dt = time.ctime(time.time())
    checklist = get_hosts_ports()
    # "with" closes the log after each run instead of leaking the handle.
    with open("%s/check_port.log" % (logs_dir), 'a') as port_file:
        for checkitem in checklist:
            # Split on the LAST colon so an IPv6 address, which contains
            # colons itself, still separates cleanly from the port.
            host, port = checkitem.rsplit(':', 1)
            status = 'success' if check_port(host, port) else 'fail'
            # Both outcomes end with a newline; the failure line used to
            # lack one, which glued consecutive log entries together.
            port_file.write('%s connect to %s %s\n' % (dt, checkitem, status))
def check_health(interval=60):
    """Run the port check and the ping check in an endless loop.

    Args:
        interval: Seconds to sleep between rounds; defaults to 60.

    Never returns. check_network() only starts its ping threads and does
    not wait for them, so the sleep begins while the pings are running.
    """
    while True:
        check_server_port()
        check_network()
        time.sleep(interval)
if __name__ == '__main__':
    # Module-level log directory read by ping_server() and
    # check_server_port(); create it before the first check runs.
    logs_dir = '/apps/logs/ping'
    if not os.path.exists(logs_dir):
        os.makedirs(logs_dir)
    check_health()
|
本文转自菜菜光 51CTO博客,原文链接:http://blog.51cto.com/caiguangguang/1352307,如需转载请自行联系原作者