介绍了Python统计日志中每个IP出现次数的方法,实例分析了Python基于正则表达式解析日志文件的相关技巧,需要的朋友可以参考下
本脚本可用于多种日志类型
#-*- coding:utf-8 -*-
import re,time
def mail_log(file_path):
global count
log=open(file_path,'r')
C=r'\.'.join([r'\d{1,3}']*4)
find=re.compile(C)
count={}
for i in log:
for ip in find.findall(i):
count[ip]=count.get(ip,1)+1
if __name__ == '__main__':
print time.clock()
num=0
mail_log(r'e:\MDaemon-20110329-all.log')
R=count.items()
for i in R:
if i[1]>0: #提取出现次数大于0的IP
print i
num+=1
print '符合要求数量:%s耗时(%s)'%(num,time.clock())
输出结果如下:
('206.220.200.250', 8)
('66.40.52.37', 10)
('66.40.52.36', 5)
('207.115.11.41', 4)
('96.47.193.25', 9)
('96.47.193.24', 5)
('96.47.193.23', 17)
('72.32.181.92', 5)
('67.76.103.168', 10)
('64.34.161.218', 5)
('209.151.96.3', 7)
('61.135.168.0', 15)
('199.81.128.37', 2)
('199.81.128.36', 2)
('199.81.128.38', 2)
('198.45.19.170', 16)
('12.236.15.9', 4)
('66.96.142.52', 51)
('66.96.142.51', 55)
('66.96.142.50', 62)
('64.18.5.13', 1553)
('69.39.47.14', 9)
('64.18.5.11', 1557)
('64.18.5.10', 2752)
('210.72.13.102', 4)
('64.118.108.196', 4)
('66.60.192.44', 26)
('112.90.194.8', 4)
('198.49.244.245', 5)
('216.183.174.227', 5)
('195.245.230.131', 5)
('211.115.13.27', 5)
('222.247.123.217', 3)
('218.213.85.210', 2)
('201.236.205.96', 3)
('209.85.161.136', 2)
('173.165.120.188', 5)
('50.22.89.39', 7)
('219.129.20.168', 3)
('24.106.197.167', 5)
('207.190.225.69', 4)
('156.3.32.236', 5)
('209.92.157.161', 5)
('216.153.192.200', 5)
('76.77.158.130', 3)
('12.166.4.221', 5)
('66.46.182.96', 4)
('80.252.97.102', 4)
('66.46.182.94', 5)
('66.46.182.95', 4)
('124.14.5.3', 3)
('202.85.139.0', 5)
('207.173.160.17', 15)
('143.101.0.21', 5)
('65.75.75.59', 9)
('77.88.21.89', 53)
('216.128.11.30', 44)
原文转载:http://www.jb51.net/article/69011.htm