#!/usr/bin/python
import re
def buffer_line(path="/etc/sae/buffer_1"):
    """Return the read position saved in the state file, or 0 if there is none.

    The state file is expected to start with a decimal integer (written by
    ``set_last_pos``).  A missing file, an empty file, or content that does
    not begin with a digit all yield 0, so the caller starts from the
    beginning of the log.

    Args:
        path: Location of the state file.  Defaults to the path the script
            has always used.

    Returns:
        The stored position as an ``int`` (0 when nothing usable is stored).

    Raises:
        IOError: If the file exists but cannot be read (e.g. permissions).
    """
    import errno

    try:
        with open(path) as state_file:
            buf = state_file.read()
    except IOError as err:
        # Only "file does not exist" means "no position saved yet";
        # any other I/O failure is a real problem and must surface.
        if err.errno != errno.ENOENT:
            raise
        return 0

    # Same as the old "^\d*" lookup, but an absent number gives 0 instead
    # of crashing in int('').
    leading_digits = re.match(r"\d+", buf)
    if leading_digits is None:
        return 0
    return int(leading_digits.group(0))
def set_last_pos(pos, path="/etc/sae/buffer_1"):
    """Overwrite the state file with *pos* so the next run can resume there.

    Args:
        pos: Position to remember; stored as ``str(pos)``.
        path: Location of the state file.  Defaults to the path the script
            has always used.

    Raises:
        IOError: If the state file cannot be opened for writing.
    """
    # The context manager guarantees the value is flushed and the handle
    # closed before returning, rather than whenever the object is collected.
    with open(path, "w") as state_file:
        state_file.write(str(pos))
# Captures the 3-digit status code and the application name from an access
# log line whose referrer is "-" and whose user agent is "SAE/<name>".
_STATUS_AND_APP_RE = re.compile(r'(\d\d\d) [^ ]* "-" "SAE/(.*?)" ')

# For "fetchurl" agents: everything from the first word character up to the
# last '-' in the agent name.
_FETCHURL_APP_RE = re.compile(r'(\w.*)-')


def read_new_log_text(log_path, offset):
    """Read everything in *log_path* from *offset* to the end of the file.

    Args:
        log_path: Path of the log file to read.
        offset: Position at which the previous run stopped.

    Returns:
        A ``(text, new_offset)`` tuple: the newly read text and the position
        to resume from next time.
    """
    with open(log_path) as log_file:
        # If the log was rotated or truncated, the saved offset lies past
        # the end of the file; start over instead of reading nothing.
        log_file.seek(0, 2)
        if offset > log_file.tell():
            offset = 0
        log_file.seek(offset)
        text = log_file.read()
        new_offset = log_file.tell()
    return text, new_offset


def tally_status_by_app(lines):
    """Count HTTP status codes per SAE application name.

    Args:
        lines: Iterable of access-log lines (without trailing newlines).

    Returns:
        A dict mapping application name to a dict of
        ``{status_code_string: occurrence_count}``.  Lines that do not match
        the expected layout are ignored.
    """
    counts = {}
    for line in lines:
        # Cheap substring test before running the regex.
        if '"SAE' not in line:
            continue
        hit = _STATUS_AND_APP_RE.search(line)
        if not hit:
            continue
        status = hit.group(1)
        app = hit.group(2)
        if 'fetchurl' in app:
            trimmed = _FETCHURL_APP_RE.search(app)
            # A fetchurl agent without a '-' keeps its full name instead of
            # crashing on a missing match.
            if trimmed:
                app = trimmed.group(1)
        per_status = counts.setdefault(app, {})
        per_status[status] = per_status.get(status, 0) + 1
    return counts


def main(log_path="/data0/l7.access.log"):
    """Tally status codes for log lines added since the last run and print them.

    Reads the saved position once, reads the new part of the log, stores the
    new position, then prints the per-application status counts as a dict.
    """
    offset = buffer_line()
    content, new_offset = read_new_log_text(log_path, offset)
    set_last_pos(new_offset)
    appname = tally_status_by_app(content.split("\n"))
    # Parenthesised single argument: same output as the Python 2 print
    # statement, and also valid under Python 3.
    print(appname)


if __name__ == '__main__':
    main()
# Source: the "expect批量同步数据" blog. Please keep this attribution: http://4249964.blog.51cto.com/4239964/1437872