1、下载 1万条日志记录,保存为一个文件,读取文件并解析日志,从日志中提取ip, time_local, http_method, url, http_status, body_bytes_sent , http_referer, ua字段,以制表符分割。
在mysql中创建表,将刚才解析后的结果存入mysql表中。
2、在mysql中,使用sql查询出现次数最多的ip,以及该ip的访问次数。 这里写代码片
file_data
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# coding:utf-8
# ip, time_local, http_method, url, http_status, body_bytes_sent , http_referer, ua
import re
data_all = {}
data_ev = {}
print( "开始提取。。。" )
with open ( "access_parts.log" ) as f:
count = 1
for line in f.readlines():
line = line.strip( '\n' )
line = re.split( "\s|-" , line)
# print(line)
data_ev[ "ip" ] = line[0]
data_ev[ "time_local" ] = line[5].split( '[' )[1]
data_ev[ "http_method" ] = line[7].split( '"' )[1]
data_ev["url "] = line[8]
data_ev[" http_status "] = line[9]
if line[10] == '-':
data_ev[" body_bytes_sent "] = 0
else:
data_ev[" body_bytes_sent "] = line[10]
data_ev[" http_referer "] = line[11].split('" ')
data_ev[ "ua" ] = ' ' . join (line[12:23]).split( '"' )
if data_ev["http_method "] == 'GET':
data_ev[" ua "] = data_ev[" ua "][3]
else:
data_ev[" ua "] = data_ev[" ua "][1] + data_ev[" ua "][3]
print(data_ev)
data_all[count] = data_ev
# print(" 已完成 "+count+" 行。。。")
count += 1
|
database
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# coding:utf-8
from sqlalchemy import create_engine, Table , Column , Integer , String, MetaData, ForeignKey
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from file_data import data_all
BaseModel = declarative_base()
print( "开始连接数据库。。。" )
DB_CONNECT = 'mysql+pymysql://root:123456@localhost:3306/test'
engine = create_engine(DB_CONNECT, echo= True )
DB_Session = sessionmaker(bind=engine)
session = DB_Session()
print( "连接成功。。。" )
class Access(BaseModel):
print( "111" )
__tablename__ = 'data'
id = Column ( Integer (), primary_key= True )
ip = Column (String(50))
time_local = Column (String(50))
http_method = Column (String(50))
http_status = Column (String(50))
body_bytes_sent = Column (String(50))
http_referer = Column (String(50))
ua = Column (String(500))
def init_db():
BaseModel.metadata.create_all(engine)
def drop_db():
BaseModel.metadata.drop_all(engine)
drop_db()
init_db()
|
main
1 2 3 4 5 6 7 8 9 10 11 12 |
# 数据处理
# coding:utf-8
from file_data import data_all
from database import Access
from database import session
for k,v in data_all.items():
print( "开始插入。。。" )
access_ = Access(ip=v[ "ip" ], time_local=v[ "time_local" ], http_method=v[ "http_method" ], http_status=v[ "http_status" ], body_bytes_sent=v[ "body_bytes_sent" ], http_referer=v[ "http_referer" ], ua=v[ "ua" ])
session. add (access_)
session. commit ()
print( "插入成功。。。" )
|