Python快速读取文件指定行

快速读取文件指定行

# Generate test data: append 100000 copies of one sample log record to
# log.txt, echoing the 1-based number of every line as it is written.
s = '[2021-5-25 12:41:9];192.168.0.110;0.0.0.0;init-setting'
with open('log.txt', mode='a', encoding='utf-8') as out:
    record = s + '\n'  # the record never changes, so build it once
    for line_no in range(1, 100001):
        out.write(record)
        print('第{}行写入'.format(line_no))

 

文件大小约 30 MB 时：

# Benchmark: fetch lines 11-20 of log.txt as dicts and count the lines in the
# file, printing the elapsed time in milliseconds. Two slower alternatives are
# kept below, commented out, each with the timing the author recorded for it.
import linecache
import time

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is unaffected by system clock adjustments.
t1 = time.perf_counter() * 1000

# Alternative 1: read the whole file, split on '\n', then slice.
# with open('log.txt', 'r', encoding='utf-8') as r:
#     s = [i.split(';') for i in r.read().split('\n') if i != ''][10:20]
# logs_list = [{'time': i[0], 'attack_ip': i[1], 'my_ip': i[2], 'attack_type': i[3]} for i in s]
# Recorded: 720.24609375 ms

# Alternative 2: readlines(), strip the newline from each line, then slice.
# with open('log.txt', 'r', encoding='utf-8') as r:
#     s = [i.replace('\n', '').split(';') for i in r.readlines()][10:20]
# logs_list = [{'time': i[0], 'attack_ip': i[1], 'my_ip': i[2], 'attack_type': i[3]} for i in s]
# Recorded: 826.21923828125 ms

# Active approach: ask linecache for exactly the lines that are needed.
logs_list = []
for i in range(10, 20):
    # getline() takes a 1-based line number, hence i + 1.
    line = linecache.getline('log.txt', i + 1).rstrip('\n')
    if line == '':
        # getline() returns '' instead of raising when the line does not
        # exist; skip it so a file shorter than 20 lines does not end in an
        # IndexError below (alternative 1 filters empty lines the same way).
        continue
    logs_list.append(line.split(';'))
logs_list = [
    {'time': fields[0], 'attack_ip': fields[1], 'my_ip': fields[2], 'attack_type': fields[3]}
    for fields in logs_list
]

# Total number of lines, counted as the number of newline characters.
with open('log.txt', 'r', encoding='utf-8') as r:
    s = r.read().count('\n')
# Recorded: 194.0517578125 ms
print(logs_list)
print(s)
t2 = time.perf_counter() * 1000
print(str(t2 - t1) + '毫秒')
# Benchmark: fetch lines 11-20 of log.txt as dicts and count the lines in the
# file, printing the elapsed time in milliseconds. Two slower alternatives are
# kept below, commented out, each with the timing the author recorded for it.
import linecache
import time

# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; unlike time.time() it is unaffected by system clock adjustments.
t1 = time.perf_counter() * 1000

# Alternative 1: read the whole file, split every line, slice at the end.
# with open('log.txt', 'r', encoding='utf-8') as r:
#     s = [i.split(';') for i in r.read().split('\n') if i != '']
# logs_list = [{'time': i[0], 'attack_ip': i[1], 'my_ip': i[2], 'attack_type': i[3]} for i in s][10:20]
# logs_list_length = len(s)
# Recorded: 2478.570068359375 ms

# Alternative 2: readlines(), split every line, slice at the end.
# with open('log.txt', 'r', encoding='utf-8') as r:
#     s = [i.replace('\n', '').split(';') for i in r.readlines()]
# logs_list = [{'time': i[0], 'attack_ip': i[1], 'my_ip': i[2], 'attack_type': i[3]} for i in s][10:20]
# logs_list_length = len(s)
# Recorded: 2743.895751953125 ms

# Active approach: ask linecache for exactly the lines that are needed.
logs_list = []
for i in range(10, 20):
    # getline() takes a 1-based line number, hence i + 1.
    line = linecache.getline('log.txt', i + 1).rstrip('\n')
    if line == '':
        # getline() returns '' instead of raising when the line does not
        # exist; skip it so a file shorter than 20 lines does not end in an
        # IndexError below (alternative 1 filters empty lines the same way).
        continue
    logs_list.append(line.split(';'))
logs_list = [
    {'time': fields[0], 'attack_ip': fields[1], 'my_ip': fields[2], 'attack_type': fields[3]}
    for fields in logs_list
]

# Total number of lines, counted as the number of newline characters.
with open('log.txt', 'r', encoding='utf-8') as r:
    logs_list_length = r.read().count('\n')
# Recorded: 504.13623046875 ms
print(logs_list)
print(logs_list_length)
t2 = time.perf_counter() * 1000
print(str(t2 - t1) + '毫秒')

 

文件大小约 90 MB 时：

上一篇:SparkSQL2.x的数据源jdbc


下一篇:mysql binlog日志自动清理及手动删除