由于线上的服务多,日志量也是逐日增大,一直人工处理也是一个头疼的一个点,所以想出了一个方案,自动压缩历史的日志,空出多余的空间出来。根据月份打包
支持压缩的文件格式:
1、newWebSite.log.2019-05-14
2、info.2019-01-03.out
执行命令对应的日志目录:
python zipFile.py /home/webApp/logs
#!/usr/bin/python # coding:utf-8 import datetime import os import sys import commands """ author:jimw date:2019-11-28 支持范围: 1、newWebSite.log.2019-05-14 2、catalina.2019-01-03.out python zipFile.py /home/webApp/logs 根据月份打包 zip logging """ # 返回需要打包的集合 def loggingFile(filepath): # 遍历filepath下所有文件,包括子目录 下个小版本优化 files = os.listdir(filepath) # 这里必须排序,不然无法整合出数据 files.sort() now = datetime.datetime.now() # 得到今年的的时间 (年份) 得到的today_year等于2019年 today_month = str(now.year) + '-' + str(now.month) ### data_list_todays = [] data_list_files = [] dictName = dict() flagName = '' os.chdir(filepath) for fi in files: fi_d = os.path.join(filepath, fi) if not os.path.isdir(fi_d): varName = os.path.join(filepath, fi_d) varName = varName.split('/')[-1] # 文件名统计 if (varName.split('.')[1] == 'log'): varFileName = varName.split('.')[0] + '.' + varName.split('.')[1] else: varFileName = varName.split('.')[0] if flagName == '': # flagName = varFileName print() elif flagName != varFileName: data_list_todays = [] flagName = varFileName print('flagName:', flagName) data_list_files.append(varFileName) # 判断是否重复 if containsDuplicate(data_list_files): data_list_files.remove(varFileName) # print(data_list_files) # 月份统计 # print(varFileName) try: if (varName.split('.')[1] == 'log'): varNameindex = varName.split('.')[2] varNameMonth = varNameindex.split('-')[0] + '-' + varNameindex.split('-')[1] else: varNameindex = varName.split('.')[1] varNameMonth = varNameindex.split('-')[0] + '-' + varNameindex.split('-')[1] if (varNameMonth == today_month): print('当前月份不压缩') continue # print(varNameMonth) except(Exception): # print(varName) continue data_list_todays.append(varNameMonth) if containsDuplicate(data_list_todays): data_list_todays.remove(varNameMonth) # pythodata_list_todays.reverse() data_list_todays.reverse() print(data_list_todays) dictName[varFileName] = data_list_todays return dictName # 如果返回true 则不新加 def containsDuplicate(nums): """ :type nums: List[int] :rtype: bool """ if len(nums) == 0 or len(nums) == 1: return False d = {} for i in nums: if i in d: return True d[i] = 0 return False # 执行命令 def execCmd(cmd): print(cmd) # r = os.popen(cmd) (status, output) = commands.getstatusoutput(cmd) print("result:", status) print("result output:", output) text = 'ok' return text # 遍历结果 def zipFile(result): print(result) for file, valus in result.items(): # print('不存在则打包') # print(file) # print(valus) for months in valus: varZipFile = file + '.' + months + '.tar.gz' if os.path.isfile(varZipFile): print('varZipFile 已经存在不需要更新:', varZipFile) continue # 压缩 cmdval = 'tar -zcvf ./' + varZipFile + ' ' + file + '.' + months + '*' print(cmdval) execCmd(cmdval) # 删除,后续再说 if __name__ == '__main__': # 递归遍历/root目录下所有文件 result = loggingFile(sys.argv[1]) # result = loggingFile('D://logs') zipFile(result)