博客太久没更新了,是该拔拔草了。最近刚好在比较深入地学习字符串正则表达式匹配的内容,顺便完成了一个小作品:解析CPM报文(CPM is short for Container Pallet Message)。
好了,照样话不多说,贴上代码
# -*- coding=utf-8 -*-
# using python 3
# Author: Sysuzzd
# Under Project: HS ON ML WITH SCIKIT AND TENSORFLOW
# @Time : 2019-6-29 22:35
# ************************* Purpose & Illustration *************************
# Parse a CPM message and generate a CPM in a standard format
# **************************************************************************
import os
import re
import sys

# Default location of the CPM text file. Pass another path to cpm_nor() (or as
# the first command-line argument) to parse a different file.
DEFAULT_CPM_FILE = r'D:\*PythonLearn\HS ON ML WITH SCIKIT AND TENSORFLOW\Hands-On.Machine.Learning.with.Scikit-Learn.and.TensorFlow.2017\handson-ml-master\CPM4.txt'

# Flight information line, e.g. "CA162/01JUN18.B2099.AMS-PEK" (the trailing
# newline is part of the match and is stripped by the caller).
_FLIGHT_PATTERN = r'\w{2}\d{3,4}\/\d{2}\w{3}\d{2}\.\w\d{4}.+\w{3}\n'


def parse_cpm(cpm_str):
    """Split a raw CPM message into its sections and parse every section.

    The section patterns were written for the 777-200F CPM layout: a flight
    line, "MAIN DECK LEFT SIDE", "MAIN DECK RIGHT SIDE", "LOWER DECK" and one
    or more lines starting with "BULK".

    :param cpm_str: full text of the CPM message.
    :return: tuple ``(flight_cpm, flight_info)`` where ``flight_cpm`` is a
        ``ULDs_777`` instance and ``flight_info`` is the flight line without
        its trailing newline.
    :raises IndexError: if a mandatory section or field is not found.
    """
    flight_info = re.findall(_FLIGHT_PATTERN, cpm_str)[0][:-1]

    # re.S lets "." span newlines so a single match captures a whole section.
    maindeck_left_str = re.findall(
        r'(MAIN\sDECK\sLEFT\sSIDE.*)\n.+DECK\sRIGHT', cpm_str, re.S | re.M)[0]
    # The right-side section has to end directly at "\nLOWER DECK"; putting
    # ".+" in front of it (as for the left side) loses the last position (PR).
    maindeck_right_str = re.findall(
        r'(MAIN\sDECK\sRIGHT\sSIDE.*)\nLOWER\sDECK', cpm_str, re.S | re.M)[0]
    # Non-greedy: stop at the first line that starts with BULK.
    lowerdeck_str = re.findall(r'(LOWER\sDECK.*?)\nBULK', cpm_str, re.S)[0]
    # Collect every line starting with BULK, however many there are.
    bulk_str = '\n'.join(re.findall(r'^BULK.*$', cpm_str, re.M))

    main_left, main_right, main_center = check_main_center(
        maindeck_left_str, maindeck_right_str)
    lower, lower_left, lower_right = check_lower_sides(lowerdeck_str)
    flight_cpm = ULDs_777(main_left, main_right, main_center,
                          lower, lower_left, lower_right, bulk_str)
    return flight_cpm, flight_info


def cpm_nor(cpm_file_route=DEFAULT_CPM_FILE):
    """Read a CPM file, print its weight summary and the normalized CPM.

    :param cpm_file_route: path of the text file holding the CPM message;
        defaults to ``DEFAULT_CPM_FILE``.
    """
    with open(cpm_file_route, 'r') as cpm_file:
        cpm_str = cpm_file.read()

    flight_cpm, flight_info = parse_cpm(cpm_str)
    print('Total weight of this flight is:', flight_cpm.total_weight(), 'KG')
    print('The later imbalance weight of this flight is:', flight_cpm.imbalance_weight(), 'KG')

    # Print the CPM in the standard layout.
    cpm_nor_txt = cpm_nor_output(flight_cpm, flight_info)
    print(cpm_nor_txt)
    return


def check_main_center(left, right):
    """Move the centre-line entries out of the main-deck left section.

    Lines of ``left`` that start with ``-R`` or ``-<letter>LR`` are treated as
    centre entries. When there are none, ``left`` is returned unchanged and the
    centre string is empty.

    :param left: main-deck left section, header line first.
    :param right: main-deck right section; returned unchanged.
    :return: ``(left_without_center, right, center)``; every centre entry in
        ``center`` is preceded by a newline.
    """
    # Anchored at line start so that it selects exactly the lines the
    # substitution below removes.
    center_entries = re.findall(r'^(?:-R.*|-[A-Z]LR.*)', left, re.M)
    center = ''.join('\n' + entry for entry in center_entries)
    # The newline is part of the match, otherwise an empty line is left behind.
    left1 = re.sub(r'\n(?:-R.*|-[A-Z]LR.*)', '', left)
    return left1, right, center


def check_lower_sides(lower):
    """Separate left/right entries of the lower deck from the other lines.

    A left entry is ``-NNL/...`` up to the next whitespace; a right entry is
    ``-NNR/...`` up to the end of the line. Requiring the ``/`` right after
    the side letter keeps ``-NNLR/`` positions out of the left list without
    rejecting entries that merely contain the letter R. Any line holding a
    side entry is removed from the remaining section.

    :param lower: lower-deck section, header line first.
    :return: ``(lower_without_sides, lower_left, lower_right)``; both side
        strings start with a newline and are exactly ``'\\n'`` when empty.
    """
    other_lines = []
    left_entries = []
    right_entries = []
    for line in lower.split('\n'):
        # (?<!\S): an entry starts at the beginning of the line or after
        # whitespace, never in the middle of another token.
        left_found = re.findall(r'(?<!\S)-\d{2}L/\S*', line)
        right_found = re.findall(r'(?<!\S)-\d{2}R/.*', line)
        if left_found or right_found:
            left_entries.extend(left_found)
            right_entries.extend(right_found)
        else:
            other_lines.append(line)

    lower1 = '\n'.join(other_lines)
    lower_left = '\n' + '\n'.join(left_entries)
    lower_right = '\n' + '\n'.join(right_entries)
    return lower1, lower_left, lower_right


class ULDs_777:
    """All parsed sections of one CPM: six ``ULD`` groups plus the bulk."""

    def __init__(self, main_left, main_right, main_center, lower, lower_left, lower_right, bulk_str):
        self.main_left = ULD(main_left)
        self.main_right = ULD(main_right)
        self.main_center = ULD(main_center)
        self.lower = ULD(lower)
        self.lower_left = ULD(lower_left)
        self.lower_right = ULD(lower_right)
        self.bulk = BULK(bulk_str)
        return

    def __str__(self):
        return ('This is a ULD-777F class')

    def total_weight(self):
        """Return the sum of the weights of every section, bulk included."""
        sections = (self.main_left, self.main_right, self.main_center,
                    self.lower_left, self.lower_right, self.lower, self.bulk)
        return sum(section.weights for section in sections)

    def imbalance_weight(self):
        """Return the absolute difference between right-side and left-side weights."""
        right_side = self.main_right.weights + self.lower_right.weights
        left_side = self.main_left.weights + self.lower_left.weights
        return abs(right_side - left_side)


class ULD:
    """One section of ULD entries.

    Attributes set by the constructor:
    ``uld_list`` (raw entry lines), ``weights`` (summed weight) and
    ``uldstrs`` (normalized lines, each preceded by a newline).
    """

    def __init__(self, cpm_str):
        # The first line is the section header (or the empty string in front
        # of a leading newline) and is dropped; blank lines carry no entry.
        self.uld_list = [line for line in cpm_str.split('\n')[1:] if line.strip()]
        self.weights, self.uldstrs = self.uld_normalized()

    def uld_normalized(self):
        """Return ``(total_weight, normalized_text)`` for ``self.uld_list``.

        A normalized line is
        ``position/uld-id/weight/destination[/shape][/cargo-type]``, or
        ``position/NIL`` for an entry containing ``NIL``.

        :raises IndexError: if a non-NIL entry lacks a position, weight,
            ULD id or destination.
        """
        total_weight = 0
        normalized = []
        for uld in self.uld_list:
            posi_str = re.findall(r'(-\w{1,4})\/', uld)[0]
            if 'NIL' in uld:
                # Empty position: contributes no weight.
                normalized.append(posi_str + '/NIL')
                continue

            weight_str = re.findall(r'\/(\d{1,5})', uld)[0]
            total_weight += int(weight_str)

            uld_str = '/' + re.findall(r'\/([A-Z]{3}\d{5}[A-Z]{2,3})\/', uld)[0]
            des_str = '/' + re.findall(r'\/([A-Z]{3})\/', uld)[0]
            # Shape and cargo type are optional fields.
            shape = re.search(r'[Q][456ML][RL]?', uld)
            shape_str = '/' + shape.group(0) if shape else ''
            cargotype = re.search(r'([CMXT])\.', uld)
            cargotype_str = '/' + cargotype.group(1) if cargotype else ''
            normalized.append(posi_str + uld_str + '/' + weight_str
                              + des_str + shape_str + cargotype_str)
        total_str = ''.join('\n' + line for line in normalized)
        return total_weight, total_str


class BULK:
    """The bulk lines of a CPM; same attributes as ``ULD``."""

    def __init__(self, cpm_str):
        # Unlike ULD sections there is no header line to drop.
        self.uld_list = [line for line in cpm_str.split('\n') if line.strip()]
        self.weights, self.uldstrs = self.uld_normalized()

    def uld_normalized(self):
        """Return ``(total_weight, normalized_text)`` for the bulk lines.

        A normalized line is ``BLK/weight/destination[/cargo-type]``, or
        ``BLK/NIL`` for a line containing ``NIL``.

        :raises IndexError: if a non-NIL line lacks a weight or destination.
        """
        total_weight = 0
        normalized = []
        for uld in self.uld_list:
            if 'NIL' in uld:
                normalized.append('BLK' + '/NIL')
                continue

            # The weight may be the last field, so the closing "/" is optional.
            weight_str = re.findall(r'\/(\d{1,5})(?=\/|\s|$)', uld)[0]
            total_weight += int(weight_str)

            des_str = '/' + re.findall(r'\/([A-Z]{3})\/', uld)[0]
            cargotype = re.search(r'([CMXT])\.', uld)
            cargotype_str = '/' + cargotype.group(1) if cargotype else ''
            normalized.append('BLK' + '/' + weight_str + des_str + cargotype_str)
        total_str = ''.join('\n' + line for line in normalized)
        return total_weight, total_str


def test_print_ulds_infos(flight_cpm):
    """Debug helper: print the weight and normalized text of every section."""
    sections = (
        ('main left', flight_cpm.main_left, 2),
        ('main right', flight_cpm.main_right, 2),
        ('main center', flight_cpm.main_center, 2),
        ('lower left', flight_cpm.lower_left, 2),
        ('lower right', flight_cpm.lower_right, 2),
        ('lower', flight_cpm.lower, 2),
        ('bulk', flight_cpm.bulk, 0),
    )
    for label, section, start in sections:
        print(label)
        print(section.weights)
        print(section.uldstrs[start:])
    return


# Not a unit test despite its name: keep pytest from collecting it.
test_print_ulds_infos.__test__ = False


def cpm_nor_output(flight_cpm, flight_info):
    """Build the normalized CPM text.

    :param flight_cpm: parsed sections (``ULDs_777``).
    :param flight_info: flight line; every ``-`` is removed before output.
    :return: the message from ``CPM`` to ``CPM END``, lines joined by newlines.
        A section without entries contributes only its title line.
    """
    flight_info = re.sub(r'-', '', flight_info)
    titled_sections = (
        (r'M/D RIGHT SIDE', flight_cpm.main_right),
        (r'M/D LEFT SIDE', flight_cpm.main_left),
        (r'M/D CENTER', flight_cpm.main_center),
        (r'L/D RIGHT SIDE', flight_cpm.lower_right),
        (r'L/D LEFT SIDE', flight_cpm.lower_left),
        (r'L/D CENTER', flight_cpm.lower),
    )
    lines = ['CPM', flight_info]
    for title, section in titled_sections:
        lines.append(title)
        if section.uldstrs:
            # uldstrs starts with a newline; drop it before joining.
            lines.append(section.uldstrs[1:])
    if flight_cpm.bulk.uldstrs:
        lines.append(flight_cpm.bulk.uldstrs[1:])
    lines.append('CPM END')
    cpm_nor_txt = '\n'.join(lines)
    return cpm_nor_txt


def main(argc, argv, envp):
    """Script entry point; an optional first argument is the CPM file path."""
    if argc > 1:
        cpm_nor(argv[1])
    else:
        cpm_nor()
    return


if __name__ == '__main__':
    sys.exit(main(len(sys.argv), sys.argv, os.environ))
以下是txt文件原文,复制到txt文件中,改一下上面代码的文件地址,就可以验证了。
CPM
CA162/01JUN18.B2099.AMS-PEK
PEK
MAIN DECK LEFT SIDE
-AL/PMC43371CA/PEK/1910/T.Q4.TRANSIT
-BL/PMC44072CA/PEK/2010/T.Q5.RMD TRANSIT
-CL/PMC42424CA/PEK/2075/T.Q5.RMD TRANSIT
-DL/PMC45301CA/PEK/2095/T.Q5.RMD TRANSIT
-EL/PMC45873CA/PEK/2100/T.Q5.RMD TRANSIT
-FL/PMC33417CA/PEK/2150/T.Q5.RMD TRANSIT
-GHL/PGA70214CA/PEK/5320/T.Q5.TRANSIT
-JL/PMC42648CA/PEK/2180/T.Q5.RMD TRANSIT
-KL/PMC41763CA/PEK/4365/C.Q5.0
-LLR/PMC32898CA/PEK/1048/C.Q5.0
-ML/PMC45517CA/PEK/2200/T.Q5.RMD TRANSIT
-PL/PMC41874CA/PEK/2360/T.Q4.TRANSIT
-R/PMC43988CA/PEK/2445/C.Q6.0
MAIN DECK RIGHT SIDE
-AR/PMC31985CA/PEK/3460/C.Q4.0
-BR/PMC42256CA/PEK/967/C.Q5.0
-CR/PMC42614CA/PEK/1001/C.Q5.0
-DR/PMC41867CA/PEK/1652/C.Q5.0
-ER/PMC41703CA/PEK/1730/C.Q5.0
-FR/PMC43395CA/PEK/2070/C.Q5.0.OHG FWD 3.OHG AFT 3
-GHR/PGA70195CA/PEK/5760/C.Q5.0
-JR/PMC43872CA/PEK/2620/C.Q5.0.OHG FWD 4.OHG AFT 4
-KR/PMC44475CA/PEK/1865/C.Q5.0
-MR/PMC32909CA/PEK/2685/C.Q5.0.OHG FWD 4.OHG AFT 4
-PR/PMC44124CA/PEK/3515/C.Q4.0
LOWER DECK
-11P/PMC42943CA/PEK/1505/T.TRANSIT
-12P/PMC32590CA/PEK/1685/T.EX MR TRANSIT
-13P/PMC44202CA/PEK/3390/C.QM
-21P/PMC43672CA/PEK/3495/C.QM
-22P/PMC42421CA/PEK/3510/C.QM
-23P/PMC41028CA/PEK/3620/C.QM
-31P/PMC45480CA/PEK/3680/C.QM
-33L/AKE74669CA/PEK/345 -33R/AKE73341CA/PEK/495/M.MAIL
-41L/AKE74301CA/PEK/557/M.MAIL -41R/AKE73803CA/PEK/400/M.MAIL
-41P/PMC45916CA/PEK/4005/T.EX 41P PER TRANSIT
-42P/PMC42374CA/PEK/4275/C.QM
BULK/NIL
BULK/PEK/780/C.
SI/TOTAL LOAD 93835KG - UNDERLOAD 9367KG
SI/ALL WEIGHTS IN KG - ULD WEIGHTS INCLUDED - MEASUREMENTS IN CMS
SI/ZFW 234579 LIZFW 35 MACAFW 24,3
SI/TOW 338084 LITOW 33 MACTOW 25,5
SI/TO FUEL 103505 TRIP FUEL 90217
SI/STAB TRIM 6,0
SI/LATERAL IMBALANCE 15 KG
CPM END