练一练正则匹配:解析CPM报文,并生成固定格式的CPM报文

博客太久没更新了,是该拔拔草了。最近刚好在比较深入地学习字符串正则匹配的内容,顺便完成了一个小作品,就是解析CPM报文的功能(CPM is short for Container Pallet Message)。

好了,照样话不多说,贴上代码

# -*- coding=utf-8 -*-
# using python 3
# Author: Sysuzzd
# Under Project: HS ON ML WITH SCIKIT AND TENSORFLOW
# @Time : 2019-6-29  22:35


# *******************************************   Purpose & Illustration   ***********************************************
# 解析CPM报,并生成标准格式CPM
# **********************************************************************************************************************

import sys, os
import re

def cpm_nor(cpm_file_route=None):
    """Parse a raw CPM telex file and print its normalized form.

    The raw text is cut into main-deck left, main-deck right, lower-deck and
    bulk sections with regular expressions (written for the 777-200F layout).
    The sections are wrapped in a ``ULDs_777`` object, and the total weight,
    the left/right imbalance and the normalized CPM text are printed.

    Args:
        cpm_file_route: Path of the raw CPM text file.  When omitted, the
            sample location that used to be hard-coded here is read, so
            existing zero-argument calls keep working.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be opened.
        IndexError: If one of the expected sections is not found in the text
            (every section is taken as the first regex match).
    """
    if cpm_file_route is None:
        cpm_file_route = r'D:\*PythonLearn\HS ON ML WITH SCIKIT AND TENSORFLOW\Hands-On.Machine.Learning.with.Scikit-Learn.and.TensorFlow.2017\handson-ml-master\CPM4.txt'
    with open(cpm_file_route, 'r') as cpm_file:
        cpm_str = cpm_file.read()

    # Flight line, e.g. "CA162/01JUN18.B2099.AMS-PEK"; the pattern consumes
    # the line's newline, which [:-1] drops again.
    flight_pattern = r'\w{2}\d{3,4}\/\d{2}\w{3}\d{2}\.\w\d{4}.+\w{3}\n'   # flight information line
    flight_info = re.findall(flight_pattern, cpm_str)[0][:-1]

    # re.S lets "." cross newlines so one capture holds a whole section;
    # re.M makes ^ and $ work per line.
    maindeck_left_pattern = r'(MAIN\sDECK\sLEFT\sSIDE.*)\n.+DECK\sRIGHT'
    maindeck_left_str = re.findall(maindeck_left_pattern, cpm_str, re.S | re.M)[0]

    # The right section must stop directly at "\nLOWER DECK": an earlier
    # variant, r'(MAIN\sDECK\sRIGHT\sSIDE.*)\n.+LOWER\sDECK', lost the last
    # (PR) position.
    maindeck_right_pattern = r'(MAIN\sDECK\sRIGHT\sSIDE.*)\nLOWER\sDECK'
    maindeck_right_str = re.findall(maindeck_right_pattern, cpm_str, re.S | re.M)[0]

    # The greedy capture runs up to the LAST "\nBULK", so with several BULK
    # lines it still contains the earlier ones; trim back until none is left.
    lowerdeck_pattern = r'(LOWER\sDECK.*)\nBULK'
    lowerdeck_str = re.findall(lowerdeck_pattern, cpm_str, re.S | re.M)[0]
    while 'BULK' in lowerdeck_str:
        lowerdeck_str = re.findall(r'(.*?)\nBULK', lowerdeck_str, re.S)[0]

    # Two consecutive bulk lines (without re.S, "." stays on one line).
    bulk_pattern = r'(BULK.*\nBULK?.*)\n'
    bulk_str = re.findall(bulk_pattern, cpm_str, re.M)[0]

    # Move centre-line / paired positions into their own sections.
    main_left, main_right, main_center = check_main_center(maindeck_left_str, maindeck_right_str)
    lower, lower_left, lower_right = check_lower_sides(lowerdeck_str)

    # Build the per-section ULD containers and report the weights.
    flight_cpm = ULDs_777(main_left, main_right, main_center, lower, lower_left, lower_right, bulk_str)
    print('Total weight of this flight is:', flight_cpm.total_weight(), 'KG')
    print('The later imbalance weight of this flight is:', flight_cpm.imbalance_weight() , 'KG')

    # Emit the CPM in the standard layout.
    cpm_nor_txt = cpm_nor_output(flight_cpm, flight_info)
    print(cpm_nor_txt)

def check_main_center(left, right):
    """Split centre-line positions out of the main-deck left section.

    When the left section contains a ``-R/`` position, every ``-R...`` entry
    and every ``-?LR...`` entry (one capital letter followed by ``LR``) is
    moved out of the left text into a separate centre block.  Without a
    ``-R/`` position the left text is returned untouched and the centre
    block is empty (previously this case raised ``UnboundLocalError``).

    Args:
        left: Main-deck left section text, header line first.
        right: Main-deck right section text; passed through unchanged.

    Returns:
        tuple: ``(left1, right1, center)`` where ``center`` holds the moved
        entries, each prefixed by a newline, or is the empty string.
    """
    left1 = left
    center = ''
    # TODO (from the original author): centre-line loads at positions other
    # than R are not handled when the section has no "-R/" entry.
    if r'-R/' in left:
        center_pattern = r'-R.*|-[A-Z]LR.*'
        # The substitution also eats the preceding "\n"; otherwise an empty
        # line would be left behind in the left section.
        center_sub_pattern = r'\n-R.*|\n-[A-Z]LR.*'
        center = ''.join('\n' + entry for entry in re.findall(center_pattern, left))
        left1 = re.sub(center_sub_pattern, '', left)
    return left1, right, center

def check_lower_sides(lower):
    """Separate paired left/right lower-deck positions from the section text.

    A line carrying both a ``-NNL...`` and a ``-NNR...`` entry is removed
    from the section, and its two entries are collected into the left and
    right blocks respectively.

    Args:
        lower: Lower-deck section text, header line first.  A trailing
            newline is optional.

    Returns:
        tuple: ``(lower1, lower_left, lower_right)``.  ``lower1`` is the
        section without the paired lines.  ``lower_left`` and
        ``lower_right`` start with a newline followed by the collected
        entries joined by newlines; each is a lone newline when nothing was
        collected.
    """
    # (?!R) keeps "-NNLR" positions out of the left list.  The previous
    # character class [^\sR] did that too, but also rejected every left
    # entry that merely contained an "R" further on (e.g. "/PER/").
    lower_left_pattern = r'(-\d{2}L(?!R)\S*)\s+'
    # "." never crosses a newline, so the capture is the rest of the line;
    # not requiring a trailing "\n" lets a pair on the last line match too.
    lower_right_pattern = r'(-\d{2}R.*)'
    lower_sides_sub_pattern = r'-\d{2}L.*-\d{2}R.*\n'

    # NOTE(review): a line holding only a left entry is collected here but is
    # not removed from lower1 (the removal pattern needs both sides) -
    # confirm whether such lines can occur in real messages.
    lower_left_list = re.findall(lower_left_pattern, lower)
    lower_right_list = re.findall(lower_right_pattern, lower)

    # The removal pattern deletes a whole line including its terminator.
    # Add a temporary terminator so that a pair on the last line is removed
    # as well, then take one trailing newline off again.
    padded = not lower.endswith('\n')
    lower1 = re.sub(lower_sides_sub_pattern, '', lower + '\n' if padded else lower)
    if padded and lower1.endswith('\n'):
        lower1 = lower1[:-1]

    lower_left = '\n' + '\n'.join(lower_left_list)
    lower_right = '\n' + '\n'.join(lower_right_list)
    return lower1, lower_left, lower_right

class ULDs_777(object):
    """Container for all cargo sections of one flight.

    Every section text is wrapped in a ``ULD`` object (``BULK`` for the bulk
    lines); the wrappers expose a numeric ``weights`` attribute that the
    aggregation methods below add up.
    """

    def __init__(self, main_left, main_right, main_center, lower, lower_left, lower_right, bulk_str):
        """Wrap each section text in its parser object."""
        self.main_left = ULD(main_left)
        self.main_right = ULD(main_right)
        self.main_center = ULD(main_center)
        self.lower = ULD(lower)
        self.lower_left = ULD(lower_left)
        self.lower_right = ULD(lower_right)
        self.bulk = BULK(bulk_str)

    def __str__(self):
        return 'This is a ULD-777F class'

    def total_weight(self):
        """Return the sum of the weights of all seven sections."""
        sections = (
            self.main_left,
            self.main_right,
            self.main_center,
            self.lower_left,
            self.lower_right,
            self.lower,
            self.bulk,
        )
        return sum(section.weights for section in sections)

    def imbalance_weight(self):
        """Return the absolute right-minus-left weight difference.

        Only the left and right sections of both decks take part; the
        centre, unsided lower-deck and bulk sections are not included.
        """
        right_side = self.main_right.weights + self.lower_right.weights
        left_side = self.main_left.weights + self.lower_left.weights
        return abs(right_side - left_side)

class ULD(object):
    """Parsed entries of one CPM section.

    Attributes set by the constructor:
        uld_list: The non-blank lines of the section after its first line.
        weights: Integer sum of the weights of all non-NIL entries.
        uldstrs: The normalized entries, each one prefixed by a newline
            (empty string when the section has no entries).
    """

    def __init__(self, cpm_str):
        """Split the section text into entry lines and normalize them.

        Args:
            cpm_str: Section text.  The first line (a header, or empty when
                the text starts with a newline) is discarded.  Blank lines
                are skipped, so an empty section or a trailing newline no
                longer raises ``IndexError``.
        """
        self.uld_list = [line for line in cpm_str.split('\n')[1:] if line.strip()]
        self.weights, self.uldstrs = self.uld_normalized()

    def uld_normalized(self):
        """Compute the section weight and the normalized entry text.

        Returns:
            tuple: ``(total_weight, total_str)``.  A NIL entry becomes
            ``<position>/NIL`` and adds no weight; any other entry becomes
            ``<position>/<uld id>/<weight>/<destination>`` followed by the
            optional ``/<shape>`` and ``/<cargo type>`` fields.

        Raises:
            IndexError: If a non-blank line lacks one of the mandatory
                fields.
        """
        weight_pattern = r'\/(\d{1,5})'   # first "/digits" group on the line
        total_weight = 0
        normalized = []
        for uld in self.uld_list:
            is_nil = 'NIL' in uld

            # Weight first (NIL entries carry none).
            if not is_nil:
                weight_str = re.findall(weight_pattern, uld)[0]
                total_weight += int(weight_str)

            # Position code: the leading "-XXX" in front of the first slash.
            posi_str = re.findall(r'(-\w{1,4})\/', uld)[0]
            if is_nil:
                normalized.append('\n' + posi_str + '/NIL')
                continue

            uld_str = '/' + re.findall(r'\/([A-Z]{3}\d{5}[A-Z]{2,3})\/', uld)[0]
            des_str = '/' + re.findall(r'\/([A-Z]{3})\/', uld)[0]
            # Shape and cargo type are optional; each field is appended only
            # when its pattern is found on the line.
            shape_found = re.findall(r'[Q][456ML][RL]?', uld)
            shape_str = '/' + shape_found[0] if shape_found else ''
            cargo_found = re.findall(r'([CMXT])\.', uld)
            cargotype_str = '/' + cargo_found[0] if cargo_found else ''
            # A trailing note field is reserved but not produced yet.
            normalized.append('\n' + posi_str + uld_str + '/' + weight_str
                              + des_str + shape_str + cargotype_str)

        return total_weight, ''.join(normalized)

class BULK(object):
    """Parsed bulk-load lines of a CPM.

    Attributes set by the constructor:
        uld_list: The non-blank lines of the bulk text.
        weights: Integer sum of the weights of all non-NIL lines.
        uldstrs: The normalized lines, each one prefixed by a newline.
    """

    def __init__(self, cpm_str):
        """Split the bulk text into lines and normalize them.

        Args:
            cpm_str: Bulk text, one bulk entry per line (no header line).
                Blank lines are skipped, so a trailing newline no longer
                raises ``IndexError``.
        """
        self.uld_list = [line for line in cpm_str.split('\n') if line.strip()]
        self.weights, self.uldstrs = self.uld_normalized()

    def uld_normalized(self):
        """Compute the bulk weight and the normalized bulk text.

        Returns:
            tuple: ``(total_weight, total_str)``.  A NIL line becomes
            ``BLK/NIL`` and adds no weight; any other line becomes
            ``BLK/<weight>/<destination>`` plus an optional
            ``/<cargo type>``.

        Raises:
            IndexError: If a non-NIL line lacks a weight or a destination.
        """
        # Weight: "/digits" followed by another slash or by the end of the
        # line, so a line without a trailing cargo field ("BULK/PEK/780")
        # parses too.
        weight_pattern = r'\/(\d{1,5})(?=\/|\s*$)'
        total_weight = 0
        normalized = []
        for uld in self.uld_list:
            if 'NIL' in uld:
                normalized.append('\n' + 'BLK' + '/NIL')
                continue

            weight_str = re.findall(weight_pattern, uld)[0]
            total_weight += int(weight_str)

            des_str = '/' + re.findall(r'\/([A-Z]{3})\/', uld)[0]
            cargo_found = re.findall(r'([CMXT])\.', uld)
            cargotype_str = '/' + cargo_found[0] if cargo_found else ''
            # A trailing note field is reserved but not produced yet.
            normalized.append('\n' + 'BLK' + '/' + weight_str + des_str + cargotype_str)

        return total_weight, ''.join(normalized)

def test_print_ulds_infos(flight_cpm):
    """Debug helper: print the weight and normalized entries of each section.

    For every section of ``flight_cpm`` a label line, the section's
    ``weights`` and its ``uldstrs`` are printed.  ``uldstrs`` starts with a
    single newline character, so exactly one character is sliced off; the
    former ``[2:]`` slice also removed the leading ``-`` of the first
    position.

    NOTE: despite the ``test_`` prefix this is a manual inspection helper,
    not an automated test.

    Args:
        flight_cpm: Object exposing the section attributes ``main_left``,
            ``main_right``, ``main_center``, ``lower_left``,
            ``lower_right``, ``lower`` and ``bulk``.
    """
    sections = (
        ('main left', 'main_left'),
        ('main right', 'main_right'),
        ('main center', 'main_center'),
        ('lower left', 'lower_left'),
        ('lower right', 'lower_right'),
        ('lower', 'lower'),
        ('bulk', 'bulk'),
    )
    for label, attr in sections:
        print(label)
        section = getattr(flight_cpm, attr)
        print(section.weights)
        print(section.uldstrs[1:])

def cpm_nor_output(flight_cpm, flight_info):
    """Assemble the normalized CPM text.

    Args:
        flight_cpm: Object whose section attributes each expose ``uldstrs``,
            the normalized entries prefixed by a newline.
        flight_info: Flight line of the raw message; hyphens are removed
            before it is written out.

    Returns:
        str: The message from ``CPM`` to ``CPM END``.  The lower-deck right
        and left bodies are omitted entirely when empty; every other section
        always contributes a body line, even an empty one.
    """
    def body(section):
        # Drop the leading newline of the block and terminate the line.
        return section.uldstrs[1:] + '\n'

    def optional_body(section):
        # Lower-deck side sections produce no line at all when empty.
        if section.uldstrs:
            return body(section)
        return ''

    pieces = [
        'CPM\n',
        flight_info.replace('-', ''), '\n',
        'M/D RIGHT SIDE', '\n',
        body(flight_cpm.main_right),
        'M/D LEFT SIDE', '\n',
        body(flight_cpm.main_left),
        'M/D CENTER', '\n',
        body(flight_cpm.main_center),
        'L/D RIGHT SIDE', '\n',
        optional_body(flight_cpm.lower_right),
        'L/D LEFT SIDE', '\n',
        optional_body(flight_cpm.lower_left),
        'L/D CENTER', '\n',
        body(flight_cpm.lower),
        body(flight_cpm.bulk),
        'CPM END',
    ]
    return ''.join(pieces)

def main(argc, argv, envp):
    """Script entry point: run the CPM normalization once.

    ``argc``, ``argv`` and ``envp`` mirror the C-style call made by the
    ``__main__`` guard; none of them is used here.  Returns None.
    """
    cpm_nor()


# Run the parser only when this file is executed as a script.  main() returns
# None, so sys.exit() ends the process with status 0 after a successful run.
if __name__ == '__main__':
    sys.exit(main(len(sys.argv), sys.argv, os.environ))

 

 

以下是txt文件原文,复制到txt文件中,改一下上面代码的文件地址,就可以验证了。

 

CPM
CA162/01JUN18.B2099.AMS-PEK
PEK
MAIN DECK LEFT SIDE
-AL/PMC43371CA/PEK/1910/T.Q4.TRANSIT
-BL/PMC44072CA/PEK/2010/T.Q5.RMD TRANSIT
-CL/PMC42424CA/PEK/2075/T.Q5.RMD TRANSIT
-DL/PMC45301CA/PEK/2095/T.Q5.RMD TRANSIT
-EL/PMC45873CA/PEK/2100/T.Q5.RMD TRANSIT
-FL/PMC33417CA/PEK/2150/T.Q5.RMD TRANSIT
-GHL/PGA70214CA/PEK/5320/T.Q5.TRANSIT
-JL/PMC42648CA/PEK/2180/T.Q5.RMD TRANSIT
-KL/PMC41763CA/PEK/4365/C.Q5.0
-LLR/PMC32898CA/PEK/1048/C.Q5.0
-ML/PMC45517CA/PEK/2200/T.Q5.RMD TRANSIT
-PL/PMC41874CA/PEK/2360/T.Q4.TRANSIT
-R/PMC43988CA/PEK/2445/C.Q6.0
MAIN DECK RIGHT SIDE
-AR/PMC31985CA/PEK/3460/C.Q4.0
-BR/PMC42256CA/PEK/967/C.Q5.0
-CR/PMC42614CA/PEK/1001/C.Q5.0
-DR/PMC41867CA/PEK/1652/C.Q5.0
-ER/PMC41703CA/PEK/1730/C.Q5.0
-FR/PMC43395CA/PEK/2070/C.Q5.0.OHG FWD 3.OHG AFT 3
-GHR/PGA70195CA/PEK/5760/C.Q5.0
-JR/PMC43872CA/PEK/2620/C.Q5.0.OHG FWD 4.OHG AFT 4
-KR/PMC44475CA/PEK/1865/C.Q5.0
-MR/PMC32909CA/PEK/2685/C.Q5.0.OHG FWD 4.OHG AFT 4
-PR/PMC44124CA/PEK/3515/C.Q4.0
LOWER DECK
-11P/PMC42943CA/PEK/1505/T.TRANSIT    
-12P/PMC32590CA/PEK/1685/T.EX MR TRANSIT    
-13P/PMC44202CA/PEK/3390/C.QM    
-21P/PMC43672CA/PEK/3495/C.QM    
-22P/PMC42421CA/PEK/3510/C.QM    
-23P/PMC41028CA/PEK/3620/C.QM    
-31P/PMC45480CA/PEK/3680/C.QM    
-33L/AKE74669CA/PEK/345    -33R/AKE73341CA/PEK/495/M.MAIL
-41L/AKE74301CA/PEK/557/M.MAIL    -41R/AKE73803CA/PEK/400/M.MAIL
-41P/PMC45916CA/PEK/4005/T.EX 41P PER TRANSIT    
-42P/PMC42374CA/PEK/4275/C.QM    
BULK/NIL
BULK/PEK/780/C.
SI/TOTAL LOAD 93835KG  - UNDERLOAD 9367KG
SI/ALL WEIGHTS IN KG - ULD WEIGHTS INCLUDED - MEASUREMENTS IN CMS
SI/ZFW 234579  LIZFW 35  MACAFW 24,3
SI/TOW 338084  LITOW 33  MACTOW 25,5
SI/TO FUEL 103505  TRIP FUEL 90217
SI/STAB TRIM 6,0
SI/LATERAL IMBALANCE 15 KG
CPM END

上一篇:gMIS吉密斯十年执念:Lower Costs较低成本Better Productivity较高效率


下一篇:Python Lambda