excel批量处理

文件名:GM_PCS_A250_MD2_UHAST_96H_F2BIFX

PRODUCT_VERSION LOT TEST_ITEM TEST_NODE FACTORY RETEST FT PROGRAM DEVICES_TIME SITE_NUM T_TIME TEST_NUM TEST_NAME VALUE PART_ID SOFT_BIN LIMIT_L LIMIT_U UNIT PASSFG
A250 MD2 UHAST 96H F2BIFX F2BIFX后可能还有_XXX,如果存在就截取,不存在就空 NA 变量program endingtime 全给1 变量t_time 变量test_num

import pandas as pd
import os
import csv
 
 
def parse_excel(df, file, program, lot_id, ending_time):
    for idx, rows in df.iterrows():
        if idx < 3:
            continue
        # print(rows)
        part_id = rows['PART_ID']
        passfg = rows['PASSFG']
        # print(passfg)
        soft_bin = rows['SOFT_BIN']
        t_time = rows['T_TIME']
        test_num = rows['TEST_NUM']
        line = [part_id, passfg, soft_bin, t_time, test_num]
        for x in range(len(title)):
            nt = title[x]
            if len(nt.strip()) <= 0:
                continue
            now_line = line.copy()
            now_line.extend([nt, rows[nt], unit[x], limit_l[x], limit_u[x], program, lot_id, ending_time, file])
            print(now_line)
            datas.append(now_line)
 
 
# 初始化,start_line 为数据开始行
start_line = 'SITE_NUM'
ending_time = ''
lot_id = ''
program = ''
# 列头
header = ['PART_ID', 'PASSFG', 'SOFT_BIN', 'T_TIME', 'TEST_NUM', 'TEST_NAME', 'VALUE',
          'UNIT', 'LIMITL', 'LIMITU', 'PROGRAM', 'LOT_ID', 'ENDING_TIME', 'FILE_NAME']
# 输出路径
out_file_name = ''
# 读取路径
file_dir = r''
files = os.listdir(file_dir)
datas = []
for file in files:
    if '.csv' not in file or '~$' in file or 'result' in file:
        continue
    xlsx_file = os.path.join(file_dir, file)
    print('当前处理',xlsx_file)
    read_data = []
    head = None
    with open(xlsx_file, encoding='gbk') as f:
        row = csv.reader(f, delimiter=',')
        for idx, r in enumerate(row):
            if len(r) <= 0:
                continue
            if head is not None:
                read_data.append(r)
            first = r[0]
            if 'Program' in first:
                program = first.split(':')[-1].strip()
            elif 'Ending Time' in first:
                ending_time = first.lstrip('Ending Time:')
            elif 'Lot Id' in first:
                lot_id = first.split(':')[-1].strip()
            elif start_line in first:
                head = r
    print('当前开始行:', head)
    df = pd.DataFrame(read_data, columns=head, dtype=object)
    title = list(df.columns.values[6:])
    # print(title)
    unit = df.iloc[0, 6:].values
    # print(unit)
    limit_l = df.iloc[1, 6:].values
    # print(limit_l)
    limit_u = df.iloc[2, 6:].values
    # print(limit_u)
    parse_excel(df, file, program, lot_id, ending_time)
 
to_df = pd.DataFrame(datas)
to_df.to_csv(out_file_name, header=header,encoding='gbk')

上一篇:javascript – 动态设置dojox.grid.datagrid标题列宽


下一篇:c# – 动态地使用TextBox过滤DataGrid的名称