文件名:GM_PCS_A250_MD2_UHAST_96H_F2BIFX
PRODUCT_VERSION | LOT | TEST_ITEM | TEST_NODE | FACTORY | RETEST | FT | PROGRAM | DEVICES_TIME | SITE_NUM | T_TIME | TEST_NUM | TEST_NAME | VALUE | PART_ID | SOFT_BIN | LIMIT_L | LIMIT_U | UNIT | PASSFG |
A250 | MD2 | UHAST | 96H | F2BIFX | F2BIFX后可能还有_XXX,如果存在就截取,不存在就空 | NA | 变量program | endingtime | 全给1 | 变量t_time | 变量test_num |
import pandas as pd
import os
import csv
def parse_excel(df, file, program, lot_id, ending_time):
for idx, rows in df.iterrows():
if idx < 3:
continue
# print(rows)
part_id = rows['PART_ID']
passfg = rows['PASSFG']
# print(passfg)
soft_bin = rows['SOFT_BIN']
t_time = rows['T_TIME']
test_num = rows['TEST_NUM']
line = [part_id, passfg, soft_bin, t_time, test_num]
for x in range(len(title)):
nt = title[x]
if len(nt.strip()) <= 0:
continue
now_line = line.copy()
now_line.extend([nt, rows[nt], unit[x], limit_l[x], limit_u[x], program, lot_id, ending_time, file])
print(now_line)
datas.append(now_line)
# 初始化,start_line 为数据开始行
start_line = 'SITE_NUM'
ending_time = ''
lot_id = ''
program = ''
# 列头
header = ['PART_ID', 'PASSFG', 'SOFT_BIN', 'T_TIME', 'TEST_NUM', 'TEST_NAME', 'VALUE',
'UNIT', 'LIMITL', 'LIMITU', 'PROGRAM', 'LOT_ID', 'ENDING_TIME', 'FILE_NAME']
# 输出路径
out_file_name = ''
# 读取路径
file_dir = r''
files = os.listdir(file_dir)
datas = []
for file in files:
if '.csv' not in file or '~$' in file or 'result' in file:
continue
xlsx_file = os.path.join(file_dir, file)
print('当前处理',xlsx_file)
read_data = []
head = None
with open(xlsx_file, encoding='gbk') as f:
row = csv.reader(f, delimiter=',')
for idx, r in enumerate(row):
if len(r) <= 0:
continue
if head is not None:
read_data.append(r)
first = r[0]
if 'Program' in first:
program = first.split(':')[-1].strip()
elif 'Ending Time' in first:
ending_time = first.lstrip('Ending Time:')
elif 'Lot Id' in first:
lot_id = first.split(':')[-1].strip()
elif start_line in first:
head = r
print('当前开始行:', head)
df = pd.DataFrame(read_data, columns=head, dtype=object)
title = list(df.columns.values[6:])
# print(title)
unit = df.iloc[0, 6:].values
# print(unit)
limit_l = df.iloc[1, 6:].values
# print(limit_l)
limit_u = df.iloc[2, 6:].values
# print(limit_u)
parse_excel(df, file, program, lot_id, ending_time)
to_df = pd.DataFrame(datas)
to_df.to_csv(out_file_name, header=header,encoding='gbk')