#coding:gbk
#导入处理excel的模块
import xlrd
#定义哪些字段需要判断,只支持时间字段
toSureColArray = ['CREATE_TIME','MODIFY_TIME']
#确定某个字段是否在数组中
def isColInArray(colName, colArray):
for i in range(0, len(colArray)):
if (colName == colArray[i]):
return 1
else:
return 0
#定义子sheet字段属性映射处理函数
#入参为文件描述符、、源表列字段、和目表列字段
def childSheetColFunc(fOutput, sourceTabCol, destTabCol,destTableSureArray):
for i in range(2,len(sourceTabCol)):
if len(destTabCol[i]) != 0:
if len(sourceTabCol[i]) != 0:
#在子表中,如果浪潮int_id对应的直真字段英文名称为空,在destTableSureArry不存在INT_ID属性
destTableSureArray.append(str(destTabCol[i]).upper())
fOutput.write("\t\t\t\t\tA."+str(sourceTabCol[i]).upper()+" "+str(destTabCol[i]).upper()+",\n")
else:
continue
else:
continue
#for i in range(0,len(destTableSureArray)):
# print("******************")
# print(str(destTableSureArray[i]))
# print("******************")
#对creat_time和modify_time做单独处理
for i in range(0,len(toSureColArray)):
if isColInArray(toSureColArray[i],destTableSureArray) == 0:
fOutput.write("\t\t\t\t\tSYSDATE "+toSureColArray[i]+",\n")
destTableSureArray.append(toSureColArray[i])
#定义子sheet目标数据插入函数主键模块
#入参为文件描述符、源表列字段、目表源字段
def childSheetColDesKeyFunc(fOutput, sourceTabCol, destTabCol):
#如果为空表,此处会报下标溢出错误,所以入参必须保证表中有数据
if len(sourceTabCol[2]) != 0:
fOutput.write("\t\t\t\t<col name=\""+str(destTabCol[2]).upper()+"\">["+str(destTabCol[2]).upper()+"]</col>\n")
fOutput.write("\t\t\t\t<col name=\"STATEFLAG\">[STATEFLAG]</col>\n")
#定义子sheet目标函数插入函数非主键模块
def childSheetColDesNKeyFunc(fOutput, sourceTabCol, destTableSureArray):
if len(sourceTabCol[2]) != 0:
for i in range(1,len(destTableSureArray)):
fOutput.write("\t\t\t\t<col name=\""+destTableSureArray[i]+"\">["+destTableSureArray[i]+"]</col>\n")
else:
for i in range(0,len(destTableSureArray)):
fOutput.write("\t\t\t\t<col name=\""+destTableSureArray[i]+"\">["+destTableSureArray[i]+"]</col>\n")
#定义子sheet处理函数
def childSheetFunc(sheetName, sourceTable, destTable):
#根据sheet名打开表
shtInstance=workBook.sheet_by_name(sheetName)
#存放最终哪些浪潮字段需要映射
destTableSureArray = []
langchaoTabCol=shtInstance.col_values(4)
zhizhenTabCol=shtInstance.col_values(6)
print("***********现在开始生成"+str(destTable).upper()+".xml 文件***********")
path="e:\\"+destTable.upper()+".xml"
fOutput=open(path,"w")
fOutput.write("<?xml version=\"1.0\" encoding=\"gb2312\"?>\n")
fOutput.write("<Conf>\n")
fOutput.write("\t<stage id=\"1\" describe=\""+str(sourceTable).upper()+"\" doUpdate=\"$doUpdate$\">\n")
fOutput.write("\t\t<drawout DBid=\"ZZ\">\n")
fOutput.write("\t\t\t<![CDATA[\n")
fOutput.write("\t\t\t\tSELECT\n")
fOutput.write("\t\t\t\t\t0 STATEFLAG,\n")
childSheetColFunc(fOutput, zhizhenTabCol, langchaoTabCol,destTableSureArray)
#fOutput.write("\t\t\t\t\tSYSDATE STATEFLAG\n")
fOutput.write("\t\t\t\tFROM INFORMIX."+str(sourceTable).upper()+" A\n")
fOutput.write("\t\t\t\tWHERE 1=1\n")
fOutput.write("\t\t\t\t\tAND ROW_NUM < 2\n")
fOutput.write("\t\t\t]]>\n")
fOutput.write("\t\t</drawout>\n")
fOutput.write("\t\t<Load DBid=\"IRMS_143\" tableName=\""+str(destTable).upper()+"\"\n")
fOutput.write("\t\t\t<Dimension>\n")
childSheetColDesKeyFunc(fOutput, zhizhenTabCol, langchaoTabCol)
fOutput.write("\t\t\t<Dimension>\n")
fOutput.write("\t\t\t<Cols>\n")
childSheetColDesNKeyFunc(fOutput, zhizhenTabCol, destTableSureArray)
fOutput.write("\t\t\t<Cols>\n")
fOutput.write("\t\t</Load>\n")
fOutput.write("\t</stage>\n")
fOutput.write("</Conf>\n")
fOutput.close()
print("***********"+str(destTable).upper()+".xml 文件生成完成***********")
#根据主表获取子表信息并处理
def mainSheetAndProc(mainSheet):
sheetName=mainSheet.col_values(2)
sourceTabName=mainSheet.col_values(5)
destTabName=mainSheet.col_values(3)
curNum = 0
for i in range(2,len(sheetName)):
if len(sheetName[i]) ==0:
continue
curNum += 1
print(">>>>>>开始生成第"+str(curNum)+"个文件")
childSheetFunc(sheetName[i], sourceTabName[i], destTabName[i])
#入口函数
if __name__=="__main__":
workBook=xlrd.open_workbook("E:\\内线比对模型.xls")
mainSheet=workBook.sheets()[0]
mainSheetAndProc(mainSheet)
python脚本从excel表到处数据,生成指定格式的文件