python-excel处理

'''
要确定excel表格中的工作簿中工作表的数量、名称和每个工作表中行列的数量
open_workbook 模块用来读取和分析Excel文件
'''
import sys
from xlrd import open_workbook
input_file=sys.argv[1]
workbook=open_workbook(input_file)
print("Number of worksheet:",workbook.nsheets)
for worksheet in workbook.sheets():
    print('Worksheet name:',worksheet.name,'  number of rows in worksheet:',worksheet.nrows,'  number of cols in worksheet',worksheet.ncols)


'''
处理单个工作表
读写Excel文件
worksheet.cell_value();返回数据
'''
import sys
from xlrd import open_workbook
from xlwt import Workbook
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy1')
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    for row_index in range(worksheet.nrows):
        for column_index in range(worksheet.ncols):
            output_worksheet.write(row_index,column_index,worksheet.cell_value(row_index,column_index))
output_workbook.save(output_file)


'''
处理单个工作表
读写Excel文件-格式化数据
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy2')
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    for row_index in range(worksheet.nrows):
        row_list_output=[]
        for column_index in range(worksheet.ncols):
            if worksheet.cell_type(row_index,column_index)==3:
                date_cell=xldate_as_tuple(worksheet.cell_value(row_index,column_index),workbook.datemode)
                date_cell=date(*date_cell[0:3]).strftime('%m-%d-%Y')
                row_list_output.append(date_cell)
                output_worksheet.write(row_index,column_index,date_cell)
            else:
                non_date_cell=worksheet.cell_value(row_index,column_index)
                row_list_output.append(non_date_cell)
                output_worksheet.write(row_index,column_index,non_date_cell)
output_workbook.save(output_file)


'''
筛选特定行-行中的值满足某个条件
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy3')
sales_amount_index=3
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[]
    header=worksheet.row_values(0)
    data.append(header)
    for row_index in range(1,worksheet.nrows):
        row_list=[]
        sales_amount=worksheet.cell_value(row_index,sales_amount_index)
        if sales_amount>1400.0:
            for column_index in range(worksheet.ncols):
                cell_value=worksheet.cell_value(row_index,column_index)
                cell_type=worksheet.cell_type(row_index,column_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
筛选特定行-行中的值属于某个集合
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy4')
important_dates=['01/24/2013', '01/31/2013']
purchase_date_index=4
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    heade=worksheet.row_values(0)
    data=[]
    data.append(heade)
    for row_index in range(1,worksheet.nrows):
        purchase_datetime=xldate_as_tuple(worksheet.cell_value(row_index,purchase_date_index),workbook.datemode)
        purchase_date=date(*purchase_datetime[0:3]).strftime('%m/%d/%Y')
        row_list=[]
        if purchase_date in important_dates:
            for column_index in range(worksheet.ncols):
                cell_value=worksheet.cell_value(row_index,column_index)
                cell_type=worksheet.cell_type(row_index,column_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%m/%d/%Y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
筛选特定行-行中的值属匹配于某个格式(正则表达式)
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
import re
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy5')
pattern=re.compile(r'(?P<my_pattern>^J.*)')
customer_name_column_index=1
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[]
    head=worksheet.row_values(0)
    data.append(head)
    for row_index in range(1,worksheet.nrows):
        row_list=[]
        if pattern.search(worksheet.cell_value(row_index,customer_name_column_index)):
            for column_index in range(worksheet.ncols):
                cell_value=worksheet.cell_value(row_index,column_index)
                cell_type=worksheet.cell_type(row_index,column_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for elemnt_list,element in enumerate(output_list):
            output_worksheet.write(list_index,elemnt_list,element)
output_workbook.save(output_file)


'''
选取特定的列-列索引值
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy6')
want_column=[1,4]
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[]
    for row_index in range(worksheet.nrows):
        row_list=[]
        for column_index in want_column:
            cell_value=worksheet.cell_value(row_index,column_index)
            cell_type=worksheet.cell_type(row_index,column_index)
            if cell_type==3:
                date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                row_list.append(date_cell)
            else:
                row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
选取特定的列-列标题
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
putput_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy7')
want_columns=['Customer ID', 'Purchase Date']
with open_workbook(input_file) as workbook:
    worksheet=workbook.sheet_by_name('january_2013')
    data=[want_columns]
    header_list=worksheet.row_values(0)
    header_index_list=[]
    for header_index in range(len(header_list)):
        if header_list[header_index] in want_columns:
            header_index_list.append(header_index)
    for row_index in range(1,worksheet.nrows):
        row_list=[]
        for column_index in header_index_list:
            cell_value=worksheet.cell_value(row_index,column_index)
            cell_type=worksheet.cell_type(row_index,column_index)
            if cell_type==3:
                date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                row_list.append(date_cell)
            else:
                row_list.append(cell_value)
        if row_list:
            data.append(row_list)
    for index_list,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(index_list,element_list,element)
output_workbook.save(putput_file)


'''
在所有工作表中筛选特定行
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_wroksheet=output_workbook.add_sheet('copy8')
sales_index=3
threshold=2000.0
first_worksheet=True
with open_workbook(input_file) as workbook:
    data=[]
    for worksheet in workbook.sheets():
        if first_worksheet:
            heade=worksheet.row_values(0)
            data.append(heade)
            first_worksheet=False
        for row_index in range(1,worksheet.nrows):
            row_list=[]
            sale_amount=worksheet.cell_value(row_index,sales_index)
            if sale_amount>threshold:
                for column_index in range(worksheet.ncols):
                    cell_value = worksheet.cell_value(row_index, column_index)
                    cell_type=worksheet.cell_type(row_index,column_index)
                    if cell_type==3:
                        date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                        date_cell=date(*date_cell[0:3]).strftime('%d-%m-%Y')
                        row_list.append(date_cell)
                    else:
                        row_list.append(cell_value)
            if row_list:
                data.append(row_list)
    for index_list,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_wroksheet.write(index_list,element_list,element)
output_workbook.save(output_file)


'''
在所有工作表中选定特定的列
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy9')
want_columns=['Customer Name', 'Sale Amount']
first_worksheet=True
with open_workbook(input_file) as workbook:
    data=[want_columns]
    index_of_cols_to_keep=[]
    for worksheet in workbook.sheets():
        if first_worksheet:
            header=worksheet.row_values(0)
            for columns_index in range(len(header)):
                if header[columns_index] in want_columns:
                    index_of_cols_to_keep.append(columns_index)
            first_worksheet=False
        for row_index in range(1,worksheet.nrows):
            row_list=[]
            for columns_index in index_of_cols_to_keep:
                cell_value=worksheet.cell_value(row_index,columns_index)
                cell_type=worksheet.cell_type(row_index,columns_index)
                if cell_type==3:
                    date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                    date_cell=date(*date_cell[0:3]).strftime('%d-%m-%y')
                    row_list.append(date_cell)
                else:
                    row_list.append(cell_value)
            if row_list:
                data.append(row_list)
    for list_index,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
在excel工作簿中读取一组工作表
在一组工作表中筛选特定行
从第一个和第二个工作表中筛选出销售额大于 $1900.00 的那些行
'''
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_file=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy10')
threshold=1900.0
want_sheet=[0,1]
want_columns_index=3
first_worksheet=True
with open_workbook(input_file) as workbook:
    data=[]
    for sheet_index in range(workbook.nsheets):
        if sheet_index in want_sheet:
            worksheet=workbook.sheet_by_index(sheet_index)
            if first_worksheet:
                header_row=worksheet.row_values(0)
                data.append(header_row)
                first_worksheet=False
            for row_index in range(1,worksheet.nrows):
                row_list=[]
                sale_amount=worksheet.cell_value(row_index,want_columns_index)
                if sale_amount>threshold:
                    for column_index in range(worksheet.ncols):
                        cell_value=worksheet.cell_value(row_index,column_index)
                        cell_type=worksheet.cell_type(row_index,column_index)
                        if cell_type==3:
                            date_cell=xldate_as_tuple(cell_value,workbook.datemode)
                            date_cell=date(*date_cell[0:3]).strftime('%d-%m-%y')
                            row_list.append(date_cell)
                        else:
                            row_list.append(cell_value)
                if row_list:
                    data.append(row_list)
    for list_index ,output_list in enumerate(data):
        for element_list,element in enumerate(output_list):
            output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
处理多个工作簿
工作表计数以及每个工作表中的行列计数
想知道一个文件夹中工作簿的数量,每个工作簿中工作表的数量,以及每个工作表中行与列的数量
'''
import glob
import sys
import os
from xlrd import open_workbook
input_directionary=sys.argv[1]
workbook_counter=0
for input_file in glob.glob(os.path.join(input_directionary,'*.xls*')):
    workbook=open_workbook(input_file)
    print('Workbook Name: %s ' % os.path.basename(input_file))
    print('Number of worksheet: %d' % workbook.nsheets)
    for worksheet in workbook.sheets():
        print('Worksheet Name:',worksheet.name,' worksheet rows: ',worksheet.nrows,' worksheet cols:',worksheet.ncols)
    workbook_counter=workbook_counter+1
print('Number of workbook: %d' %(workbook_counter))


'''
从多个工作簿中连接数据
将多个工作簿中所有工作表的数据垂直连接成一个输出文件
'''
import glob
import os
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
from datetime import date
input_directionary=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worksheet=output_workbook.add_sheet('copy11')
data=[]
first_worksheet=True
for input_file in glob.glob(os.path.join(input_directionary,'*.xls*')):
    print('Workbook Name:',os.path.basename(input_file))
    with open_workbook(input_file) as workbook:
        for worksheet in workbook.sheets():
            if first_worksheet:
                header_row=worksheet.row_values(0)
                data.append(header_row)
                first_worksheet=False
            for row_index in range(1,worksheet.nrows):
                row_list=[]
                for column_index in range(worksheet.ncols):
                    ceLl_value=worksheet.cell_value(row_index,column_index)
                    ceLl_type=worksheet.cell_type(row_index,column_index)
                    if ceLl_type==3:
                        date_cell=xldate_as_tuple(ceLl_value,workbook.datemode)
                        date_cell=date(*date_cell[0:3]).strftime('%d-%m-%y')
                        row_list.append(date_cell)
                    else:
                        row_list.append(ceLl_value)
                if row_list:
                    data.append(row_list)
for list_index ,output_list in enumerate(data):
    for element_list,element in enumerate(output_list):
        output_worksheet.write(list_index,element_list,element)
output_workbook.save(output_file)


'''
为每个工作簿和工作表计算总数和均值
为多个工作簿计算工作表级别和工作簿级别的统计量
'''
import glob
import os
import sys
from xlrd import open_workbook,xldate_as_tuple
from xlwt import Workbook
input_floder=sys.argv[1]
output_file=sys.argv[2]
output_workbook=Workbook()
output_worsheet=output_workbook.add_sheet('sums_and_avergae')
all_data=[]
sales_amount_index=3
header=['workbook', 'worksheet', 'worksheet_total','worksheet_average','workbook_total','workbook_average']
all_data.append(header)
for input_file in glob.glob(os.path.join(input_floder,'*.xls*')):
    with open_workbook(input_file) as workbook:
        list_of_totals=[]
        list_of_numbers=[]
        workbook_output=[]
        for worksheet in workbook.sheets():
            total_sales=0
            number_of_sales=0
            worksheet_list=[]
            worksheet_list.append(os.path.basename(input_file))
            worksheet_list.append(worksheet.name)
            for row_index in range(1,worksheet.nrows):
                try:
                    total_sales=total_sales+float(str(worksheet.cell_value(row_index,sales_amount_index)).strip('$').replace(',',''))
                    number_of_sales=number_of_sales+1
                except:
                    total_sales=total_sales+0
                    number_of_sales=number_of_sales+0
            average_sales='%.2f' % (total_sales/number_of_sales)
            worksheet_list.append(average_sales)
            worksheet_list.append(float(average_sales))
            list_of_totals.append(total_sales)
            list_of_numbers.append(float(number_of_sales))
            workbook_output.append(worksheet_list)
        workbook_total=sum(workbook_total)
        workbook_average=sum(list_of_totals)/sum(list_of_numbers)
        for list_element in workbook_output:
            list_element.append(worksheet_list)
            list_element.append(workbook_average)
        all_data.extend(workbook_output)
for list_index,output_list in enumerate(all_data):
    for element_list,element in enumerate(output_list):
        output_worsheet.write(list_index,element_list,element)
output_workbook.save(output_file)
上一篇:c#将数据导出成excel表格


下一篇:java使用poi导出excel