一、pandas
pandas模块是数据分析的大杀器,它使得对于文件相关的操作变得简单。
看一下它的简单使用
import pandas as pd # 读取
df = pd.read_csv('all_forum_info.csv')
print(df.info()) # 写入
df.to_csv('data.csv')
具体用法参照我的博客pandas系列 https://www.cnblogs.com/zhangyafei/p/10685438.html
二、csv
1.读取
import csv # 读取
with open('all_forum_info.csv', encoding='utf-8') as myFile:
lines = csv.reader(myFile)
for line in lines:
print(line) ['hgroup', 'forum_topic', 'forum_url', 'count_topic', 'count_post']
['Welcome to Breastcancer.org', 'Info & Resources for New Patients & Members - Please No Posts!', 'https://community.breastcancer.org/forum/131', '11', '11']
['Welcome to Breastcancer.org', 'Acknowledging and honoring our Community', 'https://community.breastcancer.org/forum/135', '111', '5,372']
['Not Diagnosed but Concerned', 'Not Diagnosed But Worried', 'https://community.breastcancer.org/forum/83', '16,638', '133,217']
['Not Diagnosed but Concerned', 'Waiting for Test Results', 'https://community.breastcancer.org/forum/62', '6,712', '62,439']
['Not Diagnosed but Concerned', 'Benign Breast Conditions', 'https://community.breastcancer.org/forum/148', '795', '4,094']
['Not Diagnosed but Concerned', 'High Risk for Breast Cancer', 'https://community.breastcancer.org/forum/47', '2,146', '23,435']
['Tests, Treatments & Side Effects', 'Just Diagnosed', 'https://community.breastcancer.org/forum/5', '6,894', '103,851']
['Tests, Treatments & Side Effects', 'Diagnosed and Waiting for Test Results', 'https://community.breastcancer.org/forum/147', '592', '6,442']
2. 写入
"""
写入
'w'表示写模式。
首先open()函数打开当前路径下的名字为't.csv'的文件,如果不存在这个文件,则创建它,返回myFile文件对象。
csv.writer(myFile)返回writer对象myWriter。
writerow()方法是一行一行写入,writerows方法是一次写入多行。
注意:如果文件't.csv'事先存在,调用writer函数会先清空原文件中的文本,再执行writerow/writerows方法。
""" with open('data.csv', 'w', encoding='utf-8') as myFile:
myWriter = csv.writer(myFile)
myWriter.writerow(['title', '数学'])
myWriter.writerow([8, 'h'])
myList = [[1, 2, 3], [4, 5, 6]]
myWriter.writerows(myList)
data.csv
title,数学
8,h
1,2,3
4,5,6
修改分割符
csv.register_dialect('mydialect', delimiter='|', quoting=csv.QUOTE_ALL)
with open('test.csv', 'r') as myFile:
lines = csv.reader(myFile, 'mydialect')
# print(lines.line_num)
for line in lines:
print(line) ['1', '2', '3']
['2', '3', '4']
三、xlrd, xlwt
1. 读取
import xlrd myWorkbook = xlrd.open_workbook('data.xlsx') # 读取excel的三种方式 # 方式一
mySheets = myWorkbook.sheets() # 获取工作表list。
mySheet = mySheets[0] # 通过索引顺序获取。
# 方式二
# mySheet1 = myWorkbook.sheet_by_index(0) # 通过索引顺序获取。
# 方式三
# mySheet2 = myWorkbook.sheet_by_name('table') # 通过名称获取。 # 获取行数和列数
nrows = mySheet.nrows
ncols = mySheet.ncols
print(nrows, ncols) # 获取一行和一列
myRowValues = mySheet.row_values(0) # i是行数,从0开始计数,返回list对象。 myColValues = mySheet.col_values(0) # i是列数,从0开始计数,返回list对象。 print(myRowValues, myColValues) # 读取单元格数据
# myCell = mySheet.cell(0, 0) # 获取单元格,i是行数,j是列数,行数和列数都是从0开始计数。
# myCellValue = myCell.value # 通过单元格获取单元格数据。
myCellValue = mySheet.cell_value(0, 0) # 直接获取单元格数据,i是行数,j是列数,行数和列数都是从0开始计数。
print(myCellValue)
2. 写入
# 写入
import xlwt # 创建Excel工作薄
myWorkbook = xlwt.Workbook() # 添加Excel工作表
mySheet = myWorkbook.add_sheet('A Test Sheet') # 写入数据
myStyle = xlwt.easyxf('font: name Times New Roman, color-index red, bold on', num_format_str='#,##0.00') #数据格式
mySheet.write(1, 1, 1234.56, myStyle)
mySheet.write(2, 0, 1) #写入A3,数值等于1
mySheet.write(2, 1, 1) #写入B3,数值等于1
mySheet.write(2, 2, xlwt.Formula("A3+B3")) #写入C3,数值等于2(A3+B3) # 保存
myWorkbook.save('excelFile.xlsx')
xlutils结合xlrd可以达到修改excel文件目的
import xlrd
from xlutils.copy import copy workbook = xlrd.open_workbook('excelFile.xlsx')
workbooknew = copy(workbook)
ws = workbooknew.get_sheet(0)
ws.write(3, 0, 'changed!')
workbooknew.save('excelFilecopy.xls')
openpyxl可以对excel文件进行读写操作
from openpyxl import Workbook
from openpyxl import load_workbook
from openpyxl.writer.excel import ExcelWriter workbook_ = load_workbook("data.xlsx")
sheetnames = workbook_.get_sheet_names() # 获得表单名字
print(sheetnames)
sheet = workbook_.get_sheet_by_name(sheetnames[0])
print(sheet.cell(row=3, column=3).value)
sheet['A1'] = '47'
workbook_.save("data_new.xlsx")
wb = Workbook()
ws = wb.active
ws['A1'] = 4
wb.save("data_new2.xlsx")
xlsxwriter可以写excel文件并加上图表
import xlsxwriter def get_chart(series):
chart = workbook.add_chart({'type': 'line'})
for ses in series:
name = ses["name"]
values = ses["values"]
chart.add_series({
'name': name,
'categories': 'A2:A10',
'values': values
})
chart.set_size({'width': 700, 'height': 350})
return chart if __name__ == '__main__':
workbook = xlsxwriter.Workbook('H5应用中心关键数据及趋势.xlsx')
worksheet = workbook.add_worksheet("每日PV,UV")
headings = ['日期', '平均值']
worksheet.write_row('A1', headings)
index = 0
for row in range(1, 10):
for com in [0, 1]:
worksheet.write(row, com, index)
index += 1
series = [{"name": "平均值", "values": "B2:B10"}]
chart = get_chart(series)
chart.set_title({'name': '每日页面分享数据'})
worksheet.insert_chart('H7', chart)
workbook.close()