# 爬取各地区 GDP 数据并保存 (Scrape per-region GDP data and save it.)
import requests
import json.encoder
import pandas as pd
import matplotlib.pyplot as plt
# 获取数据
def getData():
    """Download a region-by-period table from data.stats.gov.cn and reshape it.

    Sends one ``QueryData`` request (``dbcode=fsnd``, indicator ``A020101``,
    ``rowcode=reg``, ``colcode=sj``) and converts the JSON reply into a
    header row plus one data row per entry of the row dimension.

    Returns:
        tuple: ``(column, temp_contents, name)`` where

        * ``column`` is the header row: the indicator's ``cname`` followed
          by the ``cname`` of every column node;
        * ``temp_contents`` is a list of rows, each
          ``[row cname, value, value, ...]`` with one value per column node;
        * ``name`` is the indicator's ``cname``.

    Raises:
        requests.HTTPError: If the server replies with an error status.
        requests.Timeout: If the server does not answer within the timeout.
        KeyError, IndexError: If the reply does not have the expected layout
            or holds fewer data nodes than rows x columns.
    """
    headers = {
        'Connection': 'keep-alive',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.116 Safari/537.36',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Referer': 'https://data.stats.gov.cn/easyquery.htm?cn=E0103',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9',
    }
    params = (
        ('m', 'QueryData'),
        ('dbcode', 'fsnd'),
        ('rowcode', 'reg'),
        ('colcode', 'sj'),
        ('wds', '[{"wdcode":"zb","valuecode":"A020101"}]'),
        ('dfwds', '[]'),
        ('k1', '1625471171166'),
    )

    # 1. Fetch the data.
    # NOTE(review): verify=False disables TLS certificate validation. It is
    # kept because the original request relied on it; confirm whether the
    # site's certificate chain now validates and drop it if so.
    response = requests.get(
        'https://data.stats.gov.cn/easyquery.htm',
        headers=headers,
        params=params,
        verify=False,
        timeout=30,  # never hang forever on an unresponsive server
    )
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    # 2. Parse the reply.
    returndata = json.loads(response.content)['returndata']
    wdnodes = returndata['wdnodes']
    name = wdnodes[0]['nodes'][0]['cname']
    datanodes = returndata['datanodes']
    columns = wdnodes[2]['nodes']
    rows = wdnodes[1]['nodes']

    # 3. Build the table.
    # Header row: indicator name first, then one label per column node.
    column = [name] + [node['cname'] for node in columns]

    # The code reads datanodes as a flat, row-major list: `width` consecutive
    # entries per row. The width must come from the header, not a literal, so
    # every data row lines up with `column` whatever the period count is.
    width = len(columns)
    temp_contents = []
    for position, row in enumerate(rows):
        start = position * width
        values = [
            datanodes[i]['data']['data'] for i in range(start, start + width)
        ]
        temp_contents.append([row['cname']] + values)
    return column, temp_contents, name
# 保存数据
def save_csv(path, column, temp_contents):
    """Write tabular rows to a UTF-8 CSV file.

    Args:
        path: Destination file; an existing file is overwritten.
        column: Header labels, one per field.
        temp_contents: Sequence of rows, each with one value per header.
    """
    table = pd.DataFrame(data=temp_contents, columns=column)
    # index=False keeps pandas' row numbers out of the file.
    table.to_csv(path, index=False, encoding='utf-8', mode='w')
def get(path):
    """Show one line chart per row of the CSV file at *path*.

    The first column of each row becomes the chart title; the remaining
    columns are plotted in file order, labelled with the file's own column
    headers.

    Args:
        path: Location of the CSV file to read (the layout written by
            ``save_csv``: a label column followed by value columns).
    """
    # Read the file the caller asked for, not a hard-coded file name.
    df = pd.read_csv(path)

    # Take the x-axis labels from the header row so they always match the
    # data, however many periods the file holds and whichever years they are.
    period_labels = [str(label) for label in df.columns[1:]]

    # Select a font for the titles once, up front.
    # NOTE(review): assumes the SimHei font is installed on this machine.
    plt.rcParams['font.sans-serif'] = ['SimHei']

    # Iterate over every row present, not a fixed count of 31.
    for _, area in df.iterrows():
        # .iloc makes positional access explicit; plain area[0] on a
        # label-indexed Series is deprecated in recent pandas.
        plt.title(area.iloc[0])
        plt.plot(period_labels, area.iloc[1:].tolist())
        plt.show()
if __name__ == "__main__":
    # Fetch the table: header row, data rows and the indicator name.
    header, records, indicator = getData()

    # Name the CSV after the indicator and write the table to it.
    csv_path = indicator + '.csv'
    save_csv(csv_path, header, records)

    # Hand the freshly written file to the plotting step.
    get(csv_path)