一个简单项目
涉及内容:数据调入,过滤,展示
数据来源于https://github.com/839Studio/Novel-Coronavirus-Updates/blob/master/README.md,经石墨文档下载。
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
# Register pandas' formatters/converters with matplotlib (presumably so the
# date-like index produced by grouping on '报道时间' can be used as the x axis).
register_matplotlib_converters()
# Put SimHei first in the sans-serif font list; presumably chosen so the
# Chinese labels and titles used by the plots render correctly.
plt.rcParams['font.sans-serif']=['SimHei']
class NCP(object):
    """Load the case sheet, keep the listed provinces, aggregate per day and plot.

    Attributes set by ``__init__``:
        provlist:  names accepted by the query/plot methods. '全国' (nationwide)
                   and '湖北外' (nationwide minus Hubei) are computed aggregates,
                   every other entry is matched against the '省份' column.
        total_dom: rows of the source sheet whose '省份' value is in ``provlist``.
        new_dom:   nationwide per-day sums; its index is the reference list of
                   dates used to zero-fill days on which a province has no rows.
    """

    # Workbooks written by the query methods and by ``__prsv__``.
    NEW_XLSX = './Data/NCP_Daily_New_dom.xlsx'
    SUM_XLSX = './Data/NCP_Daily_Sum_dom.xlsx'

    # Characters that are not allowed in an Excel sheet title -> '_'.
    _BAD_SHEET_CHARS = str.maketrans({c: '_' for c in '\\/*?:[]'})

    def __init__(self, path: str = './Data/NCP_20200226_morn.xlsx'):
        """Read the workbook at ``path`` and prepare the filtered data.

        Parameters:
        --------------------
        path: str
            Excel file to load; defaults to the snapshot the script was
            written against.
        """
        data = pd.read_excel(path)
        # drop the columns that are not used anywhere below
        data = data.drop(['消息来源', '来源链接1', '来源链接2', '来源链接3', '备注'], axis=1)
        # define the province list, only including China
        # NOTE(review): the three '*' entries look like redacted placeholders;
        # they are kept verbatim - confirm the intended region names.
        self.provlist = ['全国', '湖北外', '北京', '天津', '上海', '重庆', '河北', '山西', '辽宁', '吉林', '黑龙江', '江苏', '浙江', '安徽',
                         '福建', '江西', '山东', '河南', '湖北', '湖南', '广东', '海南', '四川', '贵州', '云南', '陕西', '甘肃', '青海',
                         '*', '内蒙古', '广西', '*', '宁夏', '*', '香港', '澳门']
        # keep only the rows that belong to a listed province
        self.total_dom = data[data['省份'].isin(self.provlist)]
        # nationwide increments; computed without touching the export file so
        # that merely constructing the object has no write side effect
        self.new_dom = self.query_new_dom('全国', save=False)

    def __prsv__(self):
        '''
        Save the per-day and cumulative tables of every area in ``provlist``.

        All areas go into one workbook each (``NEW_XLSX`` / ``SUM_XLSX``), one
        sheet per area. A single writer per workbook is used because calling
        ``to_excel`` with a path rewrites the whole file every time.
        '''
        written = set()
        with pd.ExcelWriter(self.NEW_XLSX) as new_book, \
                pd.ExcelWriter(self.SUM_XLSX) as sum_book:
            for prov in self.provlist:
                sheet = self._sheet_name(prov)
                # provlist contains repeated entries; a workbook cannot hold
                # two sheets with the same title
                if sheet in written:
                    continue
                written.add(sheet)
                daily_new = self._daily_new(prov)
                daily_new.to_excel(new_book, sheet_name=sheet)
                daily_new.cumsum().to_excel(sum_book, sheet_name=sheet)
        return None

    @classmethod
    def _sheet_name(cls, prov):
        """Return ``prov`` with characters illegal in sheet titles replaced by '_'."""
        return prov.translate(cls._BAD_SHEET_CHARS)

    def _daily_new(self, prov):
        """Compute the per-day increments for ``prov`` without any file I/O.

        Returns a DataFrame indexed by '报道时间' and sorted by date, or None
        when ``prov`` is not a known area.
        """
        # The two aggregate names are handled first: both are also members of
        # provlist, so testing membership first would send them down the
        # per-province path, where no row matches and the result is all zeros.
        if prov == '全国':
            daily = self.total_dom.groupby('报道时间').sum(numeric_only=True)
        elif prov == '湖北外':
            daily = self._daily_new('全国') - self._daily_new('湖北')
        elif prov in self.provlist:
            rows = self.total_dom[self.total_dom['省份'] == prov]
            # numeric_only keeps just the count columns (text columns such as
            # the province name cannot be summed meaningfully)
            daily = rows.groupby('报道时间').sum(numeric_only=True)
            # zero-fill the dates on which this province reported nothing
            all_dates = daily.index.union(self.new_dom.index)
            daily = daily.reindex(all_dates, fill_value=0)
        else:
            return None
        return daily.sort_index()

    def query_new_dom(self, prov: str, *, save: bool = True) -> "pd.DataFrame":
        '''
        Return the increment data for a certain area, summed per date and
        sorted by date. Returns None when ``prov`` is unknown.
        Parameters:
        --------------------
        prov: str
            name of a certain province, '全国' or '湖北外'
        save: bool
            when True (default) the result is also written to ``NEW_XLSX``,
            replacing that file
        '''
        daily_new = self._daily_new(prov)
        if save and daily_new is not None:
            daily_new.to_excel(self.NEW_XLSX, sheet_name=self._sheet_name(prov))
        return daily_new

    def query_total_dom(self, prov: str, *, save: bool = True) -> "pd.DataFrame":
        '''
        Return the cumulative data (running sum of the daily increments).
        Returns None when ``prov`` is unknown.
        Parameters:
        -------------------
        prov: str
            name of a certain province, '全国' or '湖北外'
        save: bool
            when True (default) the increments are written to ``NEW_XLSX`` and
            the cumulative table to ``SUM_XLSX``, replacing those files
        '''
        daily_new = self.query_new_dom(prov, save=save)
        if daily_new is None:
            return None
        daily_total = daily_new.cumsum()
        if save:
            daily_total.to_excel(self.SUM_XLSX, sheet_name=self._sheet_name(prov))
        return daily_total

    def depict_new(self, prov: str) -> "plt.Figure":
        '''
        Depict the increment data
        Parameters:
        -------------------------
        prov: str
            name of a certain province
        Raises ValueError when ``prov`` is unknown.
        '''
        data = self.query_new_dom(prov, save=False)
        return self._depict(data, prov, '肺炎新增数据--', ('新增确诊', '新增出院', '新增死亡'))

    def depict_total(self, prov: str) -> "plt.Figure":
        '''
        Depict the cumulative data
        Parameters:
        ---------------------------
        prov: str
            name of a certain province
        Raises ValueError when ``prov`` is unknown.
        '''
        data = self.query_total_dom(prov, save=False)
        return self._depict(data, prov, '肺炎累计数据--', ('累计确诊', '累计出院', '累计死亡'))

    def _depict(self, data, prov, title_prefix, labels):
        """Draw the two-panel figure shared by ``depict_new`` and ``depict_total``.

        Top panel: confirmed cases. Bottom panel: discharged and deaths.
        ``labels`` holds the three legend labels in that order.
        """
        if data is None:
            raise ValueError('unknown province: %r' % (prov,))
        fig = plt.figure(figsize=[15, 12])
        dates = data.index
        title = title_prefix + prov

        plt.subplot(2, 1, 1)
        confirmed = data['新增确诊']
        plt.plot(dates, confirmed, marker='o', label=labels[0])
        self._pad_ylim(confirmed)
        self._annotate(dates, confirmed, ha='right', va='bottom', fontsize=10)
        plt.legend()
        plt.title(title)

        plt.subplot(2, 1, 2)
        discharged = data['新增出院']
        deaths = data['新增死亡']
        plt.plot(dates, discharged, marker='o', label=labels[1])
        plt.plot(dates, deaths, marker='o', label=labels[2])
        self._pad_ylim(discharged, deaths)
        # opposite anchoring keeps the two value labels from overlapping
        self._annotate(dates, discharged, ha='right', va='bottom', fontsize=12)
        self._annotate(dates, deaths, ha='left', va='top', fontsize=12)
        plt.legend()
        plt.title(title)
        return fig

    @staticmethod
    def _pad_ylim(*series):
        """Set the y range of the current axes with a 10 % margin around the data."""
        top = max(max(s) for s in series)
        bottom = min(min(s) for s in series)
        gap = top - bottom
        lower, upper = -0.1 * gap, top + 0.1 * gap
        # an all-zero series would give identical limits
        if lower == upper:
            upper = lower + 1
        plt.ylim(lower, upper)

    @staticmethod
    def _annotate(dates, values, **text_kwargs):
        """Write each value next to its data point on the current axes."""
        for when, value in zip(dates, values):
            plt.text(when, value, '%.0f' % value, **text_kwargs)
暂时只有这几个功能,其他的想到再做吧,好像没什么需求。做城市做外国都一样。倒是想看看省份-城市那种直接调整的索引列表怎么做的。总而言之是无聊找点事做。
Some Translation:
What’s warmer than the wild sea is the stomach of a bear.
A helpless shrimp soothes its anxiety against the gastric acid.