目录
前言
刚刚过去的冬季奥运会,中国队取得了不错的成绩。本文将分别基于Pandas和Pyecharts进行数据处理和数据可视化,并利用可视化图表对奥运会相关信息进行展示。
一、导入模块
import pandas as pd
from pyecharts.charts import Timeline, Line, Tree
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType
二、Pandas数据处理
2.1 读取数据
# Load the medal CSV: the file is GB18030-encoded and its first column is used
# as the row index.
csv_path = 'D:/天池数据/2020东京奥运会奖牌数据可视化/2020东京奥运会奖牌数据.csv'
df = pd.read_csv(csv_path, encoding='gb18030', index_col=0)
# Preview the first ten rows (shown as the cell output in a notebook).
df.head(10)
2.2 是否有缺失值
# One boolean per column: True when that column contains at least one missing value.
missing_by_column = df.isnull().any()
missing_by_column
各列数据均不存在缺失情况。
2.3 查看中国每日数据
# Method 1: select China's rows with a boolean mask, using bracket-style column access
df1 = df[df['国家']=='中国']
df1
# Method 2: the same selection, using attribute-style column access
df1 = df[df.国家 == '中国']
df1
2.4 统计中国、美国、日本、澳大利亚4个国家数据
# Build one list per country: [country, running gold total after row 1, row 2, ...].
# Side products used by the charts further down:
#   cols -- ['国家'] followed by the row labels of the country with the most rows
#   flg  -- country -> value of the last column of its first row (used later as
#           an 'image://' marker symbol, so presumably a flag image URL)
all_country_data = []
flg = {}
cols = ['国家']
countrys = ['中国', '美国', '日本', '澳大利亚']
for country in countrys:
    df1 = df[df['国家'] == country]

    # Row labels for this country (presumably dates -- confirm against the CSV).
    day_labels = df1.index.tolist()
    # Keep the longest label list seen so far. The previous check compared
    # against len(cols) and then *appended*, which duplicated labels (and made
    # the later DataFrame construction fail) when a later country had more rows.
    if len(day_labels) > len(cols) - 1:
        cols = ['国家'] + day_labels

    # Raises IndexError when the country has no rows at all.
    flg[country] = df1.iloc[:1, -1].values[0]

    # Running total of gold medals. cumsum() replaces the former loop that
    # re-summed every prefix of the frame (quadratic in the number of rows).
    # NOTE(review): rows are matched to `cols` by position, which assumes every
    # country starts on the same first label and has no gaps -- confirm.
    all_country_data.append([country] + df1['金牌'].cumsum().tolist())
all_country_data
生成新的DataFrame:
# One row per country, one column per entry of `cols`.
d2 = pd.DataFrame(data=all_country_data, columns=cols)
# Rows shorter than `cols` end in missing cells; carry the last known total
# forward along each row. DataFrame.ffill(axis=1) is the supported spelling of
# fillna(method='ffill', axis=1), which is deprecated in pandas >= 2.1.
d2 = d2.ffill(axis=1)
d2
method='ffill':用前一个非缺失值去填充缺失值。
method='bfill':用下一个非缺失值去填充缺失值。
这里采用前一个非缺失值对缺失值进行填充。同时,可根据需要获取多个国家数据,改变countrys列表即可。
三、Pyecharts绘图
3.1 绘制基础折线图
# China's cumulative gold count for every x-axis label, in column order.
# (The original loop body had lost its indentation and could not run.)
x_data = cols[1:]
is_china = d2['国家'] == '中国'  # loop-invariant row mask, computed once
CHN = [d2[d_time][is_china].values.tolist()[0] for d_time in x_data]

# Plain line chart: a single series, centred title, legend hidden.
l1 = (
    Line()
    .add_xaxis(x_data)
    # China series
    .add_yaxis(
        '中国',
        CHN,
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国金牌',
            pos_left='center',
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
l1.render_notebook()
3.2 加载样式
# Background: a linear gradient from (0, 0) to (0, 1), passed to the browser as
# a JavaScript expression.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
# Line style: 4px stroke with a grey drop shadow.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}
# NOTE: this Timeline is configured but no chart is added to it in this
# section; only `line` is rendered below.
timeline = Timeline(
    init_opts=opts.InitOpts(
        bg_color=JsCode(background_color_js),
        width='980px',
        height='600px',
    )
)
timeline.add_schema(
    is_auto_play=True,
    is_loop_play=True,
    is_timeline_show=True,
    play_interval=500,
)

# China's cumulative gold count for every x-axis label, in column order.
# (The original loop body had lost its indentation and could not run.)
x_data = cols[1:]
is_china = d2['国家'] == '中国'  # loop-invariant row mask, computed once
CHN = [d2[d_time][is_china].values.tolist()[0] for d_time in x_data]

# Shared axis-line styling for both axes.
line = (
    Line(
        init_opts=opts.InitOpts(
            bg_color=JsCode(background_color_js),
            width='980px',
            height='600px',
        )
    )
    .add_xaxis(x_data)
    # China series; the series maximum is marked with the image stored in flg['中国'].
    .add_yaxis(
        '中国',
        CHN,
        symbol_size=10,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=True),
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(
                    name="",
                    type_='max',
                    value_index=0,
                    symbol='image://' + flg['中国'],
                    symbol_size=[40, 25],
                )
            ],
            label_opts=opts.LabelOpts(is_show=False),
        ),
    )
    # Applied after add_yaxis, so these label options replace the ones set there.
    .set_series_opts(
        linestyle_opts=linestyle_dic,
        label_opts=opts.LabelOpts(font_size=12, color='red'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国金牌',
            pos_left='center',
            pos_top='2%',
            title_textstyle_opts=opts.TextStyleOpts(color='#DC143C', font_size=20),
        ),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(font_size=14, color='red'),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
            name_textstyle_opts=opts.TextStyleOpts(
                font_size=16, font_weight='bold', color='#FFD700'
            ),
            axislabel_opts=opts.LabelOpts(font_size=13, color='red'),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dashed'),
            ),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        legend_opts=opts.LegendOpts(
            is_show=False,
            pos_right='1.5%',
            pos_top='2%',
            legend_icon='roundRect',
            orient='horizontal',
        ),
    )
)
line.render_notebook()
3.3 动态展示中国每日金牌数据
# Background: a linear gradient from (0, 0) to (0, 1), passed to the browser as
# a JavaScript expression.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
# Line style: 4px stroke with a grey drop shadow.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}
# Timeline container: auto-plays in a loop, 500 ms per frame, slider visible.
timeline = Timeline(
    init_opts=opts.InitOpts(
        bg_color=JsCode(background_color_js),
        width='980px',
        height='600px',
    )
)
timeline.add_schema(
    is_auto_play=True,
    is_loop_play=True,
    is_timeline_show=True,
    play_interval=500,
)

CHN = []
x_data = cols[1:]
is_china = d2['国家'] == '中国'  # loop-invariant row mask, computed once

# One frame per x-axis label. The x-axis always carries every label while CHN
# grows by one point per frame, so the line extends as the timeline plays.
# (The loop body had lost its indentation; the chart build and timeline.add()
# belong inside the loop because each frame is labelled with its own d_time.)
for d_time in x_data:
    CHN.append(d2[d_time][is_china].values.tolist()[0])
    line = (
        Line(
            init_opts=opts.InitOpts(
                bg_color=JsCode(background_color_js),
                width='980px',
                height='600px',
            )
        )
        .add_xaxis(x_data)
        # China series; the current maximum is marked with the image in flg['中国'].
        .add_yaxis(
            '中国',
            CHN,
            symbol_size=10,
            is_smooth=True,
            label_opts=opts.LabelOpts(is_show=True),
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(
                        name="",
                        type_='max',
                        value_index=0,
                        symbol='image://' + flg['中国'],
                        symbol_size=[40, 25],
                    )
                ],
                label_opts=opts.LabelOpts(is_show=False),
            ),
        )
        # Applied after add_yaxis, so these label options replace the ones set there.
        .set_series_opts(
            linestyle_opts=linestyle_dic,
            label_opts=opts.LabelOpts(font_size=12, color='red'),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title='中国金牌',
                pos_left='center',
                pos_top='2%',
                title_textstyle_opts=opts.TextStyleOpts(color='#DC143C', font_size=20),
            ),
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(font_size=14, color='red'),
                axisline_opts=opts.AxisLineOpts(
                    is_show=True,
                    linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
                ),
            ),
            yaxis_opts=opts.AxisOpts(
                name='金牌/枚',
                is_scale=True,
                max_=40,
                name_textstyle_opts=opts.TextStyleOpts(
                    font_size=16, font_weight='bold', color='#FFD700'
                ),
                axislabel_opts=opts.LabelOpts(font_size=13, color='red', rotate=15),
                splitline_opts=opts.SplitLineOpts(
                    is_show=True,
                    linestyle_opts=opts.LineStyleOpts(type_='dashed'),
                ),
                axisline_opts=opts.AxisLineOpts(
                    is_show=True,
                    linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
                ),
            ),
            legend_opts=opts.LegendOpts(
                is_show=True,
                pos_right='1%',
                pos_top='2%',
                legend_icon='roundRect',
                orient='vertical',
            ),
        )
    )
    timeline.add(line, str(d_time))
timeline.render_notebook()
3.4 增加其他国家每日金牌数据
# Background: a linear gradient from (0, 0) to (0, 1), passed to the browser as
# a JavaScript expression.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
# Line style: 4px stroke with a grey drop shadow.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}
# Timeline container: auto-plays in a loop, 500 ms per frame, slider visible.
timeline = Timeline(
    init_opts=opts.InitOpts(
        bg_color=JsCode(background_color_js),
        width='980px',
        height='600px',
    )
)
timeline.add_schema(
    is_auto_play=True,
    is_loop_play=True,
    is_timeline_show=True,
    play_interval=500,
)

CHN, USA, JPN, AUS = [], [], [], []
# (legend name, running series, marker size) in drawing order. The four series
# were previously four copy-pasted add_yaxis stanzas differing only in these values.
series_specs = [
    ('中国', CHN, 10),
    ('美国', USA, 5),
    ('日本', JPN, 5),
    ('澳大利亚', AUS, 5),
]
x_data = cols[1:]

# One frame per x-axis label: every series gains one point per frame.
# (The loop body had lost its indentation; the chart build and timeline.add()
# belong inside the loop because each frame is labelled with its own d_time.)
for d_time in x_data:
    for country, values, _ in series_specs:
        values.append(d2[d_time][d2['国家'] == country].values.tolist()[0])

    line = Line(
        init_opts=opts.InitOpts(
            bg_color=JsCode(background_color_js),
            width='980px',
            height='600px',
        )
    ).add_xaxis(x_data)

    for country, values, marker_size in series_specs:
        # The series maximum is marked with the image stored in flg[country].
        line.add_yaxis(
            country,
            values,
            symbol_size=marker_size,
            is_smooth=True,
            label_opts=opts.LabelOpts(is_show=True),
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(
                        name="",
                        type_='max',
                        value_index=0,
                        symbol='image://' + flg[country],
                        symbol_size=[40, 25],
                    )
                ],
                label_opts=opts.LabelOpts(is_show=False),
            ),
        )

    line.set_series_opts(linestyle_opts=linestyle_dic)
    line.set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国 VS 美国 VS 日本 VS 澳大利亚',
            pos_left='center',
            pos_top='2%',
            title_textstyle_opts=opts.TextStyleOpts(color='#DC143C', font_size=20),
        ),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(font_size=14, color='red'),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
            name_textstyle_opts=opts.TextStyleOpts(
                font_size=16, font_weight='bold', color='#FFD700'
            ),
            axislabel_opts=opts.LabelOpts(font_size=13, color='red', rotate=15),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dashed'),
            ),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        legend_opts=opts.LegendOpts(
            is_show=True,
            pos_right='1%',
            pos_top='2%',
            legend_icon='roundRect',
            orient='vertical',
        ),
    )
    timeline.add(line, str(d_time))
timeline.render_notebook()
3.5 2020东京奥运会奖牌数世界分布
import requests
from pyecharts.charts import Map
# Fetch the overall medal table as JSON; fail fast on HTTP errors and never
# wait more than 30 seconds.
url = 'https://app-sc.miguvideo.com/vms-livedata/olympic-medal/total-table/15/110000004609'
r = requests.get(url, timeout=30)
r.raise_for_status()
r.encoding = r.apparent_encoding
datas = r.json()['body']['allMedalData']

# Build the frame in one step. DataFrame.append was removed in pandas 2.0 (and
# copied the whole frame on every call); passing the column names to the
# constructor also keeps an empty payload from crashing on column assignment.
medal_rows = [
    [
        data['countryName'],
        data['goldMedalNum'],
        data['silverMedalNum'],
        data['bronzeMedalNum'],
        data['totalMedalNum'],
    ]
    for data in datas
]
df = pd.DataFrame(medal_rows, columns=['国家', '金牌', '银牌', '铜牌', '奖牌'])
# Assign the result back instead of using inplace=True on a column selection,
# which may act on a temporary copy and leave `df` unchanged.
df['国家'] = df['国家'].replace('俄奥委会', '俄罗斯')
# English-to-Chinese name mapping, passed as `name_map` to the Map charts that
# follow (presumably so the "world" map's region names match df['国家'] -- confirm).
# NOTE(review): the `...` line stands for entries omitted from the article. It
# is not valid inside a dict literal, so the full mapping must be filled in
# before this snippet can run.
name_map = {
'Singapore Rep.': '新加坡',
...
'Comoros': '科摩罗'
}
# [country, total medals] pairs in the row order of df.
total_medal_pairs = [list(pair) for pair in zip(df['国家'].values, df['奖牌'].values)]
# Piecewise colour legend: values up to 120, split into 8 bands.
total_visualmap = opts.VisualMapOpts(
    is_show=True,
    max_=120,
    is_piecewise=True,
    split_number=8,
)

# World map of total medals; region labels and the series legend are hidden,
# and panning/zooming is disabled.
m0 = Map()
m0.add(
    "奖牌数",
    total_medal_pairs,
    "world",
    is_map_symbol_show=False,
    is_roam=False,
    name_map=name_map,
)
m0.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
m0.set_global_opts(
    title_opts=opts.TitleOpts(title="2020东京奥运会奖牌数世界分布"),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=total_visualmap,
)
m0.render_notebook()
首先,利用requests库爬取相关数据,并对数据进行相应的处理;之后,利用pyecharts绘制map地图对奖牌数的世界分布进行可视化。
3.6 2020东京奥运会金牌世界分布
# [country, gold medals] pairs in the row order of df.
gold_medal_pairs = [list(pair) for pair in zip(df['国家'].values, df['金牌'].values)]
# Piecewise colour legend: values up to 40, split into 8 bands, using a
# light-yellow to dark-red palette.
gold_visualmap = opts.VisualMapOpts(
    is_show=True,
    max_=40,
    is_piecewise=True,
    split_number=8,
    range_color=['#FFFFE0', '#FFA07A', '#CD5C5C', '#8B0000'],
)

# World map of gold medals; region labels and the series legend are hidden,
# and panning/zooming is disabled.
m1 = Map()
m1.add(
    "金牌",
    gold_medal_pairs,
    "world",
    is_map_symbol_show=False,
    is_roam=False,
    name_map=name_map,
)
m1.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
m1.set_global_opts(
    title_opts=opts.TitleOpts(title="2020东京奥运会金牌世界分布"),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=gold_visualmap,
)
m1.render_notebook()
3.7 2020东京奥运会奖牌世界分布(动态)
# Dark-themed timeline that cycles through one world map per medal column:
# auto-plays in a loop, 800 ms per frame, with the slider hidden.
timeline = Timeline(init_opts=opts.InitOpts(theme=ThemeType.DARK))
timeline.add_schema(
    is_auto_play=True,
    is_loop_play=True,
    is_timeline_show=False,
    play_interval=800,
)
colls = ['奖牌', '金牌', '银牌', '铜牌']
# Upper bound of the colour scale for the column at the same position in `colls`.
maxx = [120, 40, 40, 40]

# (The loop body had lost its indentation; the map build and timeline.add()
# belong inside the loop because both depend on `col`.)
for col, scale_max in zip(colls, maxx):
    m = (
        Map()
        .add(
            col,
            [list(z) for z in zip(df['国家'].values, df[col].values)],
            "world",
            is_map_symbol_show=False,
            is_roam=False,
            name_map=name_map,
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=False),
            visualmap_opts=opts.VisualMapOpts(
                is_show=False,
                max_=scale_max,
                is_piecewise=True,
                split_number=20,
            ),
            # Rotated banner anchored 110px from the right and bottom edges:
            # a translucent rectangle with the frame's caption on top.
            graphic_opts=[
                opts.GraphicGroup(
                    graphic_item=opts.GraphicItem(
                        rotation=JsCode("Math.PI / 4"),
                        bounding="raw",
                        right=110,
                        bottom=110,
                        z=100,
                    ),
                    children=[
                        opts.GraphicRect(
                            graphic_item=opts.GraphicItem(
                                left="center", top="center", z=100
                            ),
                            graphic_shape_opts=opts.GraphicShapeOpts(
                                width=400, height=50
                            ),
                            graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                fill="rgba(0,0,0,0.3)"
                            ),
                        ),
                        opts.GraphicText(
                            graphic_item=opts.GraphicItem(
                                left="center", top="center", z=100
                            ),
                            graphic_textstyle_opts=opts.GraphicTextStyleOpts(
                                text="2020奥运会{}分布".format(col),
                                font="bold 26px Microsoft YaHei",
                                graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                    fill="#fff"
                                ),
                            ),
                        ),
                    ],
                )
            ],
        )
    )
    timeline.add(m, "{}分布".format(col))
timeline.render_notebook()
总结
本文利用Pandas对数据进行处理,利用Pyecharts绘制折线图(Line)和地图(Map),并通过添加时间轴组件(Timeline)对奥运会数据进行动态的可视化展示。Pyecharts相关的参数说明以及其他类型图表的制作方法,可以参阅Pyecharts的官方文档(简介 - pyecharts - A Python Echarts Plotting Library built with love)。