import pandas as pd
# Load the supermarket workbook into a DataFrame.
market_data = pd.read_excel('./supermarket_data.xlsx')
# Print a preview of the table and its summary statistics. Bare expressions
# are discarded when this runs as a script (and a notebook cell only echoes
# its last expression), so both results are printed explicitly.
print(market_data.head())
print(market_data.describe())
# 普通柱状图 (basic bar chart)
# Bar chart of total sales per region.
# Sanity check: print the city and sales amount of the first row.
print(market_data['城市'][0], market_data['销售额'][0])
# Sales totals per city and per region; regions are sorted ascending by total
# so the bars are drawn from smallest to largest.
city_cost = market_data.groupby('城市')['销售额'].sum()
area_cost = market_data.groupby('地区')['销售额'].sum()
area_cost = area_cost.sort_values()
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
plt.rcParams["font.family"] = 'Arial Unicode MS'
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign displays correctly
plt.figure(figsize=(12, 8))
# Show y-axis ticks as plain integers instead of scientific notation.
plt.gca().yaxis.set_major_formatter(mticker.FormatStrFormatter('%d'))
# Put a value label just above the top of each bar.
for region, total in zip(area_cost.index, area_cost):
    plt.text(region, total + 0.05, '%.0f' % total,
             ha='center', va='bottom', fontsize=10)
plt.bar(area_cost.index, area_cost)
# savefig() requires a target filename; calling it with no argument raises
# TypeError. NOTE(review): the original gave no name, so this one was chosen
# to match the naming of the other charts -- adjust if a different path is wanted.
plt.savefig('地区销售额柱状图.png')
# 横向堆叠柱状图 (horizontal stacked bar chart)
# 1
# Horizontal stacked bar chart of profit and sales per region.
# Both names are kept as group-by objects (not aggregated results).
area_profit = market_data.groupby('地区')['利润']
area_cost = market_data.groupby('地区')['销售额']
col_index = ['利润', '销售额']

# Totals per region; dict insertion order follows the group-by iteration
# order and fixes the row order of the table below.
profit_totals = {region: values.sum() for region, values in area_profit}
sales_totals = {region: values.sum() for region, values in area_cost}
index = list(profit_totals)

# Each row is [profit, sales - profit]: with stacking, the full bar length
# then equals the region's sales total.
res = [
    [profit, sales_totals[region] - profit]
    for region, profit in profit_totals.items()
]
print(index)

df1 = pd.DataFrame(res, index=index, columns=col_index)
ax = df1.plot.barh(stacked=True, figsize=(20, 10), fontsize=20)
# Title, legend font and plain-integer x-axis ticks.
ax.set_title("地区销售额利润表", fontsize=20)
font1 = {'weight': 'normal', 'size': 20}
ax.legend(col_index, shadow=True, prop=font1)
ax.xaxis.set_major_formatter(mticker.FormatStrFormatter('%d'))
# Save through the figure that owns the axes.
fig = ax.get_figure()
fig.savefig('地区销售额利润.png')
# 2
# Horizontal stacked bar chart: number of rows per region for each discount value.
# NOTE(review): `neg_discount` is not defined in the visible part of this
# file -- confirm where it comes from before running this section.
discount_range = neg_discount.groupby('折扣')
# Region labels, taken from the index of the summed `area_cost` group-by.
index = area_cost.sum().index
discount_index_val = []
discount_area_val = []
for rate, rows in discount_range:
    discount_index_val.append(rate)
    per_area = rows.groupby('地区').size()
    # One count per region, 0 where the region has no rows at this discount.
    discount_area_val.append(
        [per_area[area] if area in per_area else 0 for area in index]
    )

df2 = pd.DataFrame(discount_area_val, index=discount_index_val, columns=index)
df2.plot.barh(stacked=True, figsize=(20, 10), fontsize=20)
font1 = {'weight': 'normal', 'size': 20}
axis = plt.gca()
axis.set_title('各区折扣情况', fontsize=20)
axis.legend(index, shadow=True, prop=font1)
# Save the figure.
plt.savefig('折扣地区分布图_折线.png')
# 饼状图 (pie chart)
# Pie chart of how the rows are distributed over the discount values.
# NOTE(review): `neg_discount` is not defined in the visible part of this
# file -- confirm where it comes from before running this section.
discount_range = neg_discount['折扣'].value_counts()
print(discount_range)
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
plt.rcParams["font.family"] = 'Arial Unicode MS'
plt.rcParams['axes.unicode_minus'] = False  # so the minus sign displays correctly
plt.figure(figsize=(10, 10))
plt.title("折扣分布图", fontsize=25)
# Wedge labels are the discount values multiplied by 100. autopct receives
# each wedge's share as a percentage; "%.2f%%" prints it with two decimals
# and a percent sign (the previous "%2f" was a width spec and printed six
# decimal places).
patches, l_text, p_text = plt.pie(
    discount_range,
    labels=discount_range.index * 100,
    autopct="%.2f%%",
)
# Enlarge the wedge labels and the percentage texts.
for t in l_text:
    t.set_size(30)
for t in p_text:
    t.set_size(20)
# Equal x/y scaling keeps the pie circular.
plt.axis('equal')
plt.legend()
plt.savefig('./折扣分布图.png')
# 折线图 (line chart)
# Line chart: yearly sales of discounted orders, one line per region.
# Keep only rows with a positive discount, then group them by region.
discount = market_data[market_data['折扣'] > 0]
area_group = discount.groupby('地区')
area_data_month = []
index = []
for name, area in area_group:
    index.append(name)
    # Resample only the sales column, indexed by order date. Summing the
    # whole frame also tries to sum the non-numeric columns (including the
    # date column the original kept with drop=False); newer pandas (2.x) no
    # longer drops such columns silently and can raise TypeError.
    # NOTE(review): recent pandas releases prefer the 'YE' alias over 'y';
    # left unchanged to stay compatible with older installations.
    area_time_data = area.set_index('订单日期')['销售额'].resample('y').sum()
    area_data_month.append(area_time_data)
# One row per region -> transpose so each region becomes a column (a line).
df2 = pd.DataFrame(area_data_month)
df2 = df2.T
ax = df2.plot(title="地区折扣订单年销售额图", figsize=(20, 10), fontsize=20, linewidth=5)
font1 = {
    'weight': 'normal',
    'size': 20,
}
axis = plt.gca()
# Plain-integer y-axis ticks, larger title, and region names in the legend.
axis.yaxis.set_major_formatter(mticker.FormatStrFormatter('%d'))
axis.set_title("地区折扣订单年销售额图", fontsize=20)
axis.legend(index, shadow=True, prop=font1)
# Save the figure.
plt.savefig('地区折扣订单年销售额图_折线.png')