二利用pandas统计中国百亿富豪的信息

2024-01-09 14:24:40

一说明

数据来源：新财富 http://www.xcf.cn/zhuanti/ztzz/hdzt1/500frb/index.html
我先把这些数据整理到 Excel 表格里，再用 pandas 读取并进行统计。

二统计中国哪个地区百亿富豪人数最多


import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
# Use a CJK-capable font so the Chinese labels render instead of empty boxes.
mpl.rcParams['font.sans-serif'] = ['KaiTi']
mpl.rcParams['font.serif'] = ['KaiTi']

# Count the billionaires in each region.
people = pd.read_excel('C:/Users/hunin/Desktop/by.xlsx')

# The region is the first two characters of the headquarters string.
# Assigning the whole column at once replaces the chained
# ``people['地区'].at[i] = ...`` writes, which modify a temporary Series and
# are discarded when pandas Copy-on-Write is enabled.
# Rows with a missing headquarters keep a missing region and are left out of
# the counts below instead of crashing the script.
people['地区'] = people['公司总部'].str[:2]

# value_counts() tallies each region and returns the result sorted from the
# largest count to the smallest, which is the order the chart is drawn in.
region_counts = people['地区'].value_counts()
df = pd.DataFrame({
    '地区': list(region_counts.index),
    '人数': list(region_counts.values),
})
# print(df)

plt.bar(df['地区'], df['人数'], color='orange')
# rotation must be a number; the string '45' is rejected by current matplotlib.
plt.xticks(df['地区'], rotation=45)
plt.xlabel('地区', fontsize=19, color="red")
plt.ylabel('人数', fontsize=19, color="red")
plt.title('中国百亿富豪各地区人数', fontsize=16)

# Write each count above its bar, reading the value from the same row as the
# region so the label can never drift away from the bar it belongs to.
for region, count in zip(df['地区'], df['人数']):
    plt.text(region, count, str(count), ha='center', va='bottom', fontsize=11)
plt.tight_layout()
plt.show()

运行代码结果为：

三统计百亿富豪集中在哪个年龄段

import matplotlib.pyplot as plt
import pandas as pd

# Use a CJK-capable font so the Chinese labels render instead of empty boxes.
plt.rcParams['font.sans-serif'] = ['SimHei']

student = pd.read_excel('C:/Users/hunin/Desktop/by.xlsx')

# Convert the age column to whole numbers once.  Cells that are blank or not
# numeric become NaN and are dropped, so a single incomplete row no longer
# aborts the script; astype(int) truncates toward zero like int() does.
ages = pd.to_numeric(student["年龄"], errors="coerce").dropna().astype(int)

# (label, lowest age, highest age) -- both ends inclusive.
age_brackets = [
    ("18-40岁", 18, 40),
    ("41-50岁", 41, 50),
    ("51-60岁", 51, 60),
    ("61-70岁", 61, 70),
    ("71-80岁", 71, 80),
    ("81-90岁", 81, 90),  # was mislabelled "80-91岁" although it counts 81-90
]

# Number of people in each bracket; Series.between is inclusive on both ends.
age_counts = {
    label: int(ages.between(low, high).sum())
    for label, low, high in age_brackets
}

df = pd.DataFrame({
    '年龄': list(age_counts.keys()),
    '人数': list(age_counts.values()),
})
# Show the most populated bracket first.
df = df.sort_values(by="人数", ascending=False)

plt.bar(df['年龄'], df['人数'], color='orange')
# rotation must be a number; the string '45' is rejected by current matplotlib.
plt.xticks(df['年龄'], rotation=45)
plt.xlabel('年龄', fontsize=19, color="red")
plt.ylabel('人数', fontsize=19, color="red")
plt.title('中国百亿富豪年龄区间', fontsize=16)

# Write each count above its bar, taking the value from the same row as the
# bracket label.
for bracket, count in zip(df['年龄'], df['人数']):
    plt.text(bracket, count, str(count), ha='center', va='bottom', fontsize=11)
plt.tight_layout()
plt.show()

效果如图：

码农公寓

一 说明

二 统计中国哪个地区百亿富豪人数最多

三 统计百亿富豪集中在哪个年龄段

相关文章

一说明

二统计中国哪个地区百亿富豪人数最多

三统计百亿富豪集中在哪个年龄段