一、计算四分位
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:ZSW
@file:quantile_distance.py
@time:2021/02/05
"""
import pandas as pd
import numpy as np
# Read the Excel file.
excel_data = pd.read_excel("1.xlsx")
data1 = excel_data.to_dict(orient="list")
data_list = data1['time']
data_list = sorted(data_list)
# Method 1: use NumPy.
# NumPy 1.22 renamed the `interpolation` keyword of np.percentile to `method`
# and deprecated the old spelling. Prefer the new keyword and fall back to the
# old one only when the installed NumPy rejects `method`.
try:
    res = np.percentile(data_list, (25, 50, 75), method='midpoint')
except TypeError:
    res = np.percentile(data_list, (25, 50, 75), interpolation='midpoint')
print(res)
# 法二:直接计算
def median(x):
    """Split *x* around its median and return both halves with the median.

    Args:
        x: Iterable of numeric values. It is sorted into a new list, so the
            argument itself is not modified.

    Returns:
        A tuple ``(lower_half, upper_half, median_value)``. For an odd number
        of values the middle element is the median and is left out of both
        halves. For an even number the median is the mean of the two middle
        elements, and each of them stays in its own half.

    Raises:
        ValueError: If *x* is empty.
    """
    x = sorted(x)
    length = len(x)
    if not length:
        # Without this guard the even-length branch would index x[-1] on an
        # empty list and fail with an unhelpful "list index out of range".
        raise ValueError("median() requires at least one value")
    mid, rem = divmod(length, 2)  # divmod returns the quotient and remainder
    if rem:
        return x[:mid], x[mid + 1:], x[mid]
    return x[:mid], x[mid:], (x[mid - 1] + x[mid]) / 2
lHalf, rHalf, q2 = median(data_list)
# Compute each quartile once; every median() call re-sorts its input, so
# repeating the calls inside each print statement is wasted work.
q1 = median(lHalf)[2]  # median of the lower half -> first (lower) quartile
q3 = median(rHalf)[2]  # median of the upper half -> third (upper) quartile
# Label fix: 下四分位 is the lower quartile (Q1) and 上四分位 is the upper
# quartile (Q3). The values are still printed in ascending order.
print('下四分位:{}'.format(q1))
print('中位数:{}'.format(q2))
print('上四分位:{}'.format(q3))
print('四分位距:{}'.format(q3 - q1))
# Outlier threshold (upper fence): Q3 + 1.5 * IQR.
res = (q3 - q1) * 1.5 + q3
print('异常阈值:{}'.format(res))
# Method 3: use the describe() function from pandas.
print(pd.DataFrame(data_list).describe())
结果:
[0.12638889 0.59694444 2.32527778]
上四分位:0.126388888888889
中位数:0.596944444444444
下四分位:2.3252777777777798
四分位距:2.1988888888888907
异常阈值:5.623611111111115
0
count 37932.000000
mean 3.041283
std 5.612899
min 0.000000
25% 0.126389
50% 0.596944
75% 2.325139
max 25.071944
二、绘制箱型图
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""
@author:ZSW
@file:plot_quantile_distance.py
@time:2021/02/05
"""
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
# Work around matplotlib failing to display Chinese text: select the SimHei
# font for sans-serif text and turn off the Unicode minus sign.
mpl.rcParams.update({
    "font.sans-serif": ["SimHei"],
    "axes.unicode_minus": False,
})
df = pd.read_excel("1.xlsx")
# NOTE(review): the quartile script reads the 'time' column of "1.xlsx";
# confirm that 'avg_time' is the column intended here.
data = df['avg_time']
# Draw the box plot of the selected column and show the figure window.
data.plot.box(title="箱型图")
plt.show()
参考:http://www.79tui.com/happy/605215.html
https://blog.csdn.net/sinat_28252525/article/details/80671238