python单因素分析
import numpy as np
from scipy.stats import f, t
# 数据集
datas = [
[29.6, 24.3, 28.5, 32.0],
[27.3, 32.6, 30.8, 34.8],
[5.8, 6.2, 11.0, 8.3],
[21.6, 17.4, 18.3, 19.0],
[29.2, 32.8, 25.0, 24.2]
]
alpha = 0.05
def calculate_mean_and_variance(data):
"""计算数据集的均值和方差"""
return np.mean(data), np.var(data, ddof=1)
def calculate_total_square_sum(data):
"""计算总体平方和"""
data_mean, _ = calculate_mean_and_variance(data)
return (data - data_mean).sum() ** 2
def calculate_group_square_sum(data_all, datas):
"""计算组间平方和"""
data_all_mean, _ = calculate_mean_and_variance(data_all)
group_square_sum = 0
for group in datas:
group_mean, _ = calculate_mean_and_variance(group)
group_square_sum += len(group) * (group_mean - data_all_mean) ** 2
return group_square_sum
def calculate_error_square_sum(data_all, datas):
"""计算误差平方和"""
error_square_sum = 0
data_all_mean, _ = calculate_mean_and_variance(data_all)
for group in datas:
group_mean, _ = calculate_mean_and_variance(group)
for value in group:
error_square_sum += (value - group_mean) ** 2
return error_square_sum
def calculate_f_statistics(group_square_sum, error_square_sum, s, n):
"""计算F统计量"""
A_mean_square_sum = group_square_sum / (s - 1)
E_mean_square_sum = error_square_sum / (n - s)
return A_mean_square_sum / E_mean_square_sum
def print_analysis_results(group_square_sum, error_square_sum, total_square_sum, s, n):
"""打印分析结果"""
print("| 方差来源 | 平方和 | *度 | 均方和 |")
print(f"| 因素A | {group_square_sum} | {s-1} | {group_square_sum/(s-1):.4f} |")
print(f"| 误差E | {error_square_sum} | {n-s} | {error_square_sum/(n-s):.4f} |")
print(f"| 总体T | {total_square_sum} | {n-1} | {total_square_sum/(n-1):.4f} |")
def get_confidence_interval(data, alpha=0.05):
"""计算均值的置信区间"""
n = len(data)
mean, var = calculate_mean_and_variance(data)
t_threshold = t.isf(alpha / 2, df=n - 1)
std_err = np.sqrt(var) * t_threshold / np.sqrt(n)
return mean - std_err, mean + std_err
def get_difference_confidence_interval(data_x, data_y, n, r, Q_e, alpha=0.05):
"""计算两个均值差的置信区间"""
n_x, n_y = len(data_x), len(data_y)
x_mean, x_var = calculate_mean_and_variance(data_x)
y_mean, y_var = calculate_mean_and_variance(data_y)
t_threshold = t.isf(alpha / 2, df=n - r)
Q_e_mean = Q_e / (n - r)
std_err = np.sqrt((1/n_x + 1/n_y) * Q_e_mean) * t_threshold
return (x_mean - y_mean) - std_err, (x_mean - y_mean) + std_err
def main(datas, alpha):
# 计算总样本量
n_total = sum(len(group) for group in datas)
s = len(datas)
# 计算总体数据
data_all = np.concatenate(datas)
total_square_sum = calculate_total_square_sum(data_all)
# 计算组间和误差平方和
group_square_sum = calculate_group_square_sum(data_all, datas)
error_square_sum = calculate_error_square_sum(data_all, datas)
# 计算F统计量和临界F值
F_value = calculate_f_statistics(group_square_sum, error_square_sum, s, n_total)
F_threshold = f.isf(q=alpha, dfn=s-1, dfd=n_total-s)
# 打印分析结果
print_analysis_results(group_square_sum, error_square_sum, total_square_sum, s, n_total)
# 打印F值和临界F值
print(f"\na值: {alpha}")
print(f"临界F值---F_{alpha}({s-1},{n_total-s}): {F_threshold}")
# 根据F值判断显著性
if F_value > F_threshold:
print('拒绝原假设,不同处理之间存在显著差异')
else:
print('接受原假设,不同处理之间不存在显著差异')
# 计算每个组均值的置信区间
for i, group in enumerate(datas):
interval = get_confidence_interval(group, alpha)
print(f'第{i+1}组数据均值的置信区间为: {interval}')
# 计算每两个组之间均值差的置信区间
for i in range(len(datas)):
for j in range(i+1, len(datas)):
interval = get_difference_confidence_interval(datas[i], datas[j], n_total, s, error_square_sum, alpha)
print(f'第{i+1}组数据与第{j+1}组数据的均值差的置信区间为: {interval}')
if __name__ == "__main__":
main(datas, alpha)