# Sample data
# To obtain the dataset: follow the author and send the private message “关联规则案例”.
# -*- coding: utf-8 -*-
# @Time : 2021/11/26 22:41
# @Author : Tancy
# @File : 病例分析-- Apriori算法.py
# @Software : PyCharm
# 1. Load the data
import pandas as pd

# Raw string literal: the Windows path contains backslashes followed by
# characters that do not form valid escape sequences (e.g. "\A"). In a normal
# string literal these trigger an invalid-escape warning; the raw string yields
# exactly the same path without relying on that fallback.
df = pd.read_excel(r'D:\A_学习\数据仓库与数据挖掘\实验\患者病症.xlsx')
# print(df.head())  # uncomment to preview the loaded rows
# 2. Preprocessing
# Split every entry of the '病人症状' column on ',' so that each row becomes one
# transaction (a list of symptom strings); the result is a list of lists.
symptoms = [record.split(',') for record in df['病人症状'].tolist()]
# print(symptoms)
# Convert the transactions into a boolean table
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

TE = TransactionEncoder()  # encoder object used for the conversion
TE.fit(symptoms)  # learn the distinct items that occur in the transactions
data = TE.transform(symptoms)  # boolean table built from the transactions
# print(data)

# Store the boolean table as a DataFrame whose columns are the encoder's
# item names.
# NOTE: this rebinds `df`, replacing the raw data loaded earlier.
df = pd.DataFrame(data, columns=TE.columns_)
# print(df.head())
# 3. Mine frequent itemsets
from mlxtend.frequent_patterns import apriori

# Minimum support is 0.15; use_colnames=True makes the resulting itemsets
# carry the DataFrame's column names.
items = apriori(df, use_colnames=True, min_support=0.15)
# print(items)
# To inspect only the itemsets of a given size k (1, 2, 3, 4, ...):
# print(items[items['itemsets'].apply(lambda x: len(x)) == k])
# 4. Using the minimum confidence, find the strong association rules among
#    the frequent itemsets (threshold: 0.6).
from mlxtend.frequent_patterns import association_rules

# metric='confidence' is the library default, spelled out here for clarity.
rules = association_rules(items, metric='confidence', min_threshold=0.6)
# print(rules)
# 5. Extract the association rules and pretty-print each one as
#    "antecedents → consequents".
# The antecedent/consequent collections are sorted before joining: per the
# mlxtend documentation they are frozensets, and the iteration order of a set
# of strings can differ between interpreter runs (hash randomization), which
# would otherwise make the printed item order non-reproducible.
rule_sides = zip(rules['antecedents'], rules['consequents'])
for antecedents, consequents in rule_sides:
    lhs = ', '.join(sorted(antecedents))
    rhs = ', '.join(sorted(consequents))
    print(f'{lhs} → {rhs}')