关联规则算法有几个重要的概念:支持度(Support)和置信度(Confidence)。
下面以官方教程中的数据集为例:
[['Apple', 'Beer', 'Rice', 'Chicken'],
['Apple', 'Beer', 'Rice'],
['Apple', 'Beer'],
['Apple', 'Bananas'],
['Milk', 'Beer', 'Rice', 'Chicken'],
['Milk', 'Beer', 'Rice'],
['Milk', 'Beer'],
['Apple', 'Bananas']]
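支持度(Support):包含项集 X 的交易数占总交易数的比例,即 $\mathrm{Support}(X) = \dfrac{\text{包含 } X \text{ 的交易数}}{\text{总交易数}}$。
例子:上面 8 条交易中有 6 条包含 Beer,所以 $\mathrm{Support}(\{\text{Beer}\}) = 6/8 = 0.75$。
置信度(Confidence):在包含 X 的交易中同时也包含 Y 的比例,即 $\mathrm{Confidence}(X \to Y) = \dfrac{\mathrm{Support}(X \cup Y)}{\mathrm{Support}(X)}$;例如 $\mathrm{Confidence}(\text{Rice} \to \text{Beer}) = \dfrac{4/8}{4/8} = 1$。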
# Association-rule mining demo with mlxtend:
#   1. one-hot encode the transactions,
#   2. mine frequent itemsets with Apriori,
#   3. derive association rules and print them as "antecedent->consequent".
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Each inner list is one transaction (the items that occur together).
info = [
    ['Apple', 'Beer', 'Rice', 'Chicken'],
    ['Apple', 'Beer', 'Rice'],
    ['Apple', 'Beer'],
    ['Apple', 'Bananas'],
    ['Milk', 'Beer', 'Rice', 'Chicken'],
    ['Milk', 'Beer', 'Rice'],
    ['Milk', 'Beer'],
    ['Apple', 'Bananas'],
]

# Encode the transactions as a table with one column per distinct item
# and one row per transaction; column labels come from the fitted encoder.
TE = TransactionEncoder()
datas = TE.fit_transform(info)
df = pd.DataFrame(datas, columns=TE.columns_)
print(df)

# Frequent itemsets with support >= 0.4; use_colnames=True reports item
# names instead of column indices.
item = apriori(df, min_support=0.4, use_colnames=True)
print(item)

# Itemsets containing at least two items. The result is kept in its own
# variable (the original expression discarded it) and `item` is left intact:
# association_rules() below is given the full table, single-item sets
# included, just as in the original script.
multi_item_sets = item[item['itemsets'].apply(len) >= 2]
print(multi_item_sets)

# Keep only rules whose metric value reaches 0.8. No `metric` is passed, so
# the library default applies (confidence, per the mlxtend documentation).
rules = association_rules(item, min_threshold=0.8)
print(rules)

# Print each rule in a compact form. Antecedents and consequents are
# iterated as collections of item names; sorting them keeps the output
# order stable from run to run.
for _, rule in rules.iterrows():
    x = ','.join(sorted(rule['antecedents']))
    y = ','.join(sorted(rule['consequents']))
    print(x + '->' + y)