References for the basic principles of decision trees:
Information entropy study material: https://pan.baidu.com/s/1_AF7xoUhm3XgcYGfGrk5ng (extraction code: kztu)
Decision tree study material: https://sklearn.apachecn.org/docs/master/11.html
CSV data used by the code: https://pan.baidu.com/s/1IxbFuwET7qMbJyEu76WROg (extraction code: g7f7)
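Since the code below builds the tree with criterion='entropy', here is a quick, self-contained sketch of what the information entropy of a label distribution computes (the 9-positive/5-negative split is only an assumed example distribution, not taken from the linked CSV):

import math

def entropy(counts):
    # Shannon entropy (in bits) of a label distribution given as class counts
    total = sum(counts)
    return -sum(c / total * math.log2(c / total) for c in counts if c > 0)

print(entropy([9, 5]))  # about 0.940 bits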
Decision tree code (the code assumes each CSV row has a sample ID in the first column, categorical features in the middle columns, and the class label in the last column):
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer
from sklearn import preprocessing
import pandas as pd
import glob
import numpy as np

file_csv = glob.glob(r"F:\DTree\*.csv")
total_decision = []
list_decision = []
label_decision = []
for csv in file_csv:
    df = pd.read_csv(csv, index_col=False)
    # Feature column names: everything between the ID column and the label column
    headInfo = list(df.columns)[1:-1]
    for indexs in df.index:
        # Last column is the class label, middle columns are the features
        label_decision.append(df.iloc[indexs, -1])
        list_decision.append(list(df.iloc[indexs, 1:-1]))

# Turn each sample into a {feature name: value} dict for DictVectorizer
for item in list_decision:
    dict_dec = {}
    for index, it in enumerate(item):
        dict_dec[headInfo[index]] = it
    total_decision.append(dict_dec)
print(total_decision)
print(label_decision)

# One-hot encode the categorical features
vec = DictVectorizer()
dumpx = vec.fit_transform(total_decision).toarray()
print("dumpx", str(dumpx))

# Binarize the class labels (e.g. yes/no -> 1/0)
lb = preprocessing.LabelBinarizer()
dumpy = lb.fit_transform(label_decision)
print("dumpy", str(dumpy))

# Train a decision tree that splits on information entropy
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dumpx, dumpy)
print(str(clf))

# Export the trained tree as a Graphviz .dot file
# (on scikit-learn >= 1.2, get_feature_names() was removed; use vec.get_feature_names_out())
with open("F:/a.dot", "w") as f:
    tree.export_graphviz(clf, feature_names=vec.get_feature_names(), out_file=f)

# Take the first encoded sample, flip two feature bits, and predict its class
newRowX = dumpx[0, :].copy()
print("oneRowX: " + str(newRowX))
newRowX[0] = 1
newRowX[2] = 0
print("newRowX: " + str(newRowX))
predictedY = clf.predict(np.array(newRowX).reshape(1, -1))
print("predictedY: " + str(predictedY))