"""Two scikit-learn demos executed top to bottom as a script.

1. Fit a decision tree on the breast-cancer dataset, print its train/test
   accuracy, export it to ``tree.dot`` and render that file to ``tree.png``.
2. Grid-search a PCA + logistic-regression pipeline on the digits dataset and
   plot the cumulative explained-variance curve with the selected number of
   PCA components marked.
"""
from warnings import simplefilter

# Installed before the remaining imports (same ordering as the original
# script) so the filter is already active while those packages load.
simplefilter(action='ignore', category=FutureWarning)

import matplotlib.pyplot as plt
import numpy as np
import pydot
from sklearn import datasets, decomposition, linear_model
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier, export_graphviz

###############################################################################
# Decision tree on the breast-cancer dataset
cancer = load_breast_cancer()

# With no explicit test_size, train_test_split holds out 25% of the samples
# (a 3:1 train/test split). random_state seeds the shuffle; any fixed value
# (42 here) makes the split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    cancer['data'], cancer['target'], random_state=42)

tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)
print('Train score:{:.3f}'.format(tree.score(X_train, y_train)))
print('Test score:{:.3f}'.format(tree.score(X_test, y_test)))

# Write the fitted tree as a Graphviz .dot file. class_names are matched to
# the target values in ascending order, so the first label describes class 0
# and the second class 1.
export_graphviz(
    tree,
    out_file="tree.dot",
    class_names=['严重', '轻微'],
    feature_names=cancer.feature_names,
    impurity=False,
    filled=True,
)

# Render the .dot file to a PNG. The file is read back explicitly as UTF-8:
# it contains the non-ASCII class names above, and relying on the locale's
# default encoding can fail or garble them on non-UTF-8 systems.
(graph,) = pydot.graph_from_dot_file('tree.dot', encoding='utf-8')
graph.write_png('tree.png')

###############################################################################
# PCA + logistic regression pipeline on the digits dataset
# NOTE(review): only FutureWarning is silenced above; if the default solver
# iteration limit is too low for this data a ConvergenceWarning would still be
# printed -- confirm whether max_iter should be raised.
logistic = linear_model.LogisticRegression()
pca = decomposition.PCA()
pipe = Pipeline(steps=[('pca', pca), ('logistic', logistic)])

digits = datasets.load_digits()
X_digits = digits.data
y_digits = digits.target

###############################################################################
# Plot the PCA spectrum
pca.fit(X_digits)

plt.figure(1, figsize=(4, 3))
plt.clf()
plt.axes([.2, .2, .7, .7])
# Entry i of the cumulative sum is the variance captured by the first i + 1
# components, so plot against 1..N. That keeps the x axis in the same units
# as the n_components marker drawn after the grid search.
cumulative_ratio = np.cumsum(pca.explained_variance_ratio_)
plt.plot(np.arange(1, cumulative_ratio.size + 1), cumulative_ratio,
         linewidth=2)
plt.axis('tight')
plt.xlabel('n_components')
# The curve is the cumulative explained-variance *ratio*, not the raw
# explained_variance_ attribute, so label it accordingly.
plt.ylabel('cumulative variance ratio')

###############################################################################
# Prediction
n_components = [20, 40, 64]
Cs = np.logspace(-4, 4, 3)

# Parameters of pipeline steps are addressed as '<step name>__<parameter>'.
estimator = GridSearchCV(
    pipe,
    dict(pca__n_components=n_components, logistic__C=Cs),
)
estimator.fit(X_digits, y_digits)

# Mark the number of components picked by the grid search on the PCA plot.
plt.axvline(
    estimator.best_estimator_.named_steps['pca'].n_components,
    linestyle=':',
    label='n_components chosen',
)
plt.legend(prop=dict(size=12))
plt.show()