from sklearn.svm import LinearSVC
from sklearn.datasets import load_iris
from sklearn.feature_selection import RFE, RFECV
from sklearn.model_selection import train_test_split


# Data preprocessing: wrapper-style feature selection with the RFE model
def test_RFE():
    """Run recursive feature elimination on the iris dataset and print the result.

    An RFE selector wrapping a LinearSVC estimator is fitted on the full iris
    data and asked to keep 2 features. The number of selected features, the
    boolean support mask and the per-feature ranking are printed.
    """
    dataset = load_iris()
    features, labels = dataset.data, dataset.target

    rfe = RFE(estimator=LinearSVC(), n_features_to_select=2)
    rfe.fit(features, labels)

    # Each entry pairs an output template with the fitted attribute it reports.
    report = (
        ("N_features %s", rfe.n_features_),
        ("Support is %s", rfe.support_),
        ("Ranking %s", rfe.ranking_),
    )
    for template, value in report:
        print(template % value)


# Run test_RFE()
test_RFE()
# Data preprocessing: wrapper-style feature selection with the RFECV model
def test_RFECV():
    """Run cross-validated recursive feature elimination on iris and print the result.

    An RFECV selector wrapping a LinearSVC estimator is fitted on the full
    iris data with 3-fold cross-validation. The number of selected features,
    the boolean support mask, the per-feature ranking and the mean
    cross-validation score for each candidate feature count are printed.
    """
    iris = load_iris()
    X = iris.data
    y = iris.target

    estimator = LinearSVC()
    selector = RFECV(estimator=estimator, cv=3)
    selector.fit(X, y)

    print("N_features %s" % selector.n_features_)
    print("Support is %s" % selector.support_)
    print("Ranking %s" % selector.ranking_)

    # `grid_scores_` was deprecated in scikit-learn 1.0 and removed in 1.2;
    # the mean CV scores now live in `cv_results_["mean_test_score"]`.
    # Fall back to the old attribute so older installations keep working.
    cv_results = getattr(selector, "cv_results_", None)
    if cv_results is not None:
        scores = cv_results["mean_test_score"]
    else:
        scores = selector.grid_scores_
    print("Grid Scores %s" % scores)


# Run test_RFECV()
test_RFECV()
def test_compare_with_no_feature_selection():
    """Compare LinearSVC test accuracy with and without RFE feature selection.

    Loads the iris dataset, makes one stratified 75/25 train/test split, fits
    an RFE selector (LinearSVC estimator, 2 features kept) and then trains one
    LinearSVC on all features and another on the selected features. Both
    test-set scores are printed.

    The selector is fitted after the split, on the training portion only, so
    the held-out samples do not influence which features are kept. Fitting it
    on the full dataset would leak test information into the comparison.
    """
    # Load the data.
    iris = load_iris()
    X, y = iris.data, iris.target

    # Split into training and test sets before any fitting takes place.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.25, random_state=0, stratify=y
    )

    # Feature selection: learn the mask from training data, then apply the
    # same mask to the test data.
    selector = RFE(estimator=LinearSVC(), n_features_to_select=2)
    X_train_t = selector.fit_transform(X_train, y_train)
    X_test_t = selector.transform(X_test)

    # Train and evaluate on the original and on the reduced feature sets.
    clf = LinearSVC()
    clf_t = LinearSVC()
    clf.fit(X_train, y_train)
    clf_t.fit(X_train_t, y_train)
    print("Original DataSet: test score=%s" % (clf.score(X_test, y_test)))
    print("Selected DataSet: test score=%s" % (clf_t.score(X_test_t, y_test)))


# Run test_compare_with_no_feature_selection()
test_compare_with_no_feature_selection()