支持向量机(乳腺癌案例)

# 支持向量机
from sklearn.svm import SVC
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from time import time
import datetime

# Load the breast-cancer dataset and separate features from labels.
data = load_breast_cancer()
X, y = data.data, data.target

# Quick inspection: feature-matrix shape and the distinct label values.
print(X.shape)
print(np.unique(y))

# Scatter the first two raw feature columns, coloured by label.
first_feature, second_feature = X[:, 0], X[:, 1]
plt.scatter(first_feature, second_feature, c=y)
plt.show()

# Standardize the features (remove their units/scales) before fitting SVMs.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Reduce the standardized features to two principal components.
# NOTE(review): X_dr is not read anywhere in the visible script.
from sklearn.decomposition import PCA
pca = PCA(2)
X_dr = pca.fit_transform(X)

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split reproducible between runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, y, test_size=0.3, random_state=420)

# Fit one SVC per kernel and report its test accuracy and elapsed time.
kernels = ["linear", "poly", "rbf", "sigmoid"]
for kernel in kernels:
    time0 = time()
    clf = SVC(kernel=kernel,
              gamma="auto",
              degree=1,  # polynomial degree; only the "poly" kernel uses it
              cache_size=5000,  # kernel cache size in MB
              ).fit(Xtrain, Ytrain)
    print(kernel, ":", clf.score(Xtest, Ytest))
    # time0 used to be recorded but never read; report the fit + score
    # duration it was meant to measure.
    print(kernel, "time(s):", round(time() - time0, 4))

# RBF 核函数调参(gamma)

# Sweep gamma for the RBF kernel over 50 log-spaced values from 1e-10 to 1e1,
# recording the test-set accuracy obtained at each value.
gamme_range = np.logspace(-10, 1, 50)
score = []
for gamma_value in gamme_range:
    clf = SVC(kernel="rbf", gamma=gamma_value, degree=1,
              cache_size=5000)  # kernel cache size in MB
    clf = clf.fit(Xtrain, Ytrain)
    accuracy = clf.score(Xtest, Ytest)
    score.append(accuracy)

# Report the best accuracy and the first gamma value that reaches it.
best_score = max(score)
best_index = score.index(best_score)
print(best_score, gamme_range[best_index])

# Plot accuracy as a function of gamma.
plt.plot(gamme_range, score)
plt.show()

# 多项式核函数网格搜索调参(gamma 与 coef0)

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV

# Grid-search gamma and coef0 for the polynomial kernel, evaluating each
# combination on 5 stratified shuffle splits (30% held out per split).
time0 = time()
gamme_range = np.logspace(-10, 1, 50)
coef0_range = np.linspace(0, 5, 10)
param_grid = dict(gamma=gamme_range,
                  coef0=coef0_range)
cv = StratifiedShuffleSplit(n_splits=5, test_size=0.3, random_state=420)
grid = GridSearchCV(
    SVC(kernel="poly", degree=1, cache_size=5000),  # cache size in MB
    param_grid=param_grid,
    cv=cv,
)
grid.fit(X, y)
print(grid.best_params_, ":", grid.best_score_)
# time0 used to be recorded but never read; report how long the search took.
print("grid search time(s):", round(time() - time0, 4))

 

上一篇:天池-车辆产品聚类分析-积累笔记


下一篇:Python手写字母识别