# PCA: principal component analysis.
# SVC's two key hyperparameters:
#   C     - penalty coefficient, i.e. tolerance for misclassification.
#           Larger C tolerates less error and tends to overfit; smaller C
#           tends to underfit. Either extreme hurts generalization.
#   gamma - parameter of the RBF kernel; implicitly controls how the data
#           are distributed after mapping into the new feature space.
#           Larger gamma -> fewer support vectors; smaller gamma -> more.
#           The support-vector count affects training and prediction speed.
from sklearn.svm import SVC
from sklearn.datasets import fetch_lfw_people
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.model_selection import GridSearchCV #调节参数
from sklearn.model_selection import train_test_split
import logging
import matplotlib.pyplot as plt
logging.basicConfig(level=logging.INFO)  # show sklearn's download/progress messages

# Labeled Faces in the Wild: keep only people with >= 70 images so every
# class has enough samples; full resolution, 250x250 crop.
data = fetch_lfw_people(min_faces_per_person=70, resize=1,
                        slice_=(slice(0, 250, None), slice(0, 250, None)))
images = data.images            # (n_samples, height, width) image stack
target_name = data.target_names # class index -> person name
x = data.data                   # flattened pixels, one row per image
y = data.target                 # integer class labels

# Fixed random_state so the split (and therefore the reported accuracy
# figures) is reproducible across runs; the original omitted it.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.12, random_state=42)
# Baseline results (timing/benchmark code removed, figures kept for reference):
#   - SVC on raw full-resolution pixels: ~0.61 test accuracy, very slow.
#   - A second run of plain SVC on raw pixels: ~0.67 test accuracy.
# The PCA projection below both speeds up training and improves accuracy.
# --- Dimensionality reduction --------------------------------------------
pca = PCA(n_components=150,        # number of principal components to keep
          svd_solver='randomized', # fast approximate SVD for large data
          whiten=True)             # scale components to unit variance

# Fit on the TRAINING split only. The original called ``fit(x)`` on the
# full dataset, which leaks information from the test rows into the
# learned projection and inflates the measured test accuracy.
pca.fit(x_train)
x_train_pca = pca.transform(x_train)  # project training images
x_test_pca = pca.transform(x_test)    # project test images
# Baseline: a default SVC on these PCA features scored ~0.69.
# --- Hyperparameter search -----------------------------------------------
g_svc = SVC()
param_grid = {
    "C": [0.2, 0.5, 0.8, 1, 5, 7, 9],                  # misclassification penalty
    "gamma": [0.001, 0.002, 0.004, 0.02, 0.05, 0.06],  # RBF kernel width
}
# Cross-validated exhaustive search; with the default refit=True the best
# model is refit on the whole training set once the search finishes.
gcv = GridSearchCV(g_svc, param_grid=param_grid)
gcv.fit(x_train_pca, y_train)
# Report the winning combination — the original ran the (expensive) search
# but never read its results, hard-coding C/gamma by hand instead.
print(gcv.best_params_, gcv.best_score_)
# --- Final model ----------------------------------------------------------
# Use the estimator chosen by the grid search (already refit on the full
# PCA-projected training set) instead of hand-copying C=5.0, gamma=0.001,
# which silently goes stale whenever the data or the grid changes.
svc_last = gcv.best_estimator_
print(svc_last.score(x_test_pca, y_test))    # ~0.8 test accuracy
y_new = svc_last.predict(x_test_pca)         # predictions for all test images
y_last = svc_last.predict(x_test_pca[:20])   # predictions for the first 20
# Optional visualization (uncomment to inspect a raw image):
# plt.imshow(data.images[10])
# print(data.images[10].shape)   # per-image pixel dimensions
# print(images.shape)            # (n_samples, height, width)
# plt.savefig('11.png')