超参数:在运行机器学习算法之前需要指定的参数。kNN 中 k 的默认值为 5(经验值),具体取值还需通过实验搜索。
依然使用手写数字数据集
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
# Load the handwritten-digits dataset bundled with scikit-learn.
digits = datasets.load_digits()
from sklearn.model_selection import train_test_split

# Feature matrix and label vector taken from the loaded dataset.
X, y = digits.data, digits.target
# Hold out 20% of the samples for evaluation; the fixed random_state makes
# the split, and therefore the whole experiment, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=666
)
from sklearn.neighbors import KNeighborsClassifier

# Baseline: a 3-nearest-neighbours classifier trained on the training split.
# fit() returns the estimator itself, so construction and fitting are chained.
my_knn_clf = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
y_predict = my_knn_clf.predict(X_test)
# Score of the fitted classifier on the held-out split.
my_knn_clf.score(X_test, y_test)
0.9888888888888889
寻找最好的k,最后k=4
# Search for the best k: try k = 1..10 and remember the k with the highest
# score on the test split.
best_score, best_k = 0.0, -1
for k in range(1, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=k)
    knn_clf.fit(X_train, y_train)
    score = knn_clf.score(X_test, y_test)
    # One value per line: the score first, then the k that produced it.
    print(score, k, sep="\n")
    # Strict comparison: on a tie the earlier (smaller) k is kept.
    if score > best_score:
        best_k, best_score = k, score
print('best_k=', best_k)
print('best_score=', best_score)
0.9833333333333333
1
0.9888888888888889
2
0.9888888888888889
3
0.9916666666666667
4
0.9888888888888889
5
0.9888888888888889
6
0.9861111111111112
7
0.9861111111111112
8
0.9833333333333333
9
0.9833333333333333
10
best_k= 4
best_score= 0.9916666666666667
考虑距离权重
# Search jointly over the neighbour weighting scheme and k, keeping the
# combination with the highest score on the test split.
best_method, best_score, best_k = "", 0.0, -1
# "uniform" ignores neighbour distance; "distance" weights neighbours by it.
for method in ["uniform", "distance"]:
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        # One value per line: weighting scheme, score, then k.
        print(method, score, k, sep="\n")
        # Strict comparison: on a tie the earlier combination is kept.
        if score > best_score:
            best_k, best_score, best_method = k, score, method
print('best_k=', best_k)
print('best_score=', best_score)
print('best_method', best_method)
uniform
0.9833333333333333
1
uniform
0.9888888888888889
2
uniform
0.9888888888888889
3
uniform
0.9916666666666667
4
uniform
0.9888888888888889
5
uniform
0.9888888888888889
6
uniform
0.9861111111111112
7
uniform
0.9861111111111112
8
uniform
0.9833333333333333
9
uniform
0.9833333333333333
10
distance
0.9833333333333333
1
distance
0.9861111111111112
2
distance
0.9888888888888889
3
distance
0.9888888888888889
4
distance
0.9888888888888889
5
distance
0.9888888888888889
6
distance
0.9888888888888889
7
distance
0.9888888888888889
8
distance
0.9861111111111112
9
distance
0.9861111111111112
10
best_k= 4
best_score= 0.9916666666666667
best_method uniform
搜索明可夫斯基距离相应的p,此处weights固定为distance
# More on distance definitions: search over the Minkowski distance
# exponent p together with k, using distance-weighted voting throughout.
import time

# The weighting scheme is fixed for this search. It is bound here explicitly
# so the block no longer relies on a `method` variable left over from an
# earlier loop (which would raise NameError if this block ran on its own).
method = "distance"
best_score = 0.0
best_k = -1
best_p = -1
# perf_counter is a monotonic clock intended for measuring elapsed time.
start = time.perf_counter()
for k in range(1, 11):
    for p in range(1, 6):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method, p=p)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        print(method)
        print(score)
        print(k)
        # Strict comparison: on a tie the earlier (k, p) combination is kept.
        if score > best_score:
            best_k = k
            best_p = p
            best_score = score
print('best_p=', best_p)
print('best_k=', best_k)
print('best_score=', best_score)
# Elapsed seconds, measured after the summary prints as in the original.
runtime = time.perf_counter() - start
print(runtime)
distance
0.9861111111111112
1
distance
0.9833333333333333
1
distance
0.9861111111111112
1
distance
0.9861111111111112
1
distance
0.9861111111111112
1
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9833333333333333
3
distance
0.9888888888888889
3
distance
0.9833333333333333
3
distance
0.9833333333333333
3
distance
0.9833333333333333
3
distance
0.9833333333333333
4
distance
0.9888888888888889
4
distance
0.9861111111111112
4
distance
0.9833333333333333
4
distance
0.9833333333333333
4
distance
0.9888888888888889
5
distance
0.9888888888888889
5
distance
0.9861111111111112
5
distance
0.9861111111111112
5
distance
0.9805555555555555
5
distance
0.9833333333333333
6
distance
0.9888888888888889
6
distance
0.9888888888888889
6
distance
0.9833333333333333
6
distance
0.9805555555555555
6
distance
0.9861111111111112
7
distance
0.9888888888888889
7
distance
0.9861111111111112
7
distance
0.9833333333333333
7
distance
0.9833333333333333
7
distance
0.9861111111111112
8
distance
0.9888888888888889
8
distance
0.9888888888888889
8
distance
0.9861111111111112
8
distance
0.9833333333333333
8
distance
0.9861111111111112
9
distance
0.9861111111111112
9
distance
0.9888888888888889
9
distance
0.9888888888888889
9
distance
0.9833333333333333
9
distance
0.9833333333333333
10
distance
0.9861111111111112
10
distance
0.9888888888888889
10
distance
0.9861111111111112
10
distance
0.9833333333333333
10
best_p= 2
best_k= 3
best_score= 0.9888888888888889
18.136173009872437
距离