Python机器学习:KNN算法05f超参数

超参数:在运行机器学习算法之前需要指定的参数。kNN 中 k(n_neighbors)的默认值为 5(经验值),具体取值还需要通过实验搜索来确定。

依然使用手写数字数据集

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn import datasets
# Load the handwritten-digits dataset through sklearn.datasets.
digits = datasets.load_digits()

from sklearn.model_selection import train_test_split

# Feature matrix and label vector.
X, y = digits.data, digits.target

# Hold out 20% of the samples for testing; the fixed random seed makes the
# experiment repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=666,
)

from sklearn.neighbors import KNeighborsClassifier

# Baseline classifier with k = 3 neighbours.
my_knn_clf = KNeighborsClassifier(n_neighbors=3)
my_knn_clf.fit(X_train, y_train)
y_predict = my_knn_clf.predict(X_test)

# Score on the held-out test set; left as the final bare expression so the
# notebook displays the value.
my_knn_clf.score(X_test, y_test)
0.9888888888888889

寻找最好的k,最后k=4

# Search k = 1..10 for the value giving the highest test-set score.
best_k, best_score = -1, 0.0
for k in range(1, 11):
    knn_clf = KNeighborsClassifier(n_neighbors=k)
    knn_clf.fit(X_train, y_train)
    score = knn_clf.score(X_test, y_test)
    print(score, k, sep="\n")
    # Only a strictly better score replaces the current best, so on ties the
    # smallest k is kept.
    if not score > best_score:
        continue
    best_k, best_score = k, score
print('best_k=', best_k)
print('best_score=', best_score)
0.9833333333333333
1
0.9888888888888889
2
0.9888888888888889
3
0.9916666666666667
4
0.9888888888888889
5
0.9888888888888889
6
0.9861111111111112
7
0.9861111111111112
8
0.9833333333333333
9
0.9833333333333333
10
best_k= 4
best_score= 0.9916666666666667

考虑距离权重

# Search over the neighbour-weighting scheme and k together.
best_method, best_k, best_score = "", -1, 0.0
# "uniform" ignores neighbour distance; "distance" takes it into account.
for method in ("uniform", "distance"):
    for k in range(1, 11):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=method)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        print(method, score, k, sep="\n")
        # Strict comparison: the first combination reaching a score is kept.
        if not score > best_score:
            continue
        best_method, best_k, best_score = method, k, score

print('best_k=', best_k)
print('best_score=', best_score)
print('best_method', best_method)
uniform
0.9833333333333333
1
uniform
0.9888888888888889
2
uniform
0.9888888888888889
3
uniform
0.9916666666666667
4
uniform
0.9888888888888889
5
uniform
0.9888888888888889
6
uniform
0.9861111111111112
7
uniform
0.9861111111111112
8
uniform
0.9833333333333333
9
uniform
0.9833333333333333
10
distance
0.9833333333333333
1
distance
0.9861111111111112
2
distance
0.9888888888888889
3
distance
0.9888888888888889
4
distance
0.9888888888888889
5
distance
0.9888888888888889
6
distance
0.9888888888888889
7
distance
0.9888888888888889
8
distance
0.9861111111111112
9
distance
0.9861111111111112
10
best_k= 4
best_score= 0.9916666666666667
best_method uniform

搜索明可夫斯基距离相应的 p,此处将 weights 固定为 distance

# More on distance definitions:
# search the Minkowski exponent p together with k, with the neighbour
# weighting held fixed for the whole search.
import time

weights = "distance"
best_score = 0.0
best_k = -1
best_p = -1

# perf_counter is meant for measuring elapsed time; unlike time.time() it is
# not affected by system clock adjustments.
start = time.perf_counter()
for k in range(1, 11):
    for p in range(1, 6):
        knn_clf = KNeighborsClassifier(n_neighbors=k, weights=weights, p=p)
        knn_clf.fit(X_train, y_train)
        score = knn_clf.score(X_test, y_test)
        # Print the weighting this cell actually uses. The original printed
        # `method`, a name this cell never defines, so the cell only ran if an
        # earlier loop had left that variable behind.
        print(weights)
        print(score)
        print(k)
        # Strict comparison: the first (k, p) pair reaching a score is kept.
        if score > best_score:
            best_k = k
            best_p = p
            best_score = score
print('best_p=', best_p)
print('best_k=', best_k)
print('best_score=', best_score)
# Elapsed seconds for the whole search, including the prints above.
runtime = time.perf_counter() - start
print(runtime)
distance
0.9861111111111112
1
distance
0.9833333333333333
1
distance
0.9861111111111112
1
distance
0.9861111111111112
1
distance
0.9861111111111112
1
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9861111111111112
2
distance
0.9833333333333333
3
distance
0.9888888888888889
3
distance
0.9833333333333333
3
distance
0.9833333333333333
3
distance
0.9833333333333333
3
distance
0.9833333333333333
4
distance
0.9888888888888889
4
distance
0.9861111111111112
4
distance
0.9833333333333333
4
distance
0.9833333333333333
4
distance
0.9888888888888889
5
distance
0.9888888888888889
5
distance
0.9861111111111112
5
distance
0.9861111111111112
5
distance
0.9805555555555555
5
distance
0.9833333333333333
6
distance
0.9888888888888889
6
distance
0.9888888888888889
6
distance
0.9833333333333333
6
distance
0.9805555555555555
6
distance
0.9861111111111112
7
distance
0.9888888888888889
7
distance
0.9861111111111112
7
distance
0.9833333333333333
7
distance
0.9833333333333333
7
distance
0.9861111111111112
8
distance
0.9888888888888889
8
distance
0.9888888888888889
8
distance
0.9861111111111112
8
distance
0.9833333333333333
8
distance
0.9861111111111112
9
distance
0.9861111111111112
9
distance
0.9888888888888889
9
distance
0.9888888888888889
9
distance
0.9833333333333333
9
distance
0.9833333333333333
10
distance
0.9861111111111112
10
distance
0.9888888888888889
10
distance
0.9861111111111112
10
distance
0.9833333333333333
10
best_p= 2
best_k= 3
best_score= 0.9888888888888889
18.136173009872437

Python机器学习:KNN算法05f超参数
Python机器学习:KNN算法05f超参数
Python机器学习:KNN算法05f超参数
距离
Python机器学习:KNN算法05f超参数
Python机器学习:KNN算法05f超参数
Python机器学习:KNN算法05f超参数
Python机器学习:KNN算法05f超参数

上一篇:Best of Best系列(2)——ICCV


下一篇:pat 1012 The Best Rank