"""
Compare LinearRegression and kNN regression on the Boston housing data.

Pipeline: load data -> drop capped targets (y >= 50) -> train/test split ->
fit LinearRegression -> fit a default KNeighborsRegressor -> grid-search the
kNN hyperparameters -> print R^2 scores for each model.
"""
import numpy as np
import matplotlib.pyplot as plt  # kept from original; may be used interactively
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsRegressor

# --- Data loading ---
# load_boston was deprecated in scikit-learn 1.0 and removed in 1.2;
# fall back to reconstructing the dataset from the original CMU source
# (the recipe published in scikit-learn's removal notice).
try:
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
except (AttributeError, ImportError):
    import pandas as pd

    data_url = "http://lib.stat.cmu.edu/datasets/boston"
    raw = pd.read_csv(data_url, sep=r"\s+", skiprows=22, header=None)
    # Each sample is spread over two physical rows in the raw file.
    X = np.hstack([raw.values[::2, :], raw.values[1::2, :2]])
    y = raw.values[1::2, 2]

# The target is capped at 50.0 in this dataset; drop the censored rows.
mask = y < 50.0
X = X[mask]
y = y[mask]
# print(X.shape)

# Train/test split (fixed seed for reproducibility).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=666)

# --- Multivariate linear regression ---
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
# Fitted coefficients and intercept
print(lin_reg.coef_, lin_reg.intercept_)
# R^2 on the held-out test set
r_lin = lin_reg.score(X_test, y_test)
print(r_lin)

# --- kNN regression with default hyperparameters ---
knn_reg = KNeighborsRegressor()
knn_reg.fit(X_train, y_train)
r_knn = knn_reg.score(X_test, y_test)
print(r_knn)

# --- Hyperparameter tuning via grid search ---
param_grid = [
    {
        "weights": ["uniform"],
        "n_neighbors": list(range(1, 11)),
    },
    {
        "weights": ["distance"],
        "n_neighbors": list(range(1, 11)),
        "p": list(range(1, 6)),  # Minkowski distance power parameter
    },
]
knn_reg = KNeighborsRegressor()
grid_search = GridSearchCV(knn_reg, param_grid, n_jobs=-1, verbose=1)
grid_search.fit(X_train, y_train)

# Best hyperparameters found by cross-validation
best_p = grid_search.best_params_
# R^2 of the refit best estimator on the test set
best_s = grid_search.score(X_test, y_test)
print(best_p)
print(best_s)
# Recorded output from a previous run (kept for reference; commented out so
# the file remains valid Python):
#
# [-1.15625837e-01  3.13179564e-02 -4.35662825e-02 -9.73281610e-02
#  -1.09500653e+01  3.49898935e+00 -1.41780625e-02 -1.06249020e+00
#   2.46031503e-01 -1.23291876e-02 -8.79440522e-01  8.31653623e-03
#  -3.98593455e-01] 32.59756158869959
# 0.8009390227581041
# 0.602674505080953
# [Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
# Fitting 5 folds for each of 60 candidates, totalling 300 fits
# [Parallel(n_jobs=-1)]: Done  41 tasks      | elapsed:    1.0s
# [Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:    1.3s finished
# {'n_neighbors': 6, 'p': 1, 'weights': 'distance'}
# 0.7353138117643773