Scikit-Learn之利用高斯过程回归

一、导入数据并查看形状

from sklearn.datasets import load_boston
import numpy as np
# Load the Boston housing data set and pull out features and target.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
boston = load_boston()
boston_X, boston_y = boston.data, boston.target

# Show the shapes of the feature matrix and the target vector.
print(boston_X.shape)
print(boston_y.shape)

# Random boolean mask with one entry per sample: True is drawn with
# probability 0.75, False with probability 0.25.  No seed is set, so the mask
# differs between runs.
train_set = np.random.choice([True, False], size=len(boston_y), p=[0.75, 0.25])
print(train_set.shape)

二、导入高斯过程回归模块并选择默认参数实例化

from sklearn.gaussian_process import GaussianProcessRegressor
# Instantiate a Gaussian process regressor with every parameter left at its
# default value.
gpr = GaussianProcessRegressor()
# Bare expression: when this is the last statement of a notebook cell, the
# estimator's repr is displayed.
gpr

三、导入高斯过程核函数并选择参数

from sklearn.gaussian_process.kernels import RBF, ConstantKernel as CK
 
# Product of a constant kernel and a radial basis function (RBF) kernel; each
# factor is given the bounds (1e-4, 1e4).
kernel = CK(constant_value=1.0, constant_value_bounds=(1e-4, 1e4)) * RBF(
    length_scale=10, length_scale_bounds=(1e-4, 1e4)
)
# Both names refer to the same kernel object.
mixed_kernel = kernel

四、将参数代入模块中并实例化

# Re-create the regressor, this time with the mixed kernel, alpha=5 and
# 20 optimizer restarts.
gpr = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=5,
    n_restarts_optimizer=20,
)

五、利用数据集训练并预测

# Fit on the rows selected by the boolean mask.
gpr.fit(X=boston_X[train_set], y=boston_y[train_set])

# Predict the remaining rows (mask inverted with ~).
test_preds = gpr.predict(X=boston_X[~train_set])

# Print the held-out feature shape first, then the training feature shape.
for mask in (~train_set, train_set):
    print(boston_X[mask].shape)

六、选择交叉验证预测并绘图

from sklearn.model_selection import cross_val_predict
 
from matplotlib import pyplot as plt
%matplotlib inline
 
# Three stacked panels (3 rows): predictions vs. actual values, residuals per
# sample, and a histogram of the residuals.
f, ax = plt.subplots(figsize=(10, 7), nrows=3)

# Compute the held-out targets and the residuals once and reuse them below.
actuals = boston_y[~train_set]
residuals = test_preds - actuals
sample_idx = range(len(test_preds))

ax[0].plot(sample_idx, test_preds, label='Predicted Values')
ax[0].plot(sample_idx, actuals, label='Actual Values')
ax[0].set_title("Predicted vs Actuals")
ax[0].legend(loc='best')

ax[1].plot(sample_idx, residuals)
ax[1].set_title("Plotted Residuals")

ax[2].hist(residuals)
ax[2].set_title("Histogram of Residuals")

# Adjust the layout only after every title and legend exists: tight_layout()
# computes the spacing once, at call time, so calling it before the titles are
# set leaves them out of the calculation.
f.tight_layout()
from sklearn.model_selection import cross_val_score
 
# 4-fold cross-validation of a regressor with alpha=5 on the training rows,
# scored with 'neg_mean_absolute_error'.
gpr5 = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=5,
    n_restarts_optimizer=20,
)
scores_5 = cross_val_score(
    gpr5,
    boston_X[train_set],
    boston_y[train_set],
    scoring='neg_mean_absolute_error',
    cv=4,
)
def score_mini_report(scores_list):
    """Print the raw scores followed by their mean and standard deviation.

    Args:
        scores_list: Scores to summarise.  May be a NumPy array or any plain
            sequence of numbers (list, tuple).
    """
    import numpy as np  # local import keeps the helper self-contained

    print("List of scores: ", scores_list)
    # np.mean / np.std accept lists and tuples as well as arrays, whereas the
    # .mean() / .std() methods exist only on arrays.
    print("Mean of scores: ", np.mean(scores_list))
    print("Std of scores: ", np.std(scores_list))
     
# Report the cross-validation scores obtained with alpha=5.
score_mini_report(scores_5)

# Repeat the same 4-fold experiment with alpha=7 and report it.
gpr7 = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=7,
    n_restarts_optimizer=20,
)
scores_7 = cross_val_score(
    gpr7,
    boston_X[train_set],
    boston_y[train_set],
    scoring='neg_mean_absolute_error',
    cv=4,
)
score_mini_report(scores_7)
from sklearn.model_selection import cross_val_score
 
# Same alpha=7 configuration, now with normalize_y=True, evaluated with the
# same 4-fold cross-validation and reported.
gpr7n = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=7,
    n_restarts_optimizer=20,
    normalize_y=True,
)
scores_7n = cross_val_score(
    gpr7n,
    boston_X[train_set],
    boston_y[train_set],
    scoring='neg_mean_absolute_error',
    cv=4,
)
score_mini_report(scores_7n)
# Fit the normalize_y=True, alpha=7 model on the training rows.
gpr7n.fit(boston_X[train_set], boston_y[train_set])

# Regressor whose alpha is an array: each training target divided by 4.
# NOTE(review): gpr_new is never fitted or used below -- the predictions still
# come from gpr7n; confirm whether it was meant to replace gpr7n.
gpr_new = GaussianProcessRegressor(alpha=boston_y[train_set]/4,
                                  n_restarts_optimizer=20,
                                  kernel = mixed_kernel)

# A single predict() call yields both the predictions and their spread, so no
# separate plain predict() is needed beforehand.
# NOTE: despite its name, MSE holds what return_std=True returns (standard
# deviations of the predictions), not mean squared errors.
test_preds, MSE = gpr7n.predict(boston_X[~train_set], return_std=True)
# Bare expression: shows the first five values when run as the last statement
# of a notebook cell.
MSE[:5]
# Scatter the held-out predictions and overlay error bars of 1.96 * MSE on
# each point.
f, ax = plt.subplots(figsize=(7, 5))
ax.set_title("Predictions with Error Bars")

n = MSE.shape[0]
rng = range(n)
ax.scatter(x=rng, y=test_preds[:n])
ax.errorbar(x=rng, y=test_preds[:n], yerr=1.96 * MSE[:n])

# Leave one unit of margin to the left of the first sample; the trailing
# semicolon suppresses the echoed return value in a notebook.
ax.set_xlim(-1, n);

Scikit-Learn之利用高斯过程回归
Scikit-Learn之利用高斯过程回归
参考文献
1.官网
2.Scikit-Learn cookbook

上一篇:Boston和MIT研究人员利用脑电信号实时控制机器人


下一篇:理想国线性回归算法入门教程