一、导入数据并查看形状
from sklearn.datasets import load_boston
import numpy as np
# Load the Boston housing data and separate the feature matrix from the target.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this needs an older scikit-learn release to run.
boston = load_boston()
boston_X, boston_y = boston.data, boston.target
print(boston_X.shape)
print(boston_y.shape)

# Boolean mask with one entry per sample; each entry is True with probability
# 0.75. No seed is set, so the mask differs from run to run.
train_set = np.random.choice(
    [True, False],
    size=len(boston_y),
    p=[0.75, 0.25],
)
print(train_set.shape)
二、导入高斯过程回归模块并选择默认参数实例化
from sklearn.gaussian_process import GaussianProcessRegressor
# Instantiate a Gaussian process regressor with every parameter left at its default.
gpr = GaussianProcessRegressor()
# Bare expression; presumably here to echo the estimator's repr in a notebook cell.
gpr
三、导入高斯过程核函数并选择参数
from sklearn.gaussian_process.kernels import RBF, ConstantKernel as CK
# Product of a constant kernel and an RBF (radial basis function) kernel.
# Each factor is given a starting value and the bounds (1e-4, 1e4).
kernel = CK(constant_value=1.0, constant_value_bounds=(1e-4, 1e4)) * RBF(
    length_scale=10, length_scale_bounds=(1e-4, 1e4)
)
mixed_kernel = kernel
四、将参数代入模块中并实例化
# Rebuild the regressor with explicit settings: the mixed kernel, alpha=5 and
# 20 optimizer restarts.
gpr = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=5,
    n_restarts_optimizer=20,
)
五、利用数据集训练并预测
# Fit on the rows where the boolean mask is True, then predict the remaining
# rows (mask False), which act as the held-out test set.
gpr.fit(boston_X[train_set],boston_y[train_set])
test_preds = gpr.predict(boston_X[~train_set])
# Print the shapes of the test split and the training split, in that order.
print(boston_X[~train_set].shape)
print(boston_X[train_set].shape)
六、通过交叉验证评估预测并绘图
from sklearn.model_selection import cross_val_predict
from matplotlib import pyplot as plt
%matplotlib inline
# Compare the held-out predictions with the true targets in three stacked panels.
actuals = boston_y[~train_set]
residuals = test_preds - actuals  # computed once, reused by panels 2 and 3
positions = range(len(test_preds))

f, ax = plt.subplots(figsize=(10, 7), nrows=3)  # three rows, one column

# Panel 1: predictions and actual values on the same axes.
ax[0].plot(positions, test_preds, label='Predicted Values')
ax[0].plot(positions, actuals, label='Actual Values')
ax[0].set_title("Predicted vs Actuals")
ax[0].legend(loc='best')

# Panel 2: residual (prediction minus actual) for each held-out sample.
ax[1].plot(positions, residuals)
ax[1].set_title("Plotted Residuals")

# Panel 3: distribution of the residuals.
ax[2].hist(residuals)
ax[2].set_title("Histogram of Residuals")

# tight_layout only accounts for what is on the figure when it is called, so
# run it after the titles, tick labels and legend are in place; calling it
# right after subplots() leaves the titles free to overlap the panel above.
f.tight_layout()
from sklearn.model_selection import cross_val_score
# 4-fold cross-validation of an alpha=5 regressor on the training rows.
# The scoring is negated mean absolute error, so values nearer zero are better.
gpr5 = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=5,
    n_restarts_optimizer=20,
)
scores_5 = cross_val_score(
    gpr5,
    boston_X[train_set],
    boston_y[train_set],
    scoring='neg_mean_absolute_error',
    cv=4,
)
def score_mini_report(scores_list):
    """Print a short summary of a collection of scores.

    Writes three lines to standard output: the scores as given, their mean,
    and their standard deviation.

    Args:
        scores_list: Scores to summarise, e.g. the array returned by
            ``cross_val_score``. Any array-like of numbers is accepted.

    Returns:
        None.
    """
    # np.asarray leaves ndarrays untouched and lets plain lists/tuples work too.
    scores = np.asarray(scores_list)
    print("List of scores: ", scores_list)
    print("Mean of scores: ", scores.mean())
    print("Std of scores: ", scores.std())
# Summarise the alpha=5 cross-validation scores.
score_mini_report(scores_5)

# Repeat the 4-fold evaluation with alpha=7 and summarise it the same way.
gpr7 = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=7,
    n_restarts_optimizer=20,
)
scores_7 = cross_val_score(
    gpr7,
    boston_X[train_set],
    boston_y[train_set],
    scoring='neg_mean_absolute_error',
    cv=4,
)
score_mini_report(scores_7)
from sklearn.model_selection import cross_val_score
# Same alpha=7 setup, now with normalize_y=True, evaluated with 4-fold
# cross-validation on the training rows and summarised.
gpr7n = GaussianProcessRegressor(
    kernel=mixed_kernel,
    alpha=7,
    n_restarts_optimizer=20,
    normalize_y=True,
)
scores_7n = cross_val_score(
    gpr7n,
    boston_X[train_set],
    boston_y[train_set],
    scoring='neg_mean_absolute_error',
    cv=4,
)
score_mini_report(scores_7n)
# Fit the normalize_y model on the training rows and predict the held-out rows.
gpr7n.fit(boston_X[train_set],boston_y[train_set])
test_preds = gpr7n.predict(boston_X[~train_set])
# Regressor whose alpha is an array: the training targets divided by 4.
# NOTE(review): gpr_new is never fitted or used in the visible code — confirm
# whether the prediction below was meant to come from it.
gpr_new = GaussianProcessRegressor(alpha=boston_y[train_set]/4,
n_restarts_optimizer=20,
kernel = mixed_kernel)
# Predict the held-out rows again, this time also requesting a second output.
# NOTE(review): return_std=True asks for standard deviations, so the name MSE
# is misleading; it is left unchanged because later code reads it.
test_preds, MSE = gpr7n.predict(boston_X[~train_set], return_std=True)
# Bare expression; presumably echoes the first five values in a notebook cell.
MSE[:5]
# Scatter the held-out predictions and overlay error bars of 1.96 times the
# values in MSE (an approximate 95% interval if MSE holds standard deviations).
n = len(MSE)
rng = range(n)
f, ax = plt.subplots(figsize=(7, 5))
ax.scatter(rng, test_preds[:n])
ax.errorbar(rng, test_preds[:n], yerr=MSE[:n] * 1.96)
ax.set_title("Predictions with Error Bars")
# Leave one unit of padding on the left and end the axis at n.
ax.set_xlim(-1, n);
参考文献
1.官网
2.Scikit-Learn cookbook