调用sklearn岭回归
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from sklearn import linear_model
import random
from numpy import genfromtxt
def main():
    """Fit scikit-learn's RidgeCV on ``longley.csv`` and show the results.

    Reads ``longley.csv`` from the working directory, uses column 1 as the
    target and columns 2 through the second-to-last as features (row 0 is
    dropped, presumably a header row -- confirm against the data file).

    Prints the target vector, the selected alpha, the shape of the stored
    cross-validation errors, and one "predict / real" line per sample. Also
    plots the mean cross-validation error for every candidate alpha and
    marks the minimum with a red dot.
    """
    data = genfromtxt("longley.csv", delimiter=",")
    x_ = data[1:, 2:-1]
    y_ = data[1:, 1]
    print(y_, "\n")

    # np.linspace defaults to 50 samples, so 50 candidate alphas are tried.
    alphas_test = np.linspace(0.001, 1)

    # scikit-learn 1.5 deprecated ``store_cv_values`` / ``cv_values_`` in
    # favour of ``store_cv_results`` / ``cv_results_`` and newer releases
    # drop the old spelling. Try the new name first and fall back so the
    # example runs on both old and new releases.
    try:
        model = linear_model.RidgeCV(alphas=alphas_test, store_cv_results=True)
    except TypeError:
        model = linear_model.RidgeCV(alphas=alphas_test, store_cv_values=True)
    model.fit(x_, y_)

    cv_errors = getattr(model, "cv_results_", None)
    if cv_errors is None:
        cv_errors = model.cv_values_

    # Selected ridge coefficient.
    print("alpha", model.alpha_)
    # Shape of the per-sample, per-alpha cross-validation errors.
    print("loss:", cv_errors.shape)

    # Mean error per alpha, with the minimum highlighted at the chosen alpha.
    mean_errors = cv_errors.mean(axis=0)
    plt.plot(alphas_test, mean_errors)
    plt.plot(model.alpha_, mean_errors.min(), "ro")
    plt.show()

    # Predict each sample individually (as a 1-row 2-D array) so the printed
    # value keeps its one-element array form.
    for row, real in zip(x_, y_):
        num = model.predict(row[np.newaxis])
        print("predict = {}, real = {}".format(num, real))
# Run the scikit-learn RidgeCV example.
main()
alpha 0.3272040816326531
loss: (16, 50)
predict = [83.63894532], real = 83.0
predict = [86.9209978], real = 88.5
predict = [88.11030853], real = 88.2
predict = [90.83378316], real = 89.5
predict = [96.19447165], real = 96.2
predict = [97.78575225], real = 98.1
predict = [98.32626763], real = 99.0
predict = [99.99016485], real = 100.0
predict = [103.23182808], real = 101.2
predict = [105.07674862], real = 104.6
predict = [107.40141271], real = 108.4
predict = [109.49953967], real = 110.8
predict = [112.85570844], real = 112.6
predict = [113.94227953], real = 114.2
predict = [115.42620979], real = 115.7
predict = [117.66558197], real = 116.9
手写标准方程岭回归
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt
from sklearn import linear_model
import random
from numpy import genfromtxt
def re_least_square(X, Y, lam=0.0001):
    """Solve ridge regression in closed form with the normal equation.

    Computes ``(X^T X + lam * I)^-1 X^T Y``.

    Args:
        X: 2-D design matrix, one row per sample and one column per feature.
            Every column is penalised equally, including a constant bias
            column if the caller adds one.
        Y: Targets as a 2-D column (one row per sample).
        lam: Ridge penalty added to the diagonal. Defaults to 0.0001, the
            value that used to be hard-coded.

    Returns:
        A ``numpy.matrix`` with one row of weights per column of ``X``.
    """
    # np.mat was removed in NumPy 2.0; np.asmatrix is the same conversion
    # and is available in NumPy 1.x as well.
    X = np.asmatrix(X)
    Y = np.asmatrix(Y)
    one_mat = np.identity(X.shape[1])
    return (X.T * X + lam * one_mat).I * X.T * Y
def predict_(w, x):
    """Evaluate a linear model with a bias term on a single sample.

    Args:
        w: Column of weights indexed as ``w[k][0]``. ``w[0]`` is the bias
            weight and ``w[1:]`` pair up with the features in ``x``.
        x: One sample as a 1-row 2-D sequence indexed as ``x[0][i]``,
            without a bias column.

    Returns:
        ``w[0] + sum(x[0][i] * w[i + 1])``, in whatever type the element
        arithmetic produces (a 1x1 ``numpy.matrix`` when ``w`` is a matrix).
    """
    # Start from the bias weight: previously the sum started at 0 and the
    # intercept was silently dropped from every prediction.
    result = w[0][0]
    for i in range(len(w) - 1):
        # Deliberately not ``+=``: when ``w`` is a numpy matrix, ``w[0][0]``
        # is a view and an in-place add would overwrite the stored weight.
        result = result + x[0][i] * w[i + 1][0]
    return result
def main():
    """Fit the hand-written ridge regression on ``longley.csv`` and print it.

    Reads ``longley.csv`` from the working directory, uses column 1 as the
    target and every column from index 2 onward as features (row 0 is
    dropped, presumably a header row -- confirm against the data file).
    A column of ones is prepended for the bias term before fitting with
    ``re_least_square``. One "predict / real" line is then printed per
    sample using ``predict_``.
    """
    data = genfromtxt("longley.csv", delimiter=",")
    x_ = data[1:, 2:]
    y_ = data[1:, 1, np.newaxis]

    # Size the bias column from the data instead of hard-coding 16 rows, so
    # the script keeps working if the CSV gains or loses samples.
    bias = np.ones((x_.shape[0], 1))
    x_data = np.concatenate((bias, x_), axis=1)
    w = re_least_square(x_data, y_)

    # predict_ receives the raw feature row (no bias column) as a 1-row
    # 2-D array.
    for row, target in zip(x_, y_):
        num = predict_(w, row[np.newaxis])
        print("predict = {}, real = {}".format(num, target[0]))
# Run the hand-written normal-equation ridge example.
main()
predict = [[82.15855411]], real = 83.0
predict = [[85.62484719]], real = 88.5
predict = [[86.76475802]], real = 88.2
predict = [[89.67781832]], real = 89.5
predict = [[94.81566671]], real = 96.2
predict = [[96.54643031]], real = 98.1
predict = [[96.98592161]], real = 99.0
predict = [[98.63741912]], real = 100.0
predict = [[101.96261101]], real = 101.2
predict = [[103.68080448]], real = 104.6
predict = [[106.13579146]], real = 108.4
predict = [[108.24878847]], real = 110.8
predict = [[111.69617751]], real = 112.6
predict = [[112.6864056]], real = 114.2
predict = [[113.92919783]], real = 115.7
predict = [[116.26300165]], real = 116.9
数据
longley.csv