1. Univariate Linear Regression Exercise
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
# 1. Import the required packages
# 2. Enter the data by hand
data=[[230.1,37.8,69.2,22.1],[44.5,39.3,45.1,10.4],[17.2,45.9,69.3,9.3],
[151.5,41.3,58.5,18.5],[180.8,10.8,58.4,12.9],[8.7,48.9,75,7.2],
[57.5,32.8,23.5,11.8],[120.2,19.6,11.6,13.2],[8.6,2.1,1,4.8],
[199.8,2.6,21.2,10.6],[66.1,5.8,24.2,8.6],[214.7,24,4,17.4],
[23.8,35.1,65.9,9.2],[97.5,7.6,7.2,9.7]]
index=np.arange(1,15)
columns=['TV','Radio','Newspaper','Sales']
df=pd.DataFrame(data,index,columns)
# print(df)
# 3. Extract the three feature vectors
# x=df[['TV','Radio','Newspaper']]
# x=np.c_[x]
x1=df['TV']
x1=np.c_[x1]
# print('x1=',x1)
x2=df['Radio']
x2=np.c_[x2]
x3=df['Newspaper']
x3=np.c_[x3]
# print(x1)
# 4. Extract the label vector
y = df['Sales'].values   # 1-D label vector of length m
print(y)
m=x1.shape[0]
print(m)
# Column of ones for the intercept (bias) term
a=np.ones(m)
# print(a)
# Min-max feature scaling: map each column into [0, 1]
def suofang(x):
    xmin = np.min(x, axis=0)
    xmax = np.max(x, axis=0)
    s = (x - xmin) / (xmax - xmin)
    return s
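# Quick check (illustrative, not part of the original exercise):
# min-max scaling maps each feature into [0, 1].
assert np.allclose(suofang(np.array([1.0, 3.0, 5.0])), [0.0, 0.5, 1.0])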
x1 = suofang(x1)      # note: only the TV column is scaled here;
x11 = np.c_[a, x1]    # the Radio/Newspaper runs below use raw values with smaller learning rates
# x = np.c_[a, x]
x22 = np.c_[a, x2]
x33 = np.c_[a, x3]
print(x11)
# 5. Build the univariate linear regression model
# 6. Linear model (hypothesis function)
def model(x, theta):
    h = x.dot(theta)
    return h
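# Tiny example (illustrative): a sample [1, 0.5] with theta = [2, 4]
# predicts 1*2 + 0.5*4 = 4:
# model(np.array([[1.0, 0.5]]), np.array([2.0, 4.0])) -> array([4.])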
# 7. Cost function: J = 1/(2m) * sum((h - y)^2)
def cost(h, y):
    m = h.shape[0]
    j = 1/(2*m) * np.sum((h - y)**2)
    return j
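# Worked example (illustrative): with h = [3, 5] and y = [1, 1],
# J = 1/(2*2) * ((3-1)^2 + (5-1)^2) = 20/4 = 5.0:
# cost(np.array([3.0, 5.0]), np.array([1.0, 1.0])) -> 5.0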
# 8. Gradient descent
def gradeDecline(xx, y, alpha, nums):
    m, n = xx.shape
    theta = np.zeros(n)
    j = np.ones(nums)                    # cost history
    for i in range(nums):
        h = model(xx, theta)
        j[i] = cost(h, y)
        delta = (1/m) * xx.T.dot(h - y)  # gradient of the cost
        theta = theta - alpha * delta
    return theta, j, h
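# Optional cross-check (a sketch, not part of the original exercise): on a
# dataset this small the least-squares optimum can be computed directly, and
# gradient descent should converge towards it:
# theta_exact = np.linalg.lstsq(x11, y, rcond=None)[0]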
# 9. Train on the TV column and print the final cost
theta, j, h = gradeDecline(x11, y, 0.000051, 50000)
print('final cost (TV):', j[-1])
# print(theta)
# 10. Scatter plot with the fitted line
plt.subplot(321)
plt.scatter(x1,y)
plt.plot(x1,h)
plt.subplot(322)
plt.plot(j)
# 11. Train on the Radio column and print the final cost
theta1, j, h = gradeDecline(x22, y, 0.000045, 50000)
print('final cost (Radio):', j[-1])
# 12. Scatter plot with the fitted line
plt.subplot(323)
plt.scatter(x2,y)
plt.plot(x2,h)
plt.subplot(324)
plt.plot(j)
print('h=',h)
# 13. Train on the Newspaper column and print the final cost
theta2, j, h = gradeDecline(x33, y, 0.0000034, 500000)   # fixed: use x33 (with bias column), not x3
print('final cost (Newspaper):', j[-1])
# 14. Scatter plot with the fitted line
plt.subplot(325)
plt.scatter(x3,y)
plt.plot(x3,h)
plt.subplot(326)
plt.plot(j)
plt.show()
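As an optional sanity check (not part of the original exercise, and assuming scikit-learn is installed), the univariate fit on the scaled TV column can be compared against scikit-learn's closed-form solution:
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(x1, y)   # x1 is the scaled TV column, shape (m, 1)
print(reg.intercept_, reg.coef_)      # should approach theta from gradeDecline as training converges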
2. Multivariate Linear Regression
Exercise 1:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')
data=[[230.1,37.8,69.2,22.1],[44.5,39.3,45.1,10.4],[17.2,45.9,69.3,9.3],
[151.5,41.3,58.5,18.5],[180.8,10.8,58.4,12.9],[8.7,48.9,75,7.2],
[57.5,32.8,23.5,11.8],[120.2,19.6,11.6,13.2],[8.6,2.1,1,4.8],
[199.8,2.6,21.2,10.6],[66.1,5.8,24.2,8.6],[214.7,24,4,17.4],
[23.8,35.1,65.9,9.2],[97.5,7.6,7.2,9.7]]
index=np.arange(1,15)
columns=['TV','Radio','Newspaper','Sales']
df=pd.DataFrame(data,index,columns)
x = df[['TV','Radio','Newspaper']]
x = np.c_[x]
y = np.c_[df['Sales']]   # keep y as an (m, 1) column vector to match theta's (n, 1) shape
print(y)
m = x.shape[0]
print(m)
# Column of ones for the intercept (bias) term
a=np.ones(m)
# print(a)
# Min-max feature scaling
def suofang(x):
    xmin = np.min(x, axis=0)
    xmax = np.max(x, axis=0)
    s = (x - xmin) / (xmax - xmin)
    return s
x=suofang(x)
xx=np.c_[a,x]
# 6. Linear model (hypothesis function)
def model(x, theta):
    h = x.dot(theta)
    return h
# 7. Cost function
def cost(h, y):
    m = h.shape[0]
    j = 1/(2*m) * np.sum((h - y)**2)
    return j
# 8. Gradient descent
def gradeDecline(xx, y, alpha, nums):
    m, n = xx.shape
    theta = np.zeros((n, 1))             # column vector, matching y's shape
    j = np.ones(nums)
    for i in range(nums):
        h = model(xx, theta)
        j[i] = cost(h, y)
        delta = (1/m) * xx.T.dot(h - y)
        theta = theta - alpha * delta
    return theta, j, h
# 9. Train on all three features and print the final cost
theta, j, h = gradeDecline(xx, y, 0.000051, 50000)
print('final cost:', j[-1])
plt.subplot(121)
plt.scatter(y, y)    # reference diagonal: actual vs actual
plt.scatter(y, h)    # predictions vs actual (fixed: plotting x against h is meaningless with 3 features)
plt.subplot(122)
plt.plot(j)
plt.show()
Exercise 2:
import matplotlib.pyplot as plt
import numpy as np
# Part 1: Implement a multivariate linear regression model in Python and use it to predict y. Requirements:
# 1. Write the linear model function
def model(x, theta):
    h = x.dot(theta)
    return h
# 2. Write the cost function
def cost(h, y):
    m = h.shape[0]   # fixed: use the batch size rather than the global m
    j = 1/(2*m) * np.sum((h - y)**2)
    return j
# 3. Write the gradient descent function (7 points)
def gradeDecline(xx, y, alpha, nums):
    m, n = xx.shape
    theta = np.zeros((n, 1))
    j = np.zeros(nums)
    # 4. Gradient update
    for i in range(nums):
        h = model(xx, theta)
        j[i] = cost(h, y)
        delta = (1/m) * xx.T.dot(h - y)
        theta = theta - alpha * delta
    return theta, j, h
# 5. Write the score (R^2) function
def score(x, y, theta):
    h = model(x, theta)
    u = np.sum((h - y)**2)            # residual sum of squares
    v = np.sum((y - np.mean(y))**2)   # total sum of squares
    return 1 - u/v
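# Note: score() is the coefficient of determination R^2. Optional cross-check
# (illustrative, assuming scikit-learn is installed):
#   from sklearn.metrics import r2_score
#   r2_score(y, model(x, theta))   # should match score(x, y, theta)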
# 6. Min-max scaling function
def suofang(x):
    xmin = np.min(x, axis=0)
    xmax = np.max(x, axis=0)
    x = (x - xmin) / (xmax - xmin)
    return x
if __name__ == '__main__':
    # 7. Load the dataset (7 points)
    data = np.loadtxt('data1.txt', delimiter=',')
    # print(data)
    # 8. Split features and labels (7 points)
    x = data[:, :-1]
    y = data[:, -1:]
    # print(x)
    # print(y)
    # m, n = x.shape
    # 8.1 Shuffle the samples
    np.random.seed(4)
    m, n = data.shape
    order = np.random.permutation(m)
    x = x[order]
    y = y[order]
    # 9. Normalize the features with min-max scaling (7 points)
    x = suofang(x)
    # 10. Split into training and test sets (7 points)
    xx = np.c_[np.ones(len(x)), x]
    print(xx)
    trainnum = int(len(x) * 0.7)   # 70/30 split
    trainx = xx[:trainnum, :]
    testx = xx[trainnum:, :]
    trainy = y[:trainnum, :]
    testy = y[trainnum:, :]
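    # Note (illustrative): this manual shuffle + 70/30 slice mirrors what
    # sklearn.model_selection.train_test_split(xx, y, test_size=0.3) does
    # (train_test_split shuffles by default).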
    # 11. Train the model
    theta, j, h = gradeDecline(trainx, trainy, 0.001, 500000)
    print(theta)
    # 12. Plot the cost curve and tune the hyperparameters
    plt.subplot(121)
    plt.scatter(trainy, trainy)              # reference diagonal: actual vs actual
    plt.scatter(testy, model(testx, theta))  # test predictions vs actual
    plt.subplot(122)
    plt.plot(j)
    plt.show()
    # 13. Print the training-set score
    s = score(trainx, trainy, theta)
    print('training set R^2:', s)
    # 14. Print the test-set score
    s1 = score(testx, testy, theta)
    print('test set R^2:', s1)