Univariate and Multivariate Linear Regression Exercises

1. Univariate Linear Regression Exercise

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

# 1. Import the required packages
# 2. Enter the data by hand
data=[[230.1,37.8,69.2,22.1],[44.5,39.3,45.1,10.4],[17.2,45.9,69.3,9.3],
      [151.5,41.3,58.5,18.5],[180.8,10.8,58.4,12.9],[8.7,48.9,75,7.2],
      [57.5,32.8,23.5,11.8],[120.2,19.6,11.6,13.2],[8.6,2.1,1,4.8],
      [199.8,2.6,21.2,10.6],[66.1,5.8,24.2,8.6],[214.7,24,4,17.4],
      [23.8,35.1,65.9,9.2],[97.5,7.6,7.2,9.7]]
index=np.arange(1,15)
columns=['TV','Radio','Newspaper','Sales']
df=pd.DataFrame(data,index,columns)
# print(df)
# 3. Extract the three feature columns
# x=df[['TV','Radio','Newspaper']]
# x=np.c_[x]
x1=df['TV']
x1=np.c_[x1]
# print('x1=',x1)
x2=df['Radio']
x2=np.c_[x2]
x3=df['Newspaper']
x3=np.c_[x3]
# print(x1)
# 4. Extract the label vector
y=df['Sales'].values   # flatten to a 1-D label array
print(y)
m=x1.shape[0]
print(m)
# intercept column of ones for the design matrices
a=np.ones(m)
# print(a)

# min-max scaling to [0, 1]
def suofang(x):
    xmin=np.min(x,axis=0)
    xmax=np.max(x,axis=0)
    s=(x-xmin)/(xmax-xmin)
    return s
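# Quick sanity check (an illustrative addition, not part of the exercise):
# every column's minimum should map to 0 and its maximum to 1.
# demo=np.array([[1.0,10.0],[2.0,20.0],[3.0,30.0]])
# print(suofang(demo))   # each column becomes [0. , 0.5, 1. ]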

x1=suofang(x1)     # note: only the TV column is scaled here; Radio and
x11=np.c_[a,x1]    # Newspaper are used unscaled, which is why the learning
x22=np.c_[a,x2]    # rates below differ so much between features
x33=np.c_[a,x3]
print(x11)
# 5. Build the univariate linear regression model
# 6. Linear model (hypothesis) function
def model(x,theta):
    h=x.dot(theta)
    return h
# 7. Cost function (mean squared error)
def cost(h,y):
    m=h.shape[0]
    j=1/(2*m)*np.sum((h-y)**2)
    return j
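# For reference: cost() implements J(theta) = 1/(2m) * sum((h - y)**2), whose
# gradient is (1/m) * X.T.dot(h - y) -- exactly the update used below.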
# 8. Gradient descent
def gradeDecline(xx,y,alpha,nums):
    m,n=xx.shape
    theta=np.zeros(n)
    j=np.zeros(nums)               # cost history
    for i in range(nums):
        h=model(xx,theta)
        j[i]=cost(h,y)
        delta=(1/m)*xx.T.dot(h-y)  # gradient of the cost
        theta=theta-alpha*delta
    return theta,j,h
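# Optional cross-check (an addition, not part of the exercise): this
# least-squares cost has a closed-form solution, so the theta found by
# gradient descent can be validated against the normal equation:
# def normalEquation(xx,y):
#     return np.linalg.pinv(xx.T.dot(xx)).dot(xx.T).dot(y)   # (X^T X)^+ X^T y
# print(normalEquation(x11,y))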
# 9. Train on the TV column and output the final cost
theta,j,h=gradeDecline(x11,y,0.000051,50000)
print('final cost (TV):',j[-1])
# 10. Scatter plot with the fitted line
plt.subplot(321)
plt.scatter(x1,y)
plt.plot(x1,h)
plt.subplot(322)
plt.plot(j)

# 11. Train on the Radio column and output the final cost
theta1,j,h=gradeDecline(x22,y,0.000045,50000)
print('final cost (Radio):',j[-1])
# 12. Scatter plot with the fitted line
plt.subplot(323)
plt.scatter(x2,y)
plt.plot(x2,h)
plt.subplot(324)
plt.plot(j)
print('h=',h)
# 13. Train on the Newspaper column and output the final cost
theta2,j,h=gradeDecline(x33,y,0.0000034,500000)   # x33, not x3: the intercept column is needed
print('final cost (Newspaper):',j[-1])
# 14. Scatter plot with the fitted line
plt.subplot(325)
plt.scatter(x3,y)
plt.plot(x3,h)
plt.subplot(326)
plt.plot(j)
plt.show()
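# Optional cross-check (assumes scikit-learn is installed; not part of the
# exercise): sklearn's closed-form LinearRegression on the scaled TV column
# should give values close to theta from step 9, given enough iterations.
# from sklearn.linear_model import LinearRegression
# reg=LinearRegression().fit(x1,y)
# print(reg.intercept_,reg.coef_)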


2. Multivariate Linear Regression

Exercise 1:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import warnings
warnings.filterwarnings('ignore')


data=[[230.1,37.8,69.2,22.1],[44.5,39.3,45.1,10.4],[17.2,45.9,69.3,9.3],
      [151.5,41.3,58.5,18.5],[180.8,10.8,58.4,12.9],[8.7,48.9,75,7.2],
      [57.5,32.8,23.5,11.8],[120.2,19.6,11.6,13.2],[8.6,2.1,1,4.8],
      [199.8,2.6,21.2,10.6],[66.1,5.8,24.2,8.6],[214.7,24,4,17.4],
      [23.8,35.1,65.9,9.2],[97.5,7.6,7.2,9.7]]
index=np.arange(1,15)
columns=['TV','Radio','Newspaper','Sales']
df=pd.DataFrame(data,index,columns)

x=df[['TV','Radio','Newspaper']]
x=np.c_[x]

y=df['Sales'].values   # flatten to a 1-D label array
print(y)

m=x.shape[0]
print(m)
# intercept column of ones for the design matrix
a=np.ones(m)
# print(a)

# min-max scaling to [0, 1]
def suofang(x):
    xmin=np.min(x,axis=0)
    xmax=np.max(x,axis=0)
    s=(x-xmin)/(xmax-xmin)
    return s

x=suofang(x)
xx=np.c_[a,x]
# 6. Linear model (hypothesis) function
def model(x,theta):
    h=x.dot(theta)
    return h
# 7. Cost function
def cost(h,y):
    m=h.shape[0]
    j=1/(2*m)*np.sum((h-y)**2)
    return j
# 8. Gradient descent
def gradeDecline(xx,y,alpha,nums):
    m,n=xx.shape
    theta=np.zeros(n)   # 1-D theta: a (n,1) theta would make h-y broadcast to (m,m) against the 1-D y
    j=np.zeros(nums)
    for i in range(nums):
        h=model(xx,theta)
        j[i]=cost(h,y)
        delta=(1/m)*xx.T.dot(h-y)
        theta=theta-alpha*delta
    return theta,j,h
# 9. Train on all three features and output the final cost
theta,j,h=gradeDecline(xx,y,0.000051,50000)
print('final cost:',j[-1])

plt.subplot(121)
plt.scatter(y,y)    # reference diagonal: actual vs actual
plt.scatter(y,h)    # predicted vs actual (plotting h against the 3-D x is not meaningful)
plt.subplot(122)
plt.plot(j)         # cost history
plt.show()

Exercise 2:

import matplotlib.pyplot as plt
import numpy as np

# Part 1: Implement a multivariate linear regression model in Python and use it to predict y. Requirements:
# 1. Linear model function
def model(x,theta):
    h=x.dot(theta)
    return h

# 2. Cost function
def cost(h,y):
    m=h.shape[0]   # compute m locally instead of relying on a global
    j=1/(2*m)*np.sum((h-y)**2)
    return j
# 3. Gradient descent function (7 points)
def gradeDecline(xx,y,alpha,nums):
    m,n=xx.shape
    theta=np.zeros((n,1))   # column vector, matching the (m,1) label array below
    j=np.zeros(nums)
# 4. Gradient update
    for i in range(nums):
        h=model(xx,theta)
        j[i]=cost(h,y)
        delta=(1/m)*xx.T.dot(h-y)
        theta=theta-alpha*delta
    return theta,j,h
# 5. Score function (R^2)
def score(x,y,theta):
    h=model(x,theta)
    u=np.sum((h-y)**2)            # residual sum of squares
    v=np.sum((y-np.mean(y))**2)   # total sum of squares
    return 1-u/v                  # coefficient of determination R^2
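# Note (an addition, not part of the exercise): score() is the standard R^2
# metric; with scikit-learn installed it should agree with
# sklearn.metrics.r2_score(y, model(x,theta)).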
    
# 6. Min-max scaling function
def suofang(x):
    xmin=np.min(x,axis=0)
    xmax=np.max(x,axis=0)
    x=(x-xmin)/(xmax-xmin)
    return x




if __name__ == '__main__':

# 7. Load the dataset data1.txt (7 points)
    data=np.loadtxt('data1.txt',delimiter=',')
    # print(data)

# 8. Split features and labels (7 points)
    x=data[:,:-1]
    y=data[:,-1:]
    # print(x)
    # print(y)
    # m,n=x.shape

# 8.1 Shuffle the samples (fixed seed for reproducibility)
    np.random.seed(4)
    m,n=data.shape
    order=np.random.permutation(m)
    x=x[order]
    y=y[order]


# 9. Min-max feature scaling (7 points)
    x=suofang(x)
# 10. Split into training and test sets (7 points)
    xx=np.c_[np.ones(len(x)),x]
    print(xx)
    trainnum=int(len(x)*0.7)   # 70/30 train/test split
    trainx=xx[:trainnum,:]
    testx=xx[trainnum:,:]
    trainy=y[:trainnum,:]
    testy=y[trainnum:,:]
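# For reference (assumes scikit-learn; the manual slicing above is what the
# exercise asks for), an equivalent split would be:
# from sklearn.model_selection import train_test_split
# trainx,testx,trainy,testy=train_test_split(xx,y,train_size=0.7,shuffle=False)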

# 11. Train the model
    theta,j,h=gradeDecline(trainx,trainy,0.001,500000)
    print(theta)
# 12. Plot the cost curve and tune the hyperparameters
    plt.subplot(121)
    plt.scatter(trainy,trainy)               # reference diagonal: actual vs actual
    plt.scatter(testy,model(testx,theta))    # test predictions vs actual
    plt.subplot(122)
    plt.plot(j)
    plt.show()
# 13. Output the training-set score
    s=score(trainx,trainy,theta)
    print('training R^2:',s)

# 14. Output the test-set score
    s1=score(testx,testy,theta)
    print('test R^2:',s1)
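
# Caveat (an addition, not part of the exercise): suofang() recomputes min/max
# on whatever array it receives, so a genuinely new sample must be scaled with
# the same statistics used above before calling model(), e.g.:
# xmin=np.min(data[:,:-1],axis=0); xmax=np.max(data[:,:-1],axis=0)
# newx=(new_sample-xmin)/(xmax-xmin)          # new_sample is hypothetical
# pred=model(np.c_[np.ones(len(newx)),newx],theta)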
