利用python实现简单的线性回归对房屋面积进行预测
# -*-coding:utf-8 -*-
'''
Created on 2016年12月15日 @author: lpworkdstudy
'''
import numpy as np
from numpy.core.multiarray import dtype
import matplotlib.pyplot as plt filename = "ex1data1.txt"
alpha = 0.01 f = open(filename,"r")
data = []
y = []
for item in f:
item = item.rstrip().split(",")
data.append(item[:-1])
y.append(item[-1:])
Data = np.array(data,dtype= "float64")
Y = np.array(y,dtype = "float64")
Y = (Y-Y.mean())/(Y.max()-Y.min())
One = np.ones(Data.shape[0],dtype = "float64")
Data = np.insert(Data, 0, values=One, axis=1)
for i in range(1,Data.shape[1]):
Data[:,i] = (Data[:,i]-Data[:,i].mean())/(Data[:,i].max()-Data[:,i].min())
theta = np.zeros((1,Data.shape[1]),dtype= "float64") def CostFunction(Data,Y,theta):
h = np.dot(Data,theta.T)
cost = 1/float((2*Data.shape[0]))*np.sum((h-Y)**2)
return cost
def GradientDescent(Data,Y,theta,alpha):
costList = []
for i in range(10000):
theta = theta- (alpha/Data.shape[0]*np.dot(Data.T,(np.dot(Data,theta.T)-Y))).T
cost = CostFunction(Data, Y, theta)
costList.append(cost) plt.plot(range(10000),costList)
plt.xlabel("the no. of iterations")
plt.ylabel("cost Error")
plt.title("LinearRegression")
plt.show()
return theta
if __name__ == "__main__":
weight = GradientDescent(Data,Y,theta,alpha)
print weight
cost = CostFunction(Data, Y, weight)
print cost
上图是Loss Error 随 迭代次数变化的曲线,显然,在迭代4000次左右后随着迭代次数增加,loss下降缓慢。
注:在这里只是简单的利用LMS Loss Function 和 GD对Linear Regression进行了编写,并没有预测