通过学习斯坦福公开课的线性规划和梯度下降,参考他人代码自己做了测试,写了个类以后有时间再去扩展,代码注释以后再加,作业好多:
import numpy as np
import matplotlib.pyplot as plt
import random class dataMinning:
datasets = []
labelsets = [] addressD = '' #Data folder
addressL = '' #Label folder npDatasets = np.zeros(1)
npLabelsets = np.zeros(1) cost = []
numIterations = 0
alpha = 0
theta = np.ones(2)
#pCols = 0
#dRows = 0
def __init__(self,addressD,addressL,theta,numIterations,alpha,datasets=None):
if datasets is None:
self.datasets = []
else:
self.datasets = datasets
self.addressD = addressD
self.addressL = addressL
self.theta = theta
self.numIterations = numIterations
self.alpha = alpha def readFrom(self):
fd = open(self.addressD,'r')
for line in fd:
tmp = line[:-1].split()
self.datasets.append([int(i) for i in tmp])
fd.close()
self.npDatasets = np.array(self.datasets) fl = open(self.addressL,'r')
for line in fl:
tmp = line[:-1].split()
self.labelsets.append([int(i) for i in tmp])
fl.close() tm = []
for item in self.labelsets:
tm = tm + item
self.npLabelsets = np.array(tm) def genData(self,numPoints,bias,variance):
self.genx = np.zeros(shape = (numPoints,2))
self.geny = np.zeros(shape = numPoints) for i in range(0,numPoints):
self.genx[i][0] = 1
self.genx[i][1] = i
self.geny[i] = (i + bias) + random.uniform(0,1) * variance def gradientDescent(self):
xTrans = self.genx.transpose() #
i = 0
while i < self.numIterations:
hypothesis = np.dot(self.genx,self.theta)
loss = hypothesis - self.geny
#record the cost
self.cost.append(np.sum(loss ** 2))
#calculate the gradient
gradient = np.dot(xTrans,loss)
#updata, gradientDescent
self.theta = self.theta - self.alpha * gradient
i = i + 1 def show(self):
print 'yes' if __name__ == "__main__":
c = dataMinning('c:\\city.txt','c:\\st.txt',np.ones(2),100000,0.000005)
c.genData(100,25,10)
c.gradientDescent()
cx = range(len(c.cost))
plt.figure(1)
plt.plot(cx,c.cost)
plt.ylim(0,25000)
plt.figure(2)
plt.plot(c.genx[:,1],c.geny,'b.')
x = np.arange(0,100,0.1)
y = x * c.theta[1] + c.theta[0]
plt.plot(x,y)
plt.margins(0.2)
plt.show()
图1. 迭代过程中的误差cost
图2. 数据散点图和解直线
参考资料:
1.python编写类:http://blog.csdn.net/wklken/article/details/6313265
2.python中if __name__ == __main__的用法:http://www.cnblogs.com/herbert/archive/2011/09/27/2193482.html
3.matplotlab gallery:http://matplotlib.org/gallery.html
4.python批量梯度下降参考代码:http://www.91r.net/ask/17784587.html