斯坦福机器学习by吴恩达-线性回归 python实现（ex1更新，含3D画图）

2023-12-10 22:16:39
import  numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d,Axes3D#绘制3D函数图像
from matplotlib import cm#色彩映射
import itertools#提供操作迭代对象的函数

# np.loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
#            skiprows=0, usecols=None, unpack=False, ndmin=0)
#   fname:      path of the file to read (including the file name)
#   dtype:      data type of the result, float by default
#   comments:   marker that starts a comment in the file
#   delimiter:  column separator, whitespace by default
#   converters: dict mapping a column index to a preprocessing function,
#               e.g. converters={0: add_one} runs add_one on column 0
#   skiprows:   number of leading rows to skip (int, default 0)
#   usecols:    which columns to read, all of them by default
#   unpack:     when True the result is transposed so columns can be unpacked,
#               e.g. x, y, z = loadtxt(...); default False
#   ndmin:      minimum number of dimensions of the returned array (0, 1 or 2)
#   returns:    the data read from the file
data_fromfile = np.loadtxt(
    'data/ex1data1.txt',
    delimiter=',',
    usecols=(0, 1),
    unpack=True,
)
# Because of unpack=True every row of data_fromfile is one column of the file:
# all rows but the last are features, the last row is the target.
X = data_fromfile[:-1].T
y = data_fromfile[-1:].T
m = y.size  # number of training examples

# np.insert(arr, obj, values, axis=None)
#   arr:    array to insert into
#   obj:    index (int, slice or sequence of ints) before which to insert
#   values: values to insert; they are cast to arr's dtype and must be
#           shape-compatible with arr
#   axis:   0 inserts rows, 1 inserts columns
# Prepend a column of ones so the first parameter acts as the intercept.
X = np.insert(X, 0, 1, axis=1)

def plotData(X, y):
    """Draw the training examples as red crosses on a new 10x6 figure.

    X: 2-D array; column 1 is used for the horizontal axis.
    y: 2-D array; column 0 is used for the vertical axis.
    """
    plt.figure(figsize=(10, 6))
    horizontal = X[:, 1]
    vertical = y[:, 0]
    # NOTE: plt.scatter does not accept a format string such as 'rx',
    # which is why plt.plot is used for the markers here.
    plt.plot(horizontal, vertical, 'rx', markersize=10)
    plt.xlabel('Population of City in 10,000s')
    plt.ylabel('Profit in $10,000s')
    plt.grid(True)


plotData(X, y)

# Gradient-descent settings.
iterations = 1500  # number of update steps
alpha = 0.01       # step size (learning rate)
# Start from an all-zero column vector with one entry per column of X.
initial_theta = np.zeros((X.shape[1], 1))

def h(X, theta):
    """Linear hypothesis: return the product of X and theta.

    np.dot(a, b) is equivalent to a.dot(b) for arrays.
    """
    predictions = np.dot(X, theta)
    return predictions

def computeCost(X, y, theta, m):
    """Return the squared-error cost 1/(2m) * (h - y)^T (h - y).

    X: design matrix passed to h().
    y: target values, subtracted from the predictions.
    theta: parameter vector passed to h().
    m: number of training examples used for normalisation.

    The result is whatever the matrix product yields; callers in this file
    index it with [0][0] to obtain the scalar.
    """
    residual = h(X, theta) - y
    squared_error = residual.T.dot(residual)
    return 1. / (2 * m) * squared_error

def descentGradient(X, y, theta, learning_rate=None, num_iters=None):
    """Run batch gradient descent and record the optimisation history.

    X: design matrix (one row per example).
    y: column of target values; its size is used as the number of examples,
       so the function no longer depends on the module-level ``m``.
    theta: starting parameter column vector.
    learning_rate: step size; falls back to the module-level ``alpha``
        when omitted.
    num_iters: number of update steps; falls back to the module-level
        ``iterations`` when omitted.

    Returns a tuple (theta, cost_arr, theta_arr):
      theta     -- parameters after the last update,
      cost_arr  -- cost measured before each update,
      theta_arr -- parameters held before each update (the final theta is
                   therefore not part of this list).
    """
    step = alpha if learning_rate is None else learning_rate
    n_steps = iterations if num_iters is None else num_iters
    n_samples = y.size

    cost_arr = []
    theta_arr = []
    for _ in range(n_steps):
        cost_arr.append(computeCost(X, y, theta, n_samples)[0][0])
        theta_arr.append(theta)
        # theta is rebound to a new array (never modified in place), so the
        # entries already stored in theta_arr stay untouched.
        residual = h(X, theta) - y
        theta = theta - np.transpose(step / n_samples * residual.T.dot(X))
    return theta, cost_arr, theta_arr


theta, cost_arr, theta_arr = descentGradient(X, y, initial_theta)

def plotCost(cost_arr, iterations):
    """Plot the recorded cost values against the iteration index.

    cost_arr: sequence of cost values, plotted on the vertical axis.
    iterations: number of iterations; range(iterations) is the horizontal
        axis.
    """
    plt.figure(figsize=(10, 6))
    steps = range(iterations)
    plt.plot(steps, cost_arr)


plotCost(cost_arr, iterations)

def plotLine(X, y, theta):
    """Plot the training data together with the fitted line h(x).

    X: design matrix; column 1 is used for the horizontal axis.
    y: target values, forwarded to plotData().
    theta: fitted parameters; the first two entries are shown in the legend
        as intercept and slope.
    """
    plotData(X, y)
    # theta[0] / theta[1] are 1-element arrays when theta is a column vector;
    # formatting such arrays with %f is deprecated since NumPy 1.25, so pull
    # out real scalars first.
    flat_theta = np.ravel(theta)
    intercept = float(flat_theta[0])
    slope = float(flat_theta[1])
    plt.plot(X[:, 1], h(X, theta),
             label='h(x) = %0.2f + %0.2fx' % (intercept, slope))
    plt.legend()  # show the legend containing the fitted equation


plotLine(X, y, theta)
plt.show()

# 3-D view of the cost over a grid of (theta_0, theta_1) values, with the
# path taken by gradient descent drawn on top.
fig = plt.figure(figsize=(12, 12))
# Figure.gca() no longer accepts keyword arguments (deprecated in
# Matplotlib 3.4, removed in 3.6); add_subplot is the supported way to
# request a 3-D projection.
ax = fig.add_subplot(111, projection='3d')
x_axis = np.arange(-10, 10, .5)
y_axis = np.arange(-1, 4, .1)
x_data, y_data, z_data = [], [], []
for i in x_axis:
    for j in y_axis:
        x_data.append(i)
        y_data.append(j)
        z_data.append(computeCost(X, y, np.array([[i], [j]]), m)[0][0])
# c:    values used to colour the points
# cmap: colormap that translates those values into colours
ax.scatter(x_data, y_data, z_data, c=np.abs(z_data), cmap=plt.get_cmap('YlOrRd'))
ax.set_xlabel(r'$\theta_0$', fontsize=30)
ax.set_ylabel(r'$\theta_1$', fontsize=30)
ax.set_title('Cost', fontsize=30)
# theta_arr holds (2, 1) column vectors, so stacking them gives shape
# (n, 2, 1). Index down to 1-D coordinate arrays so x, y and z all have
# the same shape when handed to the 3-D plot call.
theta_path = np.array(theta_arr)
ax.plot(theta_path[:, 0, 0], theta_path[:, 1, 0], cost_arr, 'bo-', label='Training path')
plt.show()
码农公寓

相关文章