1. Gradient Descent for a Single-Variable Function
Suppose we have a single-variable function J(θ) with derivative J′(θ). We initialize at a starting point θ₀, choose a learning rate α, and apply the gradient descent update rule

θ_{k+1} = θ_k − α · J′(θ_k)

iterating step by step. As the figure shows, after four iterations, that is, after four steps, we have essentially reached the lowest point of the function, the bottom of the hill.
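The concrete function, starting point, and learning rate are not reproduced above, so the following is a minimal sketch with illustrative choices of its own: J(θ) = θ², starting point θ = 1, and learning rate α = 0.4 (all three are assumptions of this example, not values from the text).

def gradient_descent_1d(grad, theta0, alpha, steps):
    # Plain gradient descent on a single-variable function:
    # theta_{k+1} = theta_k - alpha * J'(theta_k)
    theta = theta0
    for k in range(steps):
        theta = theta - alpha * grad(theta)
        print(k + 1, theta)
    return theta

# Illustrative example: J(theta) = theta**2, so J'(theta) = 2*theta
gradient_descent_1d(lambda theta: 2 * theta, theta0=1.0, alpha=0.4, steps=4)
# iterates: 0.2, 0.04, 0.008, 0.0016 -- essentially at the minimum theta = 0 after four steps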
2. Python Implementation of the Original Form of the Perceptron Learning Algorithm
Stochastic gradient descent (the original form: update on one misclassified sample at a time)
import numpy as np

# Toy training set: two positive samples and one negative sample
x_train = np.array([[3, 3], [4, 3], [1, 1]])
y_train = np.array([1, 1, -1])

alpha = 1                     # learning rate
omega = np.array([0, 0])      # weight vector, initialized to zero
b = 0                         # bias, initialized to zero
i = 0
while i < len(x_train):
    # Sample i is misclassified when y_i * (omega . x_i + b) <= 0
    if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
        omega = omega + alpha * y_train[i] * x_train[i]
        b = b + alpha * y_train[i]
        i = 0                 # restart the scan after every update
    else:
        i = i + 1
    print(i, omega, b)
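With this scan order the loop terminates at omega = [1, 1] and b = -3. As a quick follow-up check (assuming the loop above has just run), every sample should now lie strictly on the correct side of the learned hyperplane:

print(np.all((x_train.dot(omega) + b) * y_train > 0))   # expected output: True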
Batch gradient descent: the script below generates a linearly separable data set, estimates the separating line by descending on the perceptron loss summed over all currently misclassified samples (SGD2), and plots the true line (black) against the fitted one (red).
import numpy as np
import matplotlib.pyplot as plt
import math

def prodata(omega1=2, omega2=-3, b=5, seed=1, size=30):
    """Generate a data set scattered around the line omega1*x1 + omega2*x2 + b = 0."""
    np.random.seed(seed)
    omega = np.array([omega1, omega2])
    x1 = np.arange(0, size)
    t = np.random.normal(loc=0, scale=5, size=size)   # noise: signed offset from the line along x2
    x2 = t - (b + omega[0] * x1) / omega[1] * 1.0
    y_train = []
    x_train = np.column_stack((x1, x2))               # np.array(zip(...)) only works in Python 2
    for noise in t:
        if noise >= 0:
            y_train.append(1)
        else:
            y_train.append(-1)
    y_train = np.array(y_train)
    return x_train, y_train

def SGD(x_train, y_train):
    """Original (single-sample) perceptron update, kept for comparison."""
    alpha = 0.01
    omega = np.array([0.0, 0.0])
    b, i = 0, 0
    while i < len(x_train):
        if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
            omega = omega + alpha * y_train[i] * x_train[i]
            b = b + alpha * y_train[i]
            i = 0
        else:
            i = i + 1
    return omega, b

def SGD2(x_train, y_train, epsilon=1, alpha=0.01):
    """Batch gradient descent on the perceptron loss over all misclassified samples."""
    omega, b = np.array([0.0, 0.0]), 0

    def get_sample(omega, b, x_train, y_train):
        # Collect the currently misclassified samples
        x, y = [], []
        for i in range(0, len(x_train)):
            if (x_train[i].dot(omega) + b) * y_train[i] <= 0:
                x.append(x_train[i])
                y.append(y_train[i])
        return np.array(x), np.array(y)

    def gradient_sum(x, y):
        # Negative gradient of the loss: sum_i y_i * x_i and sum_i y_i
        return (y.reshape(-1, 1) * x).sum(0), y.sum()

    def target(omega, b, x_train, y_train):
        # Perceptron loss: minus the sum of y_i * (omega . x_i + b) over misclassified samples
        x, y = get_sample(omega, b, x_train, y_train)
        if len(x) != 0:
            return -(y * ((omega * x).sum(1) + b)).sum(), x, y
        else:
            return 0, x, y

    now, x, y = target(omega, b, x_train, y_train)
    old = now + epsilon + 1        # sentinel so the loop runs at least once
    while abs(old - now) > epsilon and len(x) != 0:
        omega = omega + alpha * gradient_sum(x, y)[0]
        b = b + alpha * gradient_sum(x, y)[1]
        old = now
        now, x, y = target(omega, b, x_train, y_train)
        print("omega, b, loss:", omega, b, now)
    return omega, b, old - now

if __name__ == '__main__':
    # Generate the raw data
    omega1, omega2, b = 2, -3, 5
    size = 30
    x_train, y_train = prodata(omega1, omega2, b, 1, size)
    # Estimate the parameters by gradient descent
    omega_estimate, b_estimate, error = SGD2(x_train, y_train)
    # omega_estimate, b_estimate = SGD(x_train, y_train)
    # Print the results
    print("actual is:", omega1, omega2, b)
    n = math.sqrt((omega1 ** 2 + omega2 ** 2 + b ** 2) * 1.0)
    print("norm is", omega1 * 1.0 / n, omega2 * 1.0 / n, b * 1.0 / n)
    print("estimate is:", omega_estimate[0], omega_estimate[1], b_estimate)
    print("error:", error)
    # Plot
    fig = plt.figure()
    ax1 = fig.add_subplot(111)
    plt.xlabel('X1')
    plt.ylabel('X2')
    x1 = np.arange(0, 35, 34)               # two points are enough to draw a line
    x2 = -(b + omega1 * x1) / omega2 * 1.0
    ax1.plot(x1, x2, c="black")             # true separating line (black)
    x2 = -(b_estimate + omega_estimate[0] * x1) / omega_estimate[1] * 1.0
    ax1.plot(x1, x2, c="red")               # fitted line (red)
    for i in range(0, len(x_train)):
        if y_train[i] > 0:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="r", marker='o')
        else:
            ax1.scatter(x_train[i, 0], x_train[i, 1], c="b", marker='^')
    plt.show()
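The main block prints the true parameters both raw and normalized, but the estimate only in raw form; to compare directions, the estimate can be normalized the same way. A small follow-up sketch, assuming the variables from the main block above are still in scope:

m = math.sqrt(omega_estimate[0] ** 2 + omega_estimate[1] ** 2 + b_estimate ** 2)
print("estimate (normalized):", omega_estimate[0] / m, omega_estimate[1] / m, b_estimate / m)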
3. The Dual Form of the Perceptron Learning Algorithm
There are two ways to look at this update: as α_i ← α_i + η, or as n_i ← n_i + 1 with α_i = n_i · η. The former says that, on top of the total step length already accumulated for sample i, one more step is added whenever that sample is again found misclassified (each misclassified encounter adds a step of size η). The latter keeps, for the i-th sample, a count of how many times it has been used in updates, and simply increments that count when the sample is again selected as misclassified (since the step added per mistake is the fixed value η, it suffices to update the count, i.e., to record the total number of times the sample has been used).
For more discussion of this point, see https://blog.csdn.net/qq_26826585/article/details/87967520
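Below is a minimal sketch of the two equivalent bookkeeping schemes described above; eta, n, and the sequence of misclassified indices are illustrative assumptions, not taken from the original text.

import numpy as np

eta = 1                                # fixed step size
num_samples = 3
# View 1: accumulate the step length alpha_i directly
alpha = np.zeros(num_samples)
# View 2: count how many times sample i has been misclassified; alpha_i = n_i * eta
n = np.zeros(num_samples)

for i in [0, 2, 2, 2, 0, 2, 2]:        # an illustrative sequence of misclassified sample indices
    alpha[i] += eta                    # former view: add one more step of size eta
    n[i] += 1                          # latter view: only the usage count needs updating

print(np.allclose(alpha, n * eta))     # True: both views yield the same coefficients

The full dual-form implementation follows.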
import numpy as np

def SGD4(x_train, y_train):
    """Dual form of the perceptron: learn one coefficient alpha_i per training sample."""
    N = len(x_train)
    alpha = np.zeros((N,))

    def Gram(x_train):
        # Gram matrix of pairwise inner products; the dual form only ever needs these
        gram = np.zeros((len(x_train), len(x_train)))
        for i in range(0, len(x_train)):
            for j in range(0, len(x_train)):
                gram[i, j] = x_train[i].dot(x_train[j])
        return gram

    b, i, theta = 0, 0, 1          # theta is the step size eta
    gram = Gram(x_train)
    print(gram)
    while i < len(x_train):
        print(i)
        # Misclassified in the dual form: y_i * (sum_j alpha_j * y_j * <x_j, x_i> + b) <= 0
        if ((alpha * y_train).dot(gram[:, i]) + b) * y_train[i] <= 0:
            alpha[i] = alpha[i] + theta
            b = b + theta * y_train[i]
            i = 0
        else:
            i = i + 1
        print(alpha, b)
    return alpha, b

if __name__ == '__main__':
    x_train = np.array([[3, 3], [4, 3], [1, 1]])
    y_train = np.array([1, 1, -1])
    print(SGD4(x_train, y_train))
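Because the dual form returns only the coefficients alpha and the bias b, the primal weight vector has to be recovered as omega = Σ_i alpha_i · y_i · x_i. A small follow-up sketch, assuming SGD4 and the training data defined above:

alpha, b = SGD4(x_train, y_train)
omega = (alpha * y_train).dot(x_train)   # omega = sum_i alpha_i * y_i * x_i
print(omega, b)                          # for this toy data: omega = [1. 1.], b = -3, matching the original form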