Gradient Descent 代码实现

2023-10-15 12:17:46
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Create a synthetic data set: accuracy grows linearly with distance, plus noise.
np.random.seed(0)  # fixed seed so every run draws the same sample
n_sample = 100
dis = np.random.rand(n_sample)
# accuracy = 8 * distance + uniform noise scaled by 4
acc = dis * 8 + np.random.random(n_sample) * 4

my_data = {'distance': dis, 'accuracy': acc}
data = pd.DataFrame(my_data)
# NOTE: the former `cost = []` placeholder was dropped here; it was never read
# and the name is taken by the `cost` function defined further down.
# Preview of the first rows; the value is discarded when run as a plain script.
data.head()

# Standardize each column: subtract its mean, then divide by its standard deviation.
data['distance'] = (
    (data['distance'] - data['distance'].mean()) / data['distance'].std()
)
data['accuracy'] = (
    (data['accuracy'] - data['accuracy'].mean()) / data['accuracy'].std()
)
# Scatter plot of the standardized feature against the standardized target.
plt.scatter(data['distance'], data['accuracy'])
plt.xlabel('normalized distance')
plt.ylabel('normalized accuracy')
plt.show()


def cost(theta0, theta1, x, y):
    """Return the cost of a single-variable linear model.

    The model is ``h(x) = theta0 + theta1 * x`` and the cost is the halved
    mean squared error ``sum((h(x_i) - y_i) ** 2) / (2 * m)`` over the ``m``
    observations.

    Observations are paired by position, so ``x`` and ``y`` may be lists,
    NumPy arrays, or pandas Series with any index (the previous element-wise
    ``x[i]`` lookup was label-based on a Series and failed for non-default
    indexes).

    Args:
        theta0: Intercept of the model.
        theta1: Slope of the model.
        x: One-dimensional sequence of feature values.
        y: One-dimensional sequence of observed targets, same length as ``x``.

    Returns:
        The cost as a float.

    Raises:
        ValueError: If ``x`` and ``y`` do not have the same shape.
        ZeroDivisionError: If ``x`` is empty.
    """
    xs = np.asarray(x, dtype=float)
    ys = np.asarray(y, dtype=float)
    if xs.shape != ys.shape:
        # Refuse to broadcast or silently truncate mismatched inputs.
        raise ValueError(
            f"x and y must have the same shape, got {xs.shape} and {ys.shape}"
        )
    # the number of observations
    m = len(xs)
    # hypothesis minus observation for every sample at once
    residuals = theta1 * xs + theta0 - ys
    # average and halve the summed squared error; plain float division keeps
    # the ZeroDivisionError for empty input
    return float(np.sum(residuals ** 2)) / (2 * m)


def partial_cost_theta1(theta0, theta1, x, y):
    """Return the partial derivative of the cost with respect to theta1.

    Computes ``sum((theta0 + theta1 * x - y) * x) / x.shape[0]`` using
    element-wise arithmetic, so ``x`` and ``y`` must support vectorized
    operations and ``x`` must expose ``.shape``.
    """
    # prediction error of the line for every observation
    residual = (theta1 * x + theta0) - y
    # each error weighted by its feature value, then summed
    weighted_total = (residual * x).sum()
    # divide by the number of observations to get the average slope
    return weighted_total / x.shape[0]


# Evaluate the theta1 gradient at theta0=0, theta1=5 on the data set.
partial_theta1 = partial_cost_theta1(
    theta0=0,
    theta1=5,
    x=data['distance'],
    y=data['accuracy'],
)
print('partial_theta1=', partial_theta1)


def partial_cost_theta0(theta0, theta1, x, y):
    """Return the partial derivative of the cost with respect to theta0.

    Computes ``sum(theta0 + theta1 * x - y) / x.shape[0]`` using element-wise
    arithmetic, so ``x`` and ``y`` must support vectorized operations and
    ``x`` must expose ``.shape``.
    """
    # prediction error of the line for every observation
    residual = (theta1 * x + theta0) - y
    # total error across all observations
    error_total = residual.sum()
    # divide by the number of observations to get the average error
    return error_total / x.shape[0]


# Evaluate the theta0 gradient at theta0=1, theta1=1 on the data set.
partial_theta0 = partial_cost_theta0(
    theta0=1,
    theta1=1,
    x=data['distance'],
    y=data['accuracy'],
)
print('partial_theta0=', partial_theta0)


def gradient_descent(x, y, alpha=0.1, theta0=0, theta1=0,
                     max_epochs=1000, convergence_thres=0.000001):
    """Fit ``theta0 + theta1 * x`` to ``y`` by batch gradient descent.

    Each iteration evaluates both partial derivatives at the current
    parameters, then moves both parameters against their gradient by
    ``alpha`` times the derivative. Iteration stops once the cost changes by
    no more than ``convergence_thres`` between two consecutive steps, or
    after ``max_epochs`` updates.

    The cost is always evaluated on the ``x`` and ``y`` passed in (earlier
    revisions read the module-level ``data`` frame and passed the two thetas
    to ``cost`` in swapped order for the initial cost).

    Args:
        x: Feature vector (distance in this script).
        y: Target vector (accuracy in this script).
        alpha: Learning rate.
        theta0: Initial intercept.
        theta1: Initial slope.
        max_epochs: Maximum number of parameter updates.
        convergence_thres: Minimum change in cost required to keep iterating.

    Returns:
        A dict with the final ``'theta0'`` and ``'theta1'``, the cost history
        under ``'cost'`` (initial cost first, then one entry per update), and
        the parameter histories under ``'theta0s'`` and ``'theta1s'``.
    """
    counter = 0  # number of updates performed so far
    c = cost(theta0, theta1, x, y)  # initial cost
    costs = [c]
    # Start with a "previous" cost far enough from c that the loop is entered.
    cprev = c + 10
    theta0s = [theta0]
    theta1s = [theta1]
    # Keep updating until the cost converges or the iteration budget is spent.
    while (np.abs(cprev - c) > convergence_thres) and (counter < max_epochs):
        cprev = c
        # Both derivatives are computed at the same (theta0, theta1) before
        # either parameter is changed, so the update is simultaneous.
        update0 = alpha * partial_cost_theta0(theta0, theta1, x, y)
        update1 = alpha * partial_cost_theta1(theta0, theta1, x, y)
        theta0 -= update0
        theta1 -= update1

        # store thetas
        theta0s.append(theta0)
        theta1s.append(theta1)

        # compute and store the new cost
        c = cost(theta0, theta1, x, y)
        costs.append(c)
        counter += 1
    return {
        'theta0': theta0,
        'theta1': theta1,
        'cost': costs,
        'theta0s': theta0s,
        'theta1s': theta1s,
    }


# Fit with the default learning rate and report the learned slope.
print('theta1=', gradient_descent(data.distance, data.accuracy)['theta1'])
# Refit with a smaller learning rate and plot the cost after every update.
descend = gradient_descent(data.distance, data.accuracy, alpha=.01)
# gradient_descent returns the cost history under the key 'cost'
# (looking up 'costs' raised KeyError).
plt.scatter(range(len(descend['cost'])), descend['cost'])
plt.show()
码农公寓

相关文章