import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Build a reproducible synthetic data set in which accuracy is a linear
# function of distance (slope 8) plus uniform noise scaled by 4.
np.random.seed(0)
n_sample = 100
dis = np.random.rand(n_sample)
acc = dis * 8 + np.random.random(n_sample) * 4
my_data = {'distance': dis, 'accuracy': acc}
data = pd.DataFrame(my_data)

# Standardize each column: subtract its mean and divide by its standard
# deviation (pandas' default, i.e. the sample standard deviation).
data['distance'] = (data['distance'] - data['distance'].mean()) / data['distance'].std()
data['accuracy'] = (data['accuracy'] - data['accuracy'].mean()) / data['accuracy'].std()

# Visual check of the normalized relationship.
plt.scatter(data['distance'], data['accuracy'])
plt.xlabel('normalized distance')
plt.ylabel('normalized accuracy')
plt.show()
def cost(theta0, theta1, x, y):
    """Return the cost of a single-variable linear model.

    Computes ``sum((theta0 + theta1 * x_i - y_i) ** 2) / (2 * m)``, where
    ``m`` is the number of observations in ``x``.

    Args:
        theta0: Intercept of the linear hypothesis.
        theta1: Slope of the linear hypothesis.
        x: Feature values (list, NumPy array or pandas Series).
        y: Observed targets, same length as ``x``.

    Returns:
        The cost as a Python float.

    Raises:
        ZeroDivisionError: If ``x`` is empty.
    """
    # Convert to positional NumPy arrays: ``x[i]`` on a pandas Series is a
    # label lookup and fails when the index is not 0..m-1.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float)
    m = len(features)
    residuals = theta0 + theta1 * features - targets
    # Halved mean of the squared residuals.
    return float(np.sum(residuals ** 2)) / (2 * m)
def partial_cost_theta1(theta0, theta1, x, y):
    """Partial derivative of the cost with respect to ``theta1`` (the slope).

    Evaluates ``sum((theta0 + theta1 * x - y) * x) / x.shape[0]``. ``x`` and
    ``y`` must support element-wise arithmetic; ``x`` must expose ``.shape``.
    """
    # Prediction error of every observation, weighted by its feature value.
    weighted_residuals = (theta0 + theta1 * x - y) * x
    # Averaging the weighted errors yields the derivative.
    return weighted_residuals.sum() / x.shape[0]
# Evaluate the slope derivative at theta0=0, theta1=5 on the data frame columns.
partial_theta1 = partial_cost_theta1(
    theta0=0,
    theta1=5,
    x=data.distance,
    y=data.accuracy,
)
print('partial_theta1=', partial_theta1)
def partial_cost_theta0(theta0, theta1, x, y):
    """Partial derivative of the cost with respect to ``theta0`` (the intercept).

    Evaluates ``sum(theta0 + theta1 * x - y) / x.shape[0]``. ``x`` and ``y``
    must support element-wise arithmetic; ``x`` must expose ``.shape``.
    """
    # Prediction error of every observation.
    residuals = theta0 + theta1 * x - y
    # The mean error is the derivative with respect to the intercept.
    return residuals.sum() / x.shape[0]
# Evaluate the intercept derivative at theta0=1, theta1=1 on the data frame columns.
partial_theta0 = partial_cost_theta0(
    theta0=1,
    theta1=1,
    x=data.distance,
    y=data.accuracy,
)
print('partial_theta0=', partial_theta0)
def gradient_descent(x, y, alpha=0.1, theta0=0, theta1=0,
                     max_epochs=1000, convergence_thres=0.000001):
    """Fit ``y ~ theta0 + theta1 * x`` by batch gradient descent.

    Args:
        x: Feature vector (distance).
        y: Target vector (accuracy).
        alpha: Learning rate.
        theta0: Initial value of the intercept.
        theta1: Initial value of the slope.
        max_epochs: Maximum number of parameter updates.
        convergence_thres: Iteration stops once the absolute difference
            between the previous and the current cost is no longer greater
            than this value.

    Returns:
        A dict with the final ``'theta0'`` and ``'theta1'``, and ``'cost'``:
        the list of cost values, starting with the initial cost followed by
        one entry per update.
    """
    # Evaluate the cost on the x/y that were passed in, with the parameters
    # in the order cost() declares them: (theta0, theta1).
    c = cost(theta0, theta1, x, y)
    costs = [c]
    # Offset the "previous" cost so the first convergence check does not
    # stop the loop before any update has been made.
    cprev = c + 10
    counter = 0
    # Stop when the cost has converged or the iteration budget is used up.
    while np.abs(cprev - c) > convergence_thres and counter < max_epochs:
        cprev = c
        # Compute both partial derivatives at the same (theta0, theta1)
        # before changing either parameter, so the update is simultaneous.
        update0 = alpha * partial_cost_theta0(theta0, theta1, x, y)
        update1 = alpha * partial_cost_theta1(theta0, theta1, x, y)
        theta0 -= update0
        theta1 -= update1
        # Record the cost at the updated parameters.
        c = cost(theta0, theta1, x, y)
        costs.append(c)
        counter += 1
    return {'theta0': theta0, 'theta1': theta1, 'cost': costs}
# Report the slope fitted with the default learning rate.
print('theta1=', gradient_descent(data.distance, data.accuracy)['theta1'])

# Run again with a smaller learning rate and plot the cost per iteration.
descend = gradient_descent(data.distance, data.accuracy, alpha=.01)
# gradient_descent returns its cost history under the key 'cost'.
cost_history = descend['cost']
plt.scatter(range(len(cost_history)), cost_history)
plt.show()