# GRADED FUNCTION: linear_function
def linear_function():
"""
Implements a linear function:
Initializes X to be a random tensor of shape (3,1)
Initializes W to be a random tensor of shape (4,3)
Initializes b to be a random tensor of shape (4,1)
Returns:
result -- Y = WX + b
"""
np.random.seed(1)
"""
Note, to ensure that the "random" numbers generated match the expected results,
please create the variables in the order given in the starting code below.
(Do not re-arrange the order).
"""
# (approx. 4 lines)
# X = ...
# W = ...
# b = ...
# Y = ...
# YOUR CODE STARTS HERE
X=tf.constant(np.random.randn(3,1),name='X')
W=tf.Variable(np.random.randn(4,3),name='W')
b=tf.Variable(np.random.randn(4,1),name='b')
Y=tf.add(tf.matmul(W,X),b)
# YOUR CODE ENDS HERE
return Y
设置激活函数
# GRADED FUNCTION: sigmoid
def sigmoid(z):
"""
Computes the sigmoid of z
Arguments:
z -- input value, scalar or vector
Returns:
a -- (tf.float32) the sigmoid of z
"""
# tf.keras.activations.sigmoid requires float16, float32, float64, complex64, or complex128.
# (approx. 2 lines)
# z = ...
# a = ...
# YOUR CODE STARTS HERE
z=tf.cast(z,tf.float32)
# 转换为tf.float32类型
a=tf.keras.activations.sigmoid(z)
# YOUR CODE ENDS HERE
return a
多分类任务转换为独热码(one-hot encode)
# GRADED FUNCTION: one_hot_matrix
def one_hot_matrix(label, depth=6):
"""
Computes the one hot encoding for a single label
Arguments:
label -- (int) Categorical labels
depth -- (int) Number of different classes that label can take
Returns:
one_hot -- tf.Tensor A single-column matrix with the one hot encoding.
"""
# (approx. 1 line)
# one_hot = ...
# YOUR CODE STARTS HERE
one_hot=tf.reshape(tf.one_hot(label,depth,axis=0),[-1,])
# YOUR CODE ENDS HERE
return one_hot
# GRADED FUNCTION: forward_propagation
def forward_propagation(X, parameters):
"""
Implements the forward propagation for the model: LINEAR -> RELU -> LINEAR -> RELU -> LINEAR
Arguments:
X -- input dataset placeholder, of shape (input size, number of examples)
parameters -- python dictionary containing your parameters "W1", "b1", "W2", "b2", "W3", "b3"
the shapes are given in initialize_parameters
Returns:
Z3 -- the output of the last LINEAR unit
"""
# Retrieve the parameters from the dictionary "parameters"
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
W3 = parameters['W3']
b3 = parameters['b3']
#(approx. 5 lines) # Numpy Equivalents:
# Z1 = ... # Z1 = np.dot(W1, X) + b1
# A1 = ... # A1 = relu(Z1)
# Z2 = ... # Z2 = np.dot(W2, A1) + b2
# A2 = ... # A2 = relu(Z2)
# Z3 = ... # Z3 = np.dot(W3, A2) + b3
# YOUR CODE STARTS HERE
Z1=tf.math.add(tf.linalg.matmul(W1,X),b1)
A1=tf.keras.activations.relu(Z1)
Z2=tf.math.add(tf.linalg.matmul(W2,A1),b2)
A2=tf.keras.activations.relu(Z2)
Z3=tf.math.add(tf.linalg.matmul(W3,A2),b3)
# YOUR CODE ENDS HERE
return Z3
计算损失
# GRADED FUNCTION: compute_cost
def compute_cost(logits, labels):
"""
Computes the cost
Arguments:
logits -- output of forward propagation (output of the last LINEAR unit), of shape (6, num_examples)
labels -- "true" labels vector, same shape as Z3
Returns:
cost - Tensor of the cost function
"""
#(1 line of code)
# cost = ...
# YOUR CODE STARTS HERE
cost=tf.reduce_mean(tf.keras.losses.categorical_crossentropy(tf.transpose(labels),tf.transpose(logits),from_logits=True))
# from_logits is true mean adding a softmax_layer
# before the loss function(y_true*log(y_pred))
# YOUR CODE ENDS HERE
return cost
构建模型
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.0001,
num_epochs = 1500, minibatch_size = 32, print_cost = True):
"""
Implements a three-layer tensorflow neural network: LINEAR->RELU->LINEAR->RELU->LINEAR->SOFTMAX.
Arguments:
X_train -- training set, of shape (input size = 12288, number of training examples = 1080)
Y_train -- test set, of shape (output size = 6, number of training examples = 1080)
X_test -- training set, of shape (input size = 12288, number of training examples = 120)
Y_test -- test set, of shape (output size = 6, number of test examples = 120)
learning_rate -- learning rate of the optimization
num_epochs -- number of epochs of the optimization loop
minibatch_size -- size of a minibatch
print_cost -- True to print the cost every 10 epochs
Returns:
parameters -- parameters learnt by the model. They can then be used to predict.
"""
costs = [] # To keep track of the cost
train_acc = []
test_acc = []
# Initialize your parameters
#(1 line)
parameters = initialize_parameters()
W1 = parameters['W1']
b1 = parameters['b1']
W2 = parameters['W2']
b2 = parameters['b2']
W3 = parameters['W3']
b3 = parameters['b3']
optimizer = tf.keras.optimizers.Adam(learning_rate)
# The CategoricalAccuracy will track the accuracy for this multiclass problem
test_accuracy = tf.keras.metrics.CategoricalAccuracy()
train_accuracy = tf.keras.metrics.CategoricalAccuracy()
dataset = tf.data.Dataset.zip((X_train, Y_train))
test_dataset = tf.data.Dataset.zip((X_test, Y_test))
# We can get the number of elements of a dataset using the cardinality method
m = dataset.cardinality().numpy()
minibatches = dataset.batch(minibatch_size).prefetch(8)
test_minibatches = test_dataset.batch(minibatch_size).prefetch(8)
#X_train = X_train.batch(minibatch_size, drop_remainder=True).prefetch(8)# <<< extra step
#Y_train = Y_train.batch(minibatch_size, drop_remainder=True).prefetch(8) # loads memory faster
# Do the training loop
for epoch in range(num_epochs):
epoch_cost = 0.
#We need to reset object to start measuring from 0 the accuracy each epoch
train_accuracy.reset_states()
for (minibatch_X, minibatch_Y) in minibatches:
with tf.GradientTape() as tape:
# 1. predict
Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)
# 2. loss
minibatch_cost = compute_cost(Z3, tf.transpose(minibatch_Y))
# We acumulate the accuracy of all the batches
train_accuracy.update_state(tf.transpose(Z3), minibatch_Y)
trainable_variables = [W1, b1, W2, b2, W3, b3]
grads = tape.gradient(minibatch_cost, trainable_variables)
optimizer.apply_gradients(zip(grads, trainable_variables))
epoch_cost += minibatch_cost
# We divide the epoch cost over the number of samples
epoch_cost /= m
# Print the cost every 10 epochs
if print_cost == True and epoch % 10 == 0:
print ("Cost after epoch %i: %f" % (epoch, epoch_cost))
print("Train accuracy:", train_accuracy.result())
# We evaluate the test set every 10 epochs to avoid computational overhead
for (minibatch_X, minibatch_Y) in test_minibatches:
Z3 = forward_propagation(tf.transpose(minibatch_X), parameters)
test_accuracy.update_state(tf.transpose(Z3), minibatch_Y)
print("Test_accuracy:", test_accuracy.result())
costs.append(epoch_cost)
train_acc.append(train_accuracy.result())
test_acc.append(test_accuracy.result())
test_accuracy.reset_states()
return parameters, costs, train_acc, test_acc