# Andrew Ng -- Neural Networks -- week1-hw4

# Ref: https://blog.csdn.net/u013733326/article/details/79767169

import numpy as np
import testCases
import h5py
import matplotlib.pyplot as plt
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward
import lr_utils
import traceback

# Seed NumPy's global random generator at import time.
# NOTE: several functions below call np.random.seed again before drawing weights.
np.random.seed(1)

# 2 layers
def initial_parameters(n_x=4, n_h=2, n_y=1):
    """
    Initialize the parameters of a 2-layer network (L=2, one hidden layer).

    Both weight matrices are drawn from a standard normal distribution and
    scaled by 0.01; both bias vectors start at zero.

    :param n_x: dims of input layer
    :param n_h: dims of hidden layer
    :param n_y: dims of output layer
    :return: dict with keys 'W1' (n_h, n_x), 'b1' (n_h, 1),
             'W2' (n_y, n_h) and 'b2' (n_y, 1)
    """
    # W1 is drawn before W2 so the global random stream is consumed in layer order.
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    # Scale W2 by the same 0.01 factor as W1; leaving it unscaled made the
    # output layer start roughly 100x larger than the hidden layer.
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))

    # Sanity-check every array, not just a subset of them.
    assert W1.shape == (n_h, n_x)
    assert b1.shape == (n_h, 1)
    assert W2.shape == (n_y, n_h)
    assert b2.shape == (n_y, 1)

    params = {
        'W1': W1,
        'b1': b1,
        'W2': W2,
        'b2': b2,
    }

    return params


# initialize the params of deep nn
def initial_params_deep(layers_dims):
    """
    Build the initial weights and biases for a network of arbitrary depth.

    The global NumPy generator is re-seeded with 3 on every call. Each weight
    matrix is standard-normal noise divided by the square root of the previous
    layer's size; each bias is a zero column vector.

    :param layers_dims: sequence of layer sizes: input layer, hidden layers, output layer
    :return: dict mapping 'W<l>' / 'b<l>' (l = 1 .. len(layers_dims) - 1) to arrays
    """
    np.random.seed(3)
    params = {}

    # Pair each layer size with the size of the layer feeding into it;
    # layer numbering starts at 1 because index 0 is the input layer.
    size_pairs = zip(layers_dims[:-1], layers_dims[1:])
    for layer_no, (fan_in, fan_out) in enumerate(size_pairs, start=1):
        weights = np.random.randn(fan_out, fan_in) / np.sqrt(fan_in)
        assert weights.shape == (fan_out, fan_in)

        params['W' + str(layer_no)] = weights
        params['b' + str(layer_no)] = np.zeros((fan_out, 1))

    return params


def linear_forward(A, W, b):
    """
    Linear part of one layer's forward step: Z = W . A + b.

    :param A: activations coming from the previous layer
    :param W: weight matrix of the current layer
    :param b: bias of the current layer
    :return: (Z, cache) where cache is the tuple (A, W, b) kept for the backward pass
    """
    Z = np.add(np.dot(W, A), b)

    # One row per unit of this layer, one column per column of A.
    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape

    return Z, (A, W, b)


def linear_activation_forward(A_pre, W, b, activation):
    """
    Forward step of one layer: linear transform followed by an activation.

    :param A_pre: the activation of the previous layer
    :param W: W[l]
    :param b: b[l]
    :param activation: which activation to apply; exactly 'Sigmoid' or 'Relu'
        (case-sensitive)
    :return: (A, cache) where A is the activation of the l-th layer and cache is
        (linear_cache, activ_cache), both needed by the backward pass
    :raises ValueError: if activation is neither 'Sigmoid' nor 'Relu'
    """
    # Reject unknown names up front. Previously an unknown name skipped both
    # branches and surfaced later as an UnboundLocalError.
    if activation not in ('Sigmoid', 'Relu'):
        raise ValueError(
            "activation must be 'Sigmoid' or 'Relu', got %r" % (activation,))

    Z, linear_cache = linear_forward(A_pre, W, b)

    # sigmoid / relu (from dnn_utils) each return the activation and a cache.
    if activation == 'Sigmoid':
        A, activ_cache = sigmoid(Z)
    else:
        A, activ_cache = relu(Z)

    assert (Z.shape == (W.shape[0], A_pre.shape[1]))
    cache = (linear_cache, activ_cache)
    return A, cache


# Multi layers
def L_layers_forward(X, params):
    """
    Run the full forward pass through every layer described by params.

    Layers 1 .. L-1 use the 'Relu' activation and layer L uses 'Sigmoid',
    where L is half the number of entries in params (one W and one b per layer).

    :param X: a[0], the input features
    :param params: dict holding 'W<l>' and 'b<l>' for every layer
    :return: (A_L, caches): the last layer's activation and the list of
        per-layer caches in layer order
    """
    num_layers = len(params) // 2
    caches = []
    current = X

    # Hidden layers: feed each layer's output into the next one.
    for layer in range(1, num_layers):
        w_key = 'W' + str(layer)
        b_key = 'b' + str(layer)
        current, layer_cache = linear_activation_forward(
            current, params[w_key], params[b_key], activation='Relu')
        caches.append(layer_cache)

    # Output layer.
    A_L, last_cache = linear_activation_forward(
        current,
        params['W' + str(num_layers)],
        params['b' + str(num_layers)],
        activation='Sigmoid')
    caches.append(last_cache)

    # The network is expected to emit one value per input column.
    assert A_L.shape == (1, X.shape[1])

    return A_L, caches

def computer_cost(A_L, Y):
    """
    Compute the cross-entropy cost averaged over the m samples.

    cost = -(1/m) * sum(Y * log(A_L) + (1 - Y) * log(1 - A_L))

    :param A_L: the activation of the L-th layer
    :param Y: labels, 0 or 1 (0: not cat; 1: cat); m is taken from Y.shape[1]
    :return: the total cost as a scalar (shape ())
    """
    m = Y.shape[1]

    # Keep activations strictly inside (0, 1). A saturated activation of exactly
    # 0 or 1 would otherwise produce 0 * log(0) = nan and poison the cost.
    eps = 1e-15
    A_safe = np.clip(A_L, eps, 1 - eps)

    cost = -np.sum(np.multiply(np.log(A_safe), Y) + np.multiply(np.log(1 - A_safe), 1 - Y)) / m
    cost = np.squeeze(cost)

    assert (cost.shape == ())
    return cost


def linear_backward(dZ, cache):
    """
    Backward step for the linear part of one layer.

    :param dZ: gradient of the cost with respect to this layer's linear output
    :param cache: tuple (A_prev, W, b) stored by the forward pass, where A_prev
        is the previous layer's activation and W, b are this layer's parameters
    :return: (dA_prev, dW, db), the gradients with respect to the previous
        activation, the weights and the bias
    """
    prev_activation, weights, _bias = cache
    sample_count = prev_activation.shape[1]

    # Parameter gradients are averaged over the samples (columns).
    weight_grad = np.dot(dZ, prev_activation.T) / sample_count
    bias_grad = np.sum(dZ, axis=1, keepdims=True) / sample_count

    # Gradient flowing back to the previous layer.
    prev_grad = np.dot(weights.T, dZ)
    assert prev_grad.shape == prev_activation.shape

    return prev_grad, weight_grad, bias_grad


def linear_activation_backward(dA, cache, activation):
    """
    Backward step of one layer: activation gradient followed by the linear gradient.

    :param dA: gradient with respect to the activation of the l-th layer
    :param cache: (linear_cache, activation_cache) stored for the l-th layer
    :param activation: which activation the layer used; exactly "Relu" or
        "Sigmoid" (case-sensitive)
    :return: (dA_prev, dW, db), where dA_prev belongs to the (l-1)-th layer
    :raises ValueError: if activation is neither "Relu" nor "Sigmoid"
    """
    # Reject unknown names up front. Previously an unknown name skipped both
    # branches and failed at the return statement with UnboundLocalError.
    if activation not in ("Relu", "Sigmoid"):
        raise ValueError(
            "activation must be 'Relu' or 'Sigmoid', got %r" % (activation,))

    linear_cache, activation_cache = cache

    # Only the activation-specific part differs between the two branches.
    if activation == "Relu":
        dZ = relu_backward(dA, activation_cache)
    else:
        dZ = sigmoid_backward(dA, activation_cache)

    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db

# L layers backward propagation
def L_layers_backward(A_L, Y, caches):
    """
    Run the full backward pass and collect the gradients of every layer.

    The last layer is differentiated through 'Sigmoid' and all earlier layers
    through 'Relu', mirroring the activations used in L_layers_forward.

    :param A_L: the activation of the last layer (output of the forward pass)
    :param Y: the true labels; reshaped to A_L.shape before use
    :param caches: list of per-layer caches from the forward pass, in layer order
    :return: dict with 'dA<l-1>', 'dW<l>' and 'db<l>' for l = 1 .. len(caches)
    """
    grads = {}
    L = len(caches)
    # Normalise the label shape first, so a 1-D label vector is accepted too.
    Y = Y.reshape(A_L.shape)

    # Derivative of the cross-entropy cost with respect to A_L.
    dAL = - (np.divide(Y, A_L) - np.divide(1 - Y, 1 - A_L))

    # Output layer (Sigmoid).
    last_cache = caches[L - 1]
    grads['dA' + str(L - 1)], grads['dW' + str(L)], grads['db' + str(L)] = \
        linear_activation_backward(dAL, last_cache, activation='Sigmoid')

    # Hidden layers (Relu), from the last hidden layer back to the first.
    for l in reversed(range(L - 1)):
        layer_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = \
            linear_activation_backward(grads['dA' + str(l + 1)], layer_cache, activation='Relu')
        grads['dA' + str(l)] = dA_prev_temp
        grads['dW' + str(l + 1)] = dW_temp
        grads['db' + str(l + 1)] = db_temp

    return grads


def update_params(params, grads, alpha=0.05):
    """
    Apply one gradient-descent step to every weight and bias.

    The params dict is updated in place (each entry is rebound to a new array)
    and is also returned.

    :param params: dict holding 'W<l>' and 'b<l>' for every layer
    :param grads: dict holding at least 'dW<l>' and 'db<l>' for every layer
    :param alpha: learning rate, 0.05 by default
    :return: the same params dict, holding the updated values
    """
    num_layers = len(params) // 2

    for layer in range(1, num_layers + 1):
        # Weights first, then bias, for each layer.
        for prefix in ('W', 'b'):
            name = prefix + str(layer)
            step = alpha * grads['d' + name]
            params[name] = params[name] - step

    return params


def two_layers_nn(X, Y, layer_dims, alpha=0.075, num_iterations=10000, print_cost=False, is_plot=True):
    """
    Train a two-layer network: the first layer uses Relu, the second Sigmoid.

    :param X: input features
    :param Y: labels, where 0 represents not-cat and 1 represents cat
    :param layer_dims: (n_x, n_h, n_y): sizes of input, hidden and output layers
    :param alpha: learning rate
    :param num_iterations: the number of gradient-descent iterations
    :param print_cost: print the cost every 100 iterations when True (default False)
    :param is_plot: plot the recorded costs once training finishes (default True)
    :return: the updated params, or None if an exception occurred (the
        traceback is printed and the error is not re-raised)
    """
    np.random.seed(1)
    costs = []
    grads = {}
    (n_x, n_h, n_y) = layer_dims

    # initialize the parameters
    params = initial_parameters(n_x, n_h, n_y)

    try:
        for i in range(num_iterations):
            W1, b1 = params['W1'], params['b1']
            W2, b2 = params['W2'], params['b2']

            # forward propagation
            A1, cache1 = linear_activation_forward(X, W1, b1, activation='Relu')
            A2, cache2 = linear_activation_forward(A1, W2, b2, activation='Sigmoid')

            # calculate the cost
            cost = computer_cost(A2, Y)

            # backward propagation, starting from the cost derivative w.r.t. A2
            dA2 = -(np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))
            dA1, dW2, db2 = linear_activation_backward(dA2, cache2, 'Sigmoid')
            dA0, dW1, db1 = linear_activation_backward(dA1, cache1, 'Relu')

            # gradients
            grads["dW1"] = dW1
            grads["db1"] = db1
            grads["dW2"] = dW2
            grads["db2"] = db2

            # update
            params = update_params(params, grads, alpha)

            # Record (and optionally print) the cost every 100 iterations.
            if i % 100 == 0:
                costs.append(cost)
                if print_cost:
                    print(f"第{i}次迭代的成本是: ", np.squeeze(cost))

        # Plot once, after training. This block used to sit inside the loop,
        # which called plt.show() on every single iteration.
        if is_plot:
            plt.figure()
            plt.plot(np.squeeze(costs))
            plt.title(f'Learning rate is {alpha}')
            plt.xlabel('iterations per 100')
            plt.ylabel('cost')
            plt.show()

        return params
    except Exception:
        # Best-effort behaviour kept from the original: report and return None.
        # `except Exception` lets KeyboardInterrupt / SystemExit propagate.
        traceback.print_exc()
        return None

# Load the training / test images, their labels and the class names
# through the project's lr_utils helper.
train_set_x_orig , train_set_y , test_set_x_orig , test_set_y , classes = lr_utils.load_dataset()

# Flatten every sample into one column: reshape to (num_samples, -1), then transpose.
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0], -1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0], -1).T

# Divide by 255 — presumably to scale 8-bit pixel values into [0, 1]; confirm against the dataset.
train_x = train_x_flatten / 255
train_y = train_set_y
test_x = test_x_flatten / 255
test_y = test_set_y

# Layer sizes of the two-layer model.
# n_x is presumably 64 * 64 * 3 (print_mislabeled_image reshapes samples to (64, 64, 3)).
n_x = 12288
n_h = 7
n_y = 1
# NOTE: this tuple is not passed to the call below (which builds its own tuple);
# layers_dims is rebound before the deep model is trained.
layers_dims = (n_x,n_h,n_y)

# Train the two-layer model at import time; the result is stored in `parameters`.
print('==================two layers======================')
parameters = two_layers_nn(train_x, train_set_y, layer_dims=(n_x, n_h, n_y), alpha=0.0075, num_iterations=1000, print_cost=True, is_plot=True)

def L_layers_nn(X, Y, layers_dims, alpha = 0.075, num_iterations = 1000, print_cost=False, is_plot=True):
    """
    Train an L-layer network: the first L-1 layers use Relu and the L-th layer
    uses Sigmoid.

    :param X: input features
    :param Y: the labels: 0 (not cat); 1 (cat)
    :param layers_dims: sizes of the input layer, the hidden layers and the output layer
    :param alpha: learning rate
    :param num_iterations: number of gradient-descent iterations, default is 1000
    :param print_cost: print the cost every 100 iterations when True (default False)
    :param is_plot: plot the recorded costs after training (default True)
    :return: the parameters after the final iteration
    """
    # NOTE: initial_params_deep re-seeds the generator itself before drawing weights.
    np.random.seed(1)
    costs = []

    params = initial_params_deep(layers_dims)

    for i in range(0, num_iterations):
        # forward pass and cost
        A_L, caches = L_layers_forward(X, params)
        cost = computer_cost(A_L, Y)

        # backward pass and parameter update
        grads = L_layers_backward(A_L, Y, caches)
        params = update_params(params, grads, alpha)

        # Record (and optionally print) the cost every 100 iterations.
        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print(f'第{i}次迭代成本是: %.3f' % cost)

    if is_plot:
        # Create the figure only when plotting is requested, so that
        # is_plot=False no longer leaves an empty figure behind.
        plt.figure()
        plt.plot(np.squeeze(costs))
        plt.title(f'Learning rate is {alpha}')
        plt.ylabel('cost')
        plt.xlabel('Iterations per 100')
        plt.show()
    return params


# Train the deep model at import time; this rebinds `parameters`, replacing
# the two-layer result stored earlier.
print('=====================多层测试============================')
layers_dims = [12288, 20, 7, 5, 1]  # 5 sizes: input layer first, output layer last
parameters = L_layers_nn(train_x, train_y, layers_dims, alpha=0.0075,
                         num_iterations=500, print_cost=True, is_plot=True)


def predict_y(X, y, params):
    """
    Predict labels with the trained parameters and print the accuracy.

    :param X: input features, one sample per column
    :param y: true labels, 0: not cat; 1: cat
    :param params: W[l], b[l] for every layer
    :return: array of shape (1, m) holding the predicted labels as 0.0 / 1.0
    """
    m = X.shape[1]

    probies, _caches = L_layers_forward(X, params)

    # Threshold every sample at 0.5. The previous loop iterated over
    # probies.shape[0] (which is 1), so only the first sample was classified
    # and every other prediction silently stayed 0.
    predict = (probies > 0.5).astype(float).reshape(1, m)

    accur = float(np.sum((predict == y))/m) * 100
    print("准确率:%.3f  " % accur + str('%'))
    return predict

predictions_train = predict_y(train_x, train_y, parameters) # training set
predictions_test = predict_y(test_x, test_y, parameters) # test set

def print_mislabeled_image(classes, X, y, predict_y):
    """
    Plot every sample whose prediction differs from its true label and save
    the figure to 'mislabeled_image.png'.

    With 0/1 labels, y + predict_y equals 1 exactly when the two disagree, so
    both kinds of mistake are shown (cat predicted as not-cat and vice versa).

    :param classes: array of class names stored as bytes, indexed by label
    :param X: input features, one sample per column; each column is reshaped
        to (64, 64, 3) for display
    :param y: true labels: 0 (not cat) or 1 (cat)
    :param predict_y: predicted labels, same layout as y
    :return: mislabeled_indices, the np.where result as an array; its second
        row holds the column indices of the mislabeled samples
    """
    a = y + predict_y
    mislabeled_indices = np.asarray(np.where(a == 1))
    # NOTE: this changes the global matplotlib default figure size.
    plt.rcParams['figure.figsize'] = (40, 60)
    num_images = len(mislabeled_indices[0])

    plt.figure()

    for i, index in enumerate(mislabeled_indices[1]):
        # Cast both labels to int so float-typed labels can index `classes` too.
        predicted_name = classes[int(predict_y[0, index])].decode('utf-8')
        true_name = classes[int(y[0, index])].decode('utf-8')

        plt.subplot(2, num_images, i + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        # Separators added: the title used to read e.g. "Predictioncat".
        plt.title('Prediction: ' + predicted_name +
                  '\n Class: ' + true_name)

    plt.savefig('mislabeled_image.png', dpi=500)

    return mislabeled_indices

mislabeled_indices = print_mislabeled_image(classes, test_x, test_y, predictions_test)

# Entry-point guard left as a no-op: the training, prediction and plotting
# statements above are at module level, so they run on import as well.
if __name__ == '__main__':
    pass
# Previous post: Replacing a pile of if statements with C# 7.0 switch...case pattern matching
#
#
# Next post: [656] SegNet notes