一步步用python实现Logistic Regression

为什么需要标准化?
在说明为什么要标准化前,我们不妨说说一般的标准化是怎么做的:先求出数据的均值μ和标准差σ,然后对每一个样本数据,先减去均值,再除以标准差(注意不是方差),也就是(x-μ)/σ。这样处理之后,每个特征的均值都变为0、方差都变为1(如果原数据服从正态分布,得到的就是标准正态分布),不管原来的范围是什么,现在都基本限定在同样的范围内了。原数据的不同特征的范围可能会有很大差别,比如一批数据中“年龄”的范围就比较小,可能在20岁 ~ 60岁之间,但是另一个特征“年收入”的波动范围可能就很大,也许是0.5万 ~ 1000万,这种情况下会导致损失函数的等高线图变得十分“扁平”,梯度下降的时候很容易走弯路,因此收敛会比较慢,精度也不高。经过标准化(有时也被笼统地称为归一化)之后,等高线就变规矩了,梯度下降也就容易多了。
另外,对于图片数据,处理起来很简单:因为RGB三个通道的取值范围都是0 ~ 255,我们直接把像素值除以255,缩放到[0, 1]区间即可。


'''
Author: huajia
Date: 2021-11-10 14:46:51
LastEditors: huajia
LastEditTime: 2021-11-11 14:19:47
Description: 略略略
'''
import cv2
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm

# Switch the working directory to the training-image folder at import time so
# that the relative paths used in this file ('cat.%d.jpg', '../../model.npz',
# '../../test/test/...') resolve against it.
os.chdir("D:/下载/train/train")

# Number of images to read
n = 1000
# Target size passed to cv2.resize as (weight, height) for every image.
# NOTE(review): `weight` is presumably a typo for "width"; the name is kept
# because other code in this file references it.
weight = 64
height = 64
def init():
    """Load the training images and build the feature matrix and label row.

    Reads ``n`` images from the current working directory: even indices load
    ``cat.<i>.jpg`` (label 1) and odd indices load ``dog.<i>.jpg`` (label 0).
    Every image is resized with ``cv2.resize(img, (weight, height))``.

    Returns:
        train: float array of shape (weight * height * 3, n); one flattened
            image per column, scaled to [0, 1] by dividing by 255.
        train_lable: uint8 array of shape (1, n) holding the labels.

    Raises:
        FileNotFoundError: if an expected image file cannot be read.
    """
    # cv2.resize takes dsize as (width, height) but returns an array laid out
    # as (rows, cols, channels) = (height, width, 3), so the buffer must use
    # that order to stay valid when weight != height.
    imgs = np.zeros((n, height, weight, 3), dtype=np.uint8)
    imgs_lable = np.zeros((n, 1), dtype=np.uint8)
    for i in tqdm(range(n)):
        is_cat = i % 2 == 0
        filename = ('cat.%d.jpg' if is_cat else 'dog.%d.jpg') % i
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending file name rather than inside resize.
            raise FileNotFoundError("cannot read image: %s" % filename)
        imgs[i] = cv2.resize(img, (weight, height))
        if is_cat:
            imgs_lable[i] = 1
    # Flatten each image to a column vector and scale pixel values to [0, 1].
    train = imgs.reshape(imgs.shape[0], -1).T/255.
    train_lable = imgs_lable.T
    print("imgs_shape:", imgs.shape, train.shape, train_lable.shape)
    return train, train_lable


def sigmoid(z):
    """Return the logistic function 1 / (1 + exp(-z)), evaluated via np.exp."""
    return 1.0 / (1 + np.exp(-z))


def initialize_with_zeros(dim):
    """Return the initial parameters: a (dim, 1) zero weight column and bias 0."""
    return np.zeros((dim, 1)), 0


def propagate(w, b, X, Y):
    """Run one forward and backward pass of logistic regression.

    Args:
        w: weights, shape (num_px * num_px * 3, 1).
        b: bias, a scalar.
        X: data set, shape (num_px * num_px * 3, m), where m is the number of
            samples.
        Y: true labels, shape (1, m).

    Returns:
        A tuple ``(grads, cost)`` where ``grads`` is the dict
        ``{"dw": dw, "db": db}`` and ``cost`` is the mean cross-entropy loss.
    """
    # Number of samples.
    m = X.shape[1]

    # Forward pass.
    z = np.dot(w.T, X) + b
    A = sigmoid(z)

    # With a = sigmoid(z), -[y*log(a) + (1-y)*log(1-a)] equals
    # log(1 + e^z) - y*z. Evaluating it through logaddexp avoids the
    # log(0) -> -inf / NaN that the direct form produces once the sigmoid
    # saturates to exactly 0 or 1.
    cost = np.sum(np.logaddexp(0, z) - Y * z) / m

    # Backward pass.
    dZ = A - Y
    dw = np.dot(X, dZ.T) / m
    db = np.sum(dZ) / m

    grads = {"dw": dw,
             "db": db}

    return grads, cost


def optimize(w, b, X, Y, num_iterations, learning_rate, print_cost=False):
    """Fit w and b by plain gradient descent.

    Args:
        w: initial weights.
        b: initial bias.
        X: data set passed through to ``propagate``.
        Y: labels passed through to ``propagate``.
        num_iterations: number of gradient-descent steps.
        learning_rate: step size applied to each gradient.
        print_cost: when true, print the cost every 100 iterations.

    Returns:
        A tuple ``(params, grads, costs)``: ``params`` is ``{"w": w, "b": b}``
        after the last update, ``grads`` is ``{"dw": dw, "db": db}`` from the
        last call to ``propagate``, and ``costs`` holds the cost recorded at
        iterations 0, 100, 200, ...
    """
    # Costs sampled every 100 iterations, so the trend can be plotted later.
    costs = []
    dw = db = None
    for i in range(num_iterations):
        # Cost and gradients at the current parameters.
        grads, cost = propagate(w, b, X, Y)
        dw, db = grads["dw"], grads["db"]

        # Gradient-descent update.
        w = w - learning_rate * dw
        b = b - learning_rate * db

        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Cost after iteration %i: %f" % (i, cost))

    if dw is None:
        # No iteration ran (num_iterations <= 0): report the gradients at the
        # unchanged parameters instead of failing on unbound dw/db.
        grads, _ = propagate(w, b, X, Y)
        dw, db = grads["dw"], grads["db"]

    params = {"w": w,
              "b": b}
    grads = {"dw": dw,
             "db": db}
    return params, grads, costs


def predict(w, b, X):
    """Classify every column of X with the given parameters.

    Args:
        w: weights; ``np.dot(w.T, X)`` must produce a 2-D array whose first
            row holds one score per sample.
        b: bias.
        X: data set with one sample per column.

    Returns:
        Float array of shape (1, m): 1 where the predicted probability is
        strictly greater than 0.5, otherwise 0.
    """
    m = X.shape[1]
    A = sigmoid(np.dot(w.T, X) + b)

    # Vectorised threshold; the boolean mask is stored as 1.0 / 0.0 in the
    # float result, replacing the per-sample Python loop.
    Y_prediction = np.zeros((1, m))
    Y_prediction[0, :] = A[0, :] > 0.5
    return Y_prediction


def logistic_model(X_train, X_label, learning_rate=0.1, num_iterations=2000, print_cost=False):
    """Train logistic regression on the training set, save and evaluate it.

    Args:
        X_train: training data; its first dimension is the feature count.
        X_label: training labels, compared element-wise with the predictions.
        learning_rate: gradient-descent step size.
        num_iterations: number of gradient-descent steps.
        print_cost: forwarded to ``optimize``.

    Returns:
        Dict with the recorded costs, the training-set predictions, the
        learned ``w`` and ``b``, the hyper-parameters and the training
        accuracy.

    Side effects:
        Writes the learned parameters to '../../model.npz' and prints the
        training accuracy.
    """
    # Zero-initialise one weight per feature plus the bias.
    n_features = X_train.shape[0]
    w_init, b_init = initialize_with_zeros(n_features)

    # Gradient descent; only the final parameters and the cost history are used.
    params, _, costs = optimize(
        w_init, b_init, X_train, X_label,
        num_iterations, learning_rate, print_cost)
    W, b = params['w'], params['b']

    # Persist the parameters so they can be loaded again for inference.
    np.savez('../../model.npz', w=W, b=b)

    # Training-set accuracy = 1 - mean absolute difference to the labels.
    prediction_train = predict(W, b, X_train)
    mean_abs_error = np.mean(np.abs(prediction_train - X_label))
    accuracy_train = 1 - mean_abs_error

    print("Accuracy on train set:", accuracy_train)

    # Bundle parameters, hyper-parameters and results for later inspection.
    return {
        "costs": costs,
        "Y_prediction_train": prediction_train,
        "w": W,
        "b": b,
        "learning_rate": learning_rate,
        "num_iterations": num_iterations,
        "train_acy": accuracy_train,
    }

def testModel(Y_test):
    """Score a sample with the parameters stored in '../../model.npz'.

    Args:
        Y_test: feature column(s) with one sample per column; only the score
            of the first sample is returned.

    Returns:
        The sigmoid output at position [0, 0], i.e. the model's probability
        for the first sample.

    Side effects:
        Prints the stored bias.
    """
    # np.load returns an NpzFile that keeps the archive open; the context
    # manager closes it once both arrays have been read into memory.
    with np.load('../../model.npz') as data:
        w = data['w']
        b = data['b']
    print(b)
    Y_res = sigmoid(np.dot(w.T, Y_test) + b)[0, 0]
    return Y_res
    


if __name__ == '__main__':
    # To (re)train the model and rewrite ../../model.npz, uncomment:
    # train,train_lable=init()
    # d = logistic_model(train, train_lable, num_iterations = 20000, learning_rate = 0.005, print_cost = True)

    # Pick a random test image. NOTE: np.random.randint excludes the upper
    # bound, so only indices 1..12499 are ever sampled.
    test_path = '../../test/test/%d.jpg' % (np.random.randint(1, 12500))
    # test_path = '../../test/test/5.jpg'
    raw = cv2.imread(test_path)
    if raw is None:
        # cv2.imread returns None on failure instead of raising.
        raise FileNotFoundError("cannot read image: %s" % test_path)
    imgtest = cv2.resize(raw, (weight, height))

    # Same preprocessing as training: flatten to a column and scale to [0, 1].
    test = imgtest.reshape(1, -1).T/255.
    res = testModel(test)

    # OpenCV decodes images as BGR while matplotlib expects RGB; convert for
    # display only (the model input stays in the channel order used in training).
    plt.imshow(cv2.cvtColor(imgtest, cv2.COLOR_BGR2RGB))
    # Use a font with CJK glyphs so the Chinese title renders correctly.
    plt.rcParams['font.sans-serif']=['SimHei']
    plt.rcParams['axes.unicode_minus'] = False
    plt.title('本张图片是猫的概率为%f%%'%(res*100))
    plt.show()


上一篇:Windows下Python-openCV学习(四)-------色彩空间与通道


下一篇:python核心高级学习总结3-------python实现进程的三种方式及其区别