DL Notes 1: Weight Decay
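
Weight decay regularizes a model by adding an L2 penalty on the weights to the training loss, i.e. minimizing L(w, b) + (lambda/2) * ||w||^2. For SGD this amounts to shrinking w by a factor of (1 - lr * lambda) before each gradient step, which is why frameworks expose it as an optimizer option. The concise PyTorch version below, in the style of the d2l book, passes weight_decay directly to torch.optim.SGD: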

import torch
from torch import nn
from d2l import torch as d2l

# Assumes num_inputs, train_iter and test_iter come from the d2l synthetic-data setup.
def train_concise(wd):
    net = nn.Sequential(nn.Linear(num_inputs, 1))
    for param in net.parameters():
        param.data.normal_()
    loss = nn.MSELoss()
    num_epochs, lr = 100, 0.003
    # The bias parameter is not decayed.
    trainer = torch.optim.SGD([
        {"params": net[0].weight, 'weight_decay': wd},
        {"params": net[0].bias}], lr=lr)
    animator = d2l.Animator(xlabel='epochs', ylabel='loss', yscale='log',
                            xlim=[5, num_epochs], legend=['train', 'test'])
    for epoch in range(num_epochs):
        for X, y in train_iter:
            trainer.zero_grad()
            l = loss(net(X), y)
            l.backward()
            trainer.step()
        if (epoch + 1) % 5 == 0:
            animator.add(epoch + 1, (d2l.evaluate_loss(net, train_iter, loss),
                                     d2l.evaluate_loss(net, test_iter, loss)))
    print('L2 norm of w:', net[0].weight.norm().item())
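
For comparison, here is a minimal from-scratch sketch of the same penalty, assuming the same num_inputs / train_iter / test_iter globals as the snippet above; l2_penalty and train_scratch are illustrative names, and d2l.linreg, d2l.squared_loss and d2l.sgd are the book's saved helper functions:

def l2_penalty(w):
    # (1/2) * sum(w_i^2); the 1/2 cancels the 2 produced by differentiation.
    return torch.sum(w.pow(2)) / 2

def train_scratch(lambd, num_epochs=100, lr=0.003):
    w = torch.normal(0, 1, size=(num_inputs, 1), requires_grad=True)
    b = torch.zeros(1, requires_grad=True)
    net, loss = lambda X: d2l.linreg(X, w, b), d2l.squared_loss
    for epoch in range(num_epochs):
        for X, y in train_iter:
            # Add the penalty to the loss by hand instead of using weight_decay.
            l = loss(net(X), y) + lambd * l2_penalty(w)
            l.sum().backward()
            d2l.sgd([w, b], lr, X.shape[0])
    print('L2 norm of w:', torch.norm(w).item())
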
"""
@author: Inki
@contact: inki.yinji@gmail.com
@version: Created in 2020 1206, last modified in 2020 1206.
"""

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch import optim
from torch.utils import data


def generate_dataset(num_tr=20, num_te=100, num_inputs=200, w=None, b=0.05):
    """
    Generate a synthetic linear-regression dataset: y = x . w + b + noise,
    with defaults w_i = 0.01 and b = 0.05.
    """
    if w is None:
        w = np.ones(num_inputs) * 0.01

    ret_features = np.random.randn(num_tr + num_te, num_inputs)
    ret_labels = np.dot(ret_features, w) + b
    ret_labels += np.random.normal(0, 0.01, len(ret_labels))  # Gaussian label noise
    ret_labels = np.reshape(ret_labels, (len(ret_labels), 1))
    ret_features = torch.tensor(ret_features, dtype=torch.float)
    ret_labels = torch.tensor(ret_labels, dtype=torch.float)
    # Split into train/test features and labels.
    return ret_features[:num_tr, :], ret_features[num_tr:, :], ret_labels[:num_tr], ret_labels[num_tr:]
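
The defaults deliberately pair only 20 training examples with 200 input dimensions, so an unregularized linear model can memorize the training set. A quick shape check (the temp_* names are just illustrative):

temp_tr_x, temp_te_x, temp_tr_y, temp_te_y = generate_dataset()
print(temp_tr_x.shape, temp_tr_y.shape)  # torch.Size([20, 200]) torch.Size([20, 1])
print(temp_te_x.shape, temp_te_y.shape)  # torch.Size([100, 200]) torch.Size([100, 1])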


def plot(x, y, x_label, y_label, x1=None, y1=None, legend=None):
    """
    Plot a curve (and optionally a second, dotted one) on a log y-axis.
    """
    plt.semilogy(x, y)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    if x1 is not None and y1 is not None:
        plt.semilogy(x1, y1, linestyle=':')
        plt.legend(legend)
    plt.show()
    plt.close()


def fit(wd, tr_features, te_features, tr_labels, te_labels,
        batch_size=1, loss=nn.MSELoss(), num_epochs=100, lr=0.003):
    """
    Train a linear model, applying weight decay wd to the weight only.
    """
    temp_dataset = data.TensorDataset(tr_features, tr_labels)
    tr_iter = data.DataLoader(temp_dataset, batch_size, shuffle=True)

    # Step 1. Initialize the parameters.
    net = nn.Linear(len(tr_features[0]), 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)

    # Two optimizers, so that weight_decay applies to the weight but not the bias.
    temp_optimizer_w = optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)
    temp_optimizer_b = optim.SGD(params=[net.bias], lr=lr)

    # Step 2. Train and test.
    temp_tr_ls, temp_te_ls = [], []
    for _ in range(num_epochs):
        for x, y in tr_iter:
            temp_ls = loss(net(x), y)  # MSELoss already averages over the batch
            temp_optimizer_w.zero_grad()
            temp_optimizer_b.zero_grad()
            temp_ls.backward()
            temp_optimizer_w.step()
            temp_optimizer_b.step()

        # Record the full train/test loss once per epoch.
        with torch.no_grad():
            temp_tr_ls.append(loss(net(tr_features), tr_labels).item())
            temp_te_ls.append(loss(net(te_features), te_labels).item())
    plot(range(1, num_epochs + 1), temp_tr_ls, 'epochs', 'loss',
         range(1, num_epochs + 1), temp_te_ls, ['train', 'test'])
    print("L2 norm of w:", net.weight.data.norm().item())


if __name__ == '__main__':
    temp_tr_features, temp_te_features, temp_tr_labels, temp_te_labels = generate_dataset()
    fit(3, temp_tr_features, temp_te_features, temp_tr_labels, temp_te_labels)
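
With wd=3, the printed L2 norm of w is typically much smaller than with fit(0, ...), and the dotted test curve stays close to the training curve; rerunning with wd=0 is the quickest way to see the overfitting that weight decay suppresses.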
