TensorFlow----fashion_mnist数据集神经网络的搭建

2023-10-14 12:44:28

文章目录

前言
一、数据集加载以及数据集的预处理
二、全连接网络层构建
三、计算梯度和代价函数并更新参数
四、完整程序
总结

前言

深度学习小白，若有错误希望各位大佬多多包涵。

一、数据集加载以及数据集的预处理

数据集可以直接从网上下载，这里把数据集分为了训练集和测试集，但是更多的时候我们会分成训练集，交叉验证集和测试集，这样的训练效果会更好。

# Load Fashion-MNIST; the call returns a (training images, training labels)
# pair and a (test images, test labels) pair.
(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()

这里得到的x, y, x_test, y_test都是NumPy数组（ndarray），需要转换为张量类型

def preprocess(x, y):
    """Convert one (image, label) pair to tensors.

    The image is cast to float32 and divided by 255; the label is cast to
    int32. Returns the converted ``(image, label)`` tuple.
    """
    scaled = tf.cast(x, dtype=tf.float32) / 255.
    labels = tf.cast(y, dtype=tf.int32)
    return scaled, labels

这个函数与map()组合使用，传入preprocess，即可完成映射，类型转换

然后，为了加快计算的速度将整个样本切片为batch大小的小样本
为了方便操作，先生成Dataset类的对象，然后再调用Dataset的成员方法batch()进行分批

# Training pipeline: slice (x, y) into per-sample elements, convert them with
# preprocess, shuffle with a 10000-element buffer, then group into batches.
db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .map(preprocess)
    .shuffle(10000)
    .batch(batches)
)

测试集做相同的处理即可

通过迭代器取出一个batch的数据样本（训练时再用for循环依次处理每一个batch）

# Create a fresh iterator over db and take its first batch.
sample = next(iter(db))

二、全连接网络层构建

使用Sequential容器，生成Sequential类的一个实例

# Fully connected stack: four ReLU hidden layers narrowing
# 256 -> 128 -> 64 -> 32, followed by a 10-unit output layer that has no
# activation.
model = Sequential(
    [layers.Dense(units, activation=tf.nn.relu) for units in (256, 128, 64, 32)]
    + [layers.Dense(10)]
)

并用成员函数build, summary完成网络权值，偏置和输入维度的初始化与网络模型参数状况的输出

# Build the model for inputs of shape [batch, 784] (28 * 28 flattened pixels;
# None leaves the batch dimension unspecified).
model.build(input_shape=[None, 28 * 28])
# Print the layer-by-layer structure and parameter overview.
model.summary()

构造优化器
优化器主要使用apply_gradients方法传入变量和对应梯度从而来对给定变量进行迭代，或者直接使用minimize方法对目标函数进行迭代优化。

# Adam optimizer with learning rate 1e-3.
# NOTE: this rebinds the name `optimizers` (imported in the full program as
# the tensorflow.keras.optimizers module) to the Adam instance, so the module
# is no longer reachable under this name afterwards.
optimizers = optimizers.Adam(learning_rate=1e-3)

三、计算梯度和代价函数并更新参数

在使用自动求导功能计算梯度时，需要将前向计算过程放置在tf.GradientTape()环境中，利用GradientTape对象的gradient()方法自动求解参数的梯度，并利用optimizers对象更新参数

 # One optimisation step. The forward pass and the loss are computed inside
 # the GradientTape context so that the tape can differentiate the loss.
 with tf.GradientTape() as tape:
     # Raw scores from the network; the loss is told so via from_logits=True.
     logits = model(x)
     y_onehot = tf.one_hot(y, depth=10)
     loss_ce = tf.losses.categorical_crossentropy(y_onehot, logits, from_logits=True)
     # Average the per-sample losses into one scalar.
     loss_ce = tf.reduce_mean(loss_ce)

 # Gradients of the loss with respect to every trainable variable, applied by
 # the optimizer as (gradient, variable) pairs.
 grads = tape.gradient(loss_ce, model.trainable_variables)
 optimizers.apply_gradients(zip(grads, model.trainable_variables))

四、完整程序

# -*- coding: utf-8 -*-
# @Time : 10:02
# @Author:Paranipd
# @File : mnist_test.py
# @Software:PyCharm

import os

# Raise TensorFlow's native log threshold ('2' hides INFO and WARNING output).
# This is set before tensorflow is imported so that it is already in place
# for messages emitted while the library loads.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, optimizers, Sequential, metrics  # datasets, layers, optimizers, model container, metrics

# Per-sample preprocessing: the loaded dataset is NumPy data, so convert it to
# tensors of the dtypes used by the rest of the program.
def preprocess(x, y):
    """Return ``(x, y)`` with x as float32 divided by 255 and y as int32."""
    image = tf.cast(x, dtype=tf.float32) / 255.
    label = tf.cast(y, dtype=tf.int32)
    return image, label

# Load the dataset as a training split and a test split (NumPy arrays).
# x.shape: (60000, 28, 28)   y.shape: (60000,)
# x value range: 0..255      y labels: 0..9
(x, y), (x_test, y_test) = datasets.fashion_mnist.load_data()
print(x.shape, x.dtype, y.shape, y.dtype)

# Number of samples per batch.
batches = 128

# Training pipeline, written as one chain:
#   from_tensor_slices - turn (x, y) into a Dataset of per-sample slices
#   map                - dtype conversion / scaling via preprocess
#   shuffle            - randomise sample order (buffer of 10000)
#   batch              - group samples into batches of `batches`
# The order of these calls matters: a different order gives different results.
db = (
    tf.data.Dataset.from_tensor_slices((x, y))
    .map(preprocess)
    .shuffle(10000)
    .batch(batches)
)

# Test pipeline: same conversion and batching, without shuffling.
db_test = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .map(preprocess)
    .batch(batches)
)

# Take one batch from an iterator over db and show the shapes of its images
# and labels.
db_iter = iter(db)
sample = next(db_iter)
print('batch:', sample[0].shape, sample[1].shape)

# Wrap the network in a Sequential container. Shapes flow as
# [b, 784] -> [b, 256] -> [b, 128] -> [b, 64] -> [b, 32] -> [b, 10]:
# four ReLU hidden layers followed by a 10-unit output layer with no
# activation.
model = Sequential(
    [layers.Dense(width, activation=tf.nn.relu) for width in (256, 128, 64, 32)]
    + [layers.Dense(10)]
)

# Build the model for inputs of shape [batch, 784] (None leaves the batch
# dimension unspecified).
model.build(input_shape=[None, 28 * 28])    # called on the Sequential instance
# Print the per-layer parameter overview to inspect the network structure.
model.summary()

# Optimizer used to update the parameters; the learning rate is 1e-3.
# NOTE: this rebinds the name `optimizers`, imported above as the
# tensorflow.keras.optimizers module, to the Adam instance. main() relies on
# this name referring to the instance.
optimizers = optimizers.Adam(learning_rate=1e-3)


def main(epochs=30):
    """Train the network and print the test accuracy after every epoch.

    Relies on the module-level ``model``, ``optimizers`` (the Adam instance),
    ``db`` (training batches) and ``db_test`` (test batches).

    Args:
        epochs: Number of full passes over ``db``. Defaults to 30.
    """
    for epoch in range(epochs):
        # One step == one batch of training data.
        for step, (x, y) in enumerate(db):
            # Flatten the images: [b, 28, 28] -> [b, 784]; y stays [b].
            x = tf.reshape(x, [-1, 28 * 28])
            # One-hot labels do not depend on trainable variables, so they
            # are built outside the tape.
            y_onehot = tf.one_hot(y, depth=10)

            # The forward pass and the loss must run inside the tape so that
            # gradients can be derived from them.
            with tf.GradientTape() as tape:
                # [b, 784] -> [b, 10]; model(x) invokes the model's __call__.
                logits = model(x)
                # Cross-entropy on raw scores, averaged over the batch.
                loss_ce = tf.reduce_mean(
                    tf.losses.categorical_crossentropy(
                        y_onehot, logits, from_logits=True
                    )
                )

            # Differentiate with respect to every trainable variable and let
            # the optimizer apply the (gradient, variable) pairs.
            grads = tape.gradient(loss_ce, model.trainable_variables)
            optimizers.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                # MSE is reported only; it is not part of the optimised loss,
                # so it is computed here instead of on every step in the tape.
                loss_mse = tf.reduce_mean(tf.losses.MSE(y_onehot, logits))
                print(epoch, step, 'loss:', float(loss_ce), float(loss_mse))

        # Evaluate accuracy on the test batches.
        total_correct = 0
        total_num = 0
        for x, y in db_test:
            # [b, 28, 28] -> [b, 784]
            x = tf.reshape(x, [-1, 28 * 28])
            logits = model(x)
            # Softmax is monotonic, so the argmax over the logits is already
            # the predicted class; cast to int32 to compare with y.
            pred = tf.cast(tf.argmax(logits, axis=1), dtype=tf.int32)
            # Number of correct predictions in this batch.
            correct = tf.reduce_sum(tf.cast(tf.equal(pred, y), dtype=tf.int32))
            total_correct += int(correct)
            total_num += x.shape[0]

        acc = total_correct / total_num
        print(epoch, 'test acc:', acc)


if __name__ == "__main__":
    main()

模型结果

最终的预测准确率在0.87左右，要是再做一些优化处理和误差处理，准确率还可以更高。

总结

提示：这里对文章进行总结：

码农公寓

文章目录

前言

一、数据集加载以及数据集的预处理

二、全连接网络层构建

三、计算梯度和代价函数并更新参数

四、完整程序

总结

相关文章