TensorFlow2 手把手教你训练 MNIST 数据集
概述
MNIST 包含 0~9 的手写数字, 共有 60000 个训练集和 10000 个测试集. 数据的格式为单通道 28*28 的灰度图.
get_data 函数
def get_data():
"""
读取数据
:return: 返回分批完的训练集和测试集
"""
# 读取数据
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# 调试输出
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)
# 分割训练集
train_db = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(60000, seed=0)
train_db = train_db.batch(batch_size).map(pre_processing).repeat(iteration_num) # 迭代45次
# 分割测试集
test_db = tf.data.Dataset.from_tensor_slices((X_test, y_test)).shuffle(10000, seed=0)
test_db = test_db.batch(batch_size).map(pre_processing)
return train_db, test_db
pre_processing 函数
def pre_processing(x, y):
'''
数据预处理
:param x: 特征值
:param y: 目标值
:return: 返回预处理好的x, y
'''
# 转换x
x = tf.cast(x, tf.float32) / 255 # 转换为0~1的形式
x = tf.reshape(x, [-1, 784]) # 把x铺平
# 转换y
# y = tf.convert_to_tensor(y) # 转成tensor
y = tf.cast(y, tf.int32) # 转成tensor
y = tf.one_hot(y, depth=10) # 转成one_hot编码
return x, y
main 函数
def main():
"""
主函数
:return: 无返回值
"""
# 获取分批数据
train_db, test_db = get_data()
# 生成w1形状为[784, 512]的截断正态分布, 中心为0, 标差为0.1
w1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1))
# 生成b1形状为[512]初始化为0
b1 = tf.Variable(tf.zeros([512]))
# 生成w2形状为[512, 256]的截断正态分布, 中心为0, 标差为0.1
w2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1))
# 生成b2形状为[256]初始化为0
b2 = tf.Variable(tf.zeros([256]))
# 生成w3形状为[256, 10]的截断正态分布, 中心为0, 标差为0.1
w3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1))
# 生成b3形状为[10]初始化为0
b3 = tf.Variable(tf.zeros([10]))
for step, (x, y) in enumerate(train_db):
with tf.GradientTape() as tape:
# 第一个隐层
h1 = x @ w1 + b1
h1 = tf.nn.relu(h1) # 激活
# 第二个隐层
h2 = h1 @ w2 + b2
h2 = tf.nn.relu(h2) # 激活
# 输出层
out = h2 @ w3 + b3
# 计算损失函数
loss = tf.square(y - out)
loss = tf.reduce_mean(loss)
grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
for p, g in zip([w1, b1, w2, b2, w3, b3], grads):
p.assign_sub(learning_rate * g)
# 每100批调试输出一下损失
if step % 1000 == 0:
print(step, 'loss:', float(loss))
# 每1000次计算测试集正确率
if step % 5000 == 0:
total, total_correct = 0., 0
for step, (x, y) in enumerate(test_db):
# 第一个隐层
h1 = x @ w1 + b1
h1 = tf.nn.relu(h1) # 激活
# 第二个隐层
h2 = h1 @ w2 + b2
h2 = tf.nn.relu(h2) # 激活
# 输出层
out = h2 @ w3 + b3
# [256, 10] => [256]
pred = tf.argmax(out, axis=1)
# 从one_hot编码变回来
y = tf.argmax(y, axis=1)
# 判断是否正确
correct = tf.equal(pred, y)
# bool tensor => int tensor => numpy
total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
total += x.shape[0]
print(step, "Accuracy", total_correct / total * 100, "%")
完整代码
import tensorflow as tf
# 定义超参数
batch_size = 256 # 一次训练的样本数目
learning_rate = 0.001 # 学习率
iteration_num = 45 # 迭代次数
def get_data():
"""
读取数据
:return: 返回分批完的训练集和测试集
"""
# 读取数据
(X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
# 调试输出
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_test:", X_test.shape, "y_test:", y_test.shape)
# 分割训练集
train_db = tf.data.Dataset.from_tensor_slices((X_train, y_train)).shuffle(60000, seed=0)
train_db = train_db.batch(batch_size).map(pre_processing).repeat(iteration_num) # 迭代45次
# 分割测试集
test_db = tf.data.Dataset.from_tensor_slices((X_test, y_test)).shuffle(10000, seed=0)
test_db = test_db.batch(batch_size).map(pre_processing)
return train_db, test_db
def pre_processing(x, y):
'''
数据预处理
:param x: 特征值
:param y: 目标值
:return: 返回预处理好的x, y
'''
# 转换x
x = tf.cast(x, tf.float32) / 255 # 转换为0~1的形式
x = tf.reshape(x, [-1, 784]) # 把x铺平
# 转换y
# y = tf.convert_to_tensor(y) # 转成tensor
y = tf.cast(y, tf.int32) # 转成tensor
y = tf.one_hot(y, depth=10) # 转成one_hot编码
return x, y
def main():
"""
主函数
:return: 无返回值
"""
# 获取分批数据
train_db, test_db = get_data()
# 生成w1形状为[784, 512]的截断正态分布, 中心为0, 标差为0.1
w1 = tf.Variable(tf.random.truncated_normal([784, 512], stddev=0.1))
# 生成b1形状为[512]初始化为0
b1 = tf.Variable(tf.zeros([512]))
# 生成w2形状为[512, 256]的截断正态分布, 中心为0, 标差为0.1
w2 = tf.Variable(tf.random.truncated_normal([512, 256], stddev=0.1))
# 生成b2形状为[256]初始化为0
b2 = tf.Variable(tf.zeros([256]))
# 生成w3形状为[256, 10]的截断正态分布, 中心为0, 标差为0.1
w3 = tf.Variable(tf.random.truncated_normal([256, 10], stddev=0.1))
# 生成b3形状为[10]初始化为0
b3 = tf.Variable(tf.zeros([10]))
for step, (x, y) in enumerate(train_db):
with tf.GradientTape() as tape:
# 第一个隐层
h1 = x @ w1 + b1
h1 = tf.nn.relu(h1) # 激活
# 第二个隐层
h2 = h1 @ w2 + b2
h2 = tf.nn.relu(h2) # 激活
# 输出层
out = h2 @ w3 + b3
# 计算损失函数
loss = tf.square(y - out)
loss = tf.reduce_mean(loss)
# 计算梯度
grads = tape.gradient(loss, [w1, b1, w2, b2, w3, b3])
# 更新权重
for p, g in zip([w1, b1, w2, b2, w3, b3], grads):
p.assign_sub(learning_rate * g)
# 每100批调试输出一下损失
if step % 1000 == 0:
print(step, 'loss:', float(loss))
# 每1000次计算测试集正确率
if step % 5000 == 0:
total, total_correct = 0., 0
for step, (x, y) in enumerate(test_db):
# 第一个隐层
h1 = x @ w1 + b1
h1 = tf.nn.relu(h1) # 激活
# 第二个隐层
h2 = h1 @ w2 + b2
h2 = tf.nn.relu(h2) # 激活
# 输出层
out = h2 @ w3 + b3
# [256, 10] => [256]
pred = tf.argmax(out, axis=1)
# 从one_hot编码变回来
y = tf.argmax(y, axis=1)
# 判断是否正确
correct = tf.equal(pred, y)
# bool tensor => int tensor => numpy
total_correct += tf.reduce_sum(tf.cast(correct, dtype=tf.int32)).numpy()
total += x.shape[0]
print(step, "Accuracy", total_correct / total * 100, "%")
if __name__ == "__main__":
main()
输出结果:
X_train: (60000, 28, 28) y_train: (60000,)
X_test: (10000, 28, 28) y_test: (10000,)
0 loss: 1.8554267883300781
39 Accuracy 12.479999999999999 %
1000 loss: 0.21533580124378204
2000 loss: 0.1755288988351822
3000 loss: 0.13523665070533752
4000 loss: 0.12891730666160583
5000 loss: 0.11436235904693604
39 Accuracy 55.95 %
6000 loss: 0.10332286357879639
7000 loss: 0.09788000583648682
8000 loss: 0.09040157496929169
9000 loss: 0.07946525514125824
10000 loss: 0.08393758535385132
39 Accuracy 68.33 %