1. Auto-Encoder
The encoder compresses the 784-dimensional input down to a latent code of custom dimension; the decoder reconstructs the input from that code.
import os

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image

tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')


def save_images(imgs, name):
    """Tile 100 28x28 images into a single 280x280 image and save it."""
    new_im = Image.new('L', (280, 280))

    index = 0
    for i in range(0, 280, 28):
        for j in range(0, 280, 28):
            im = imgs[index]
            im = Image.fromarray(im, mode='L')
            new_im.paste(im, (i, j))
            index += 1

    new_im.save(name)


h_dim = 20     # compress the original 784-dim input to 20 dims (chosen freely)
batchsz = 512  # batch size
lr = 1e-3      # learning rate

(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
# the labels are not needed, so y_train/y_test are not put into the datasets
train_db = tf.data.Dataset.from_tensor_slices(x_train)
train_db = train_db.shuffle(batchsz * 5).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.batch(batchsz)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)


class AE(keras.Model):  # subclass keras.Model

    def __init__(self):
        super(AE, self).__init__()

        # Encoder: 3 fully connected layers, input dim is 28x28 = 784
        self.encoder = Sequential([
            layers.Dense(256, activation=tf.nn.relu),
            layers.Dense(128, activation=tf.nn.relu),
            layers.Dense(h_dim)  # compress down to the custom latent dim
        ])

        # Decoder: input dim is h_dim, 3 fully connected layers back up to 784
        self.decoder = Sequential([
            layers.Dense(128, activation=tf.nn.relu),
            layers.Dense(256, activation=tf.nn.relu),
            layers.Dense(784)
        ])

    def call(self, inputs, training=None):  # forward pass
        # [b, 784] => [b, h_dim], dimensionality reduction
        h = self.encoder(inputs)

        # [b, h_dim] => [b, 784], reconstruction
        x_hat = self.decoder(h)

        return x_hat


model = AE()  # create the model
model.build(input_shape=(None, 784))  # input_shape must be passed as a tuple
model.summary()

optimizer = tf.optimizers.Adam(learning_rate=lr)  # optimizer

os.makedirs('ae_images', exist_ok=True)  # output directory must exist

for epoch in range(100):
    for step, x in enumerate(train_db):

        # flatten first: [b, 28, 28] => [b, 784]
        x = tf.reshape(x, [-1, 784])

        with tf.GradientTape() as tape:  # record the ops whose gradients are needed
            x_rec_logits = model(x)

            # treat every pixel as an independent binary classification problem
            rec_loss = tf.losses.binary_crossentropy(x, x_rec_logits, from_logits=True)
            rec_loss = tf.reduce_mean(rec_loss)  # average over the batch

        grads = tape.gradient(rec_loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, float(rec_loss))

    # evaluation
    x = next(iter(test_db))
    logits = model(tf.reshape(x, [-1, 784]))
    x_hat = tf.sigmoid(logits)
    # [b, 784] => [b, 28, 28]
    x_hat = tf.reshape(x_hat, [-1, 28, 28])

    # save_images tiles the first 100 images, so save the reconstructions
    x_concat = x_hat
    x_concat = x_concat.numpy() * 255.
    x_concat = x_concat.astype(np.uint8)
    save_images(x_concat, 'ae_images/rec_epoch_%d.png' % epoch)
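Once trained, the encoder alone is a useful dimensionality-reduction tool: the 20-dim code h is a compressed representation of each 784-dim image. A minimal sketch, assuming the model and x_test from the listing above (the variable names here are illustrative):

# extract 20-dim codes for a few test images with the trained encoder
x_batch = tf.reshape(x_test[:5], [-1, 784])  # [5, 28, 28] => [5, 784]
h = model.encoder(x_batch)                   # [5, 784] => [5, 20]
print(h.shape)                               # (5, 20)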
2. Variational Auto-Encoder
The latent layer is split in two: one small head outputs the mean and another outputs the variance (here, its log); the decoder then works on a sample drawn from this latent distribution.
Sampling directly from the mean and variance is not differentiable with respect to them, so the loss cannot be backpropagated through the sampling step. The reparameterization trick rewrites the sample as mean plus standard deviation times unit Gaussian noise, z = mu + sigma * eps with eps ~ N(0, 1), so gradients can flow back through mu and sigma.
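To see the trick in isolation before the full VAE listing, here is a self-contained sketch (mu and log_var are illustrative stand-ins for the encoder outputs): z is a deterministic function of mu and log_var given eps, so tf.GradientTape can differentiate through it.

import tensorflow as tf

mu = tf.Variable([0.5])        # mean, a trainable parameter
log_var = tf.Variable([-1.0])  # log variance, a trainable parameter

with tf.GradientTape() as tape:
    eps = tf.random.normal(mu.shape)      # all randomness is isolated in eps ~ N(0, 1)
    z = mu + tf.exp(log_var * 0.5) * eps  # z is deterministic given eps
    loss = tf.reduce_sum(z ** 2)          # any downstream loss

# both gradients are defined; sampling N(mu, sigma^2) directly would
# leave no differentiable path back to mu and log_var
print(tape.gradient(loss, [mu, log_var]))

The reparameterize method in the listing below uses exactly this construction.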
import os

import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers
from PIL import Image

tf.random.set_seed(22)
np.random.seed(22)
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
assert tf.__version__.startswith('2.')


def save_images(imgs, name):
    """Tile 100 28x28 images into a single 280x280 image and save it."""
    new_im = Image.new('L', (280, 280))

    index = 0
    for i in range(0, 280, 28):
        for j in range(0, 280, 28):
            im = imgs[index]
            im = Image.fromarray(im, mode='L')
            new_im.paste(im, (i, j))
            index += 1

    new_im.save(name)


h_dim = 20
batchsz = 512
lr = 1e-3

(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()
x_train, x_test = x_train.astype(np.float32) / 255., x_test.astype(np.float32) / 255.
# the labels are not needed
train_db = tf.data.Dataset.from_tensor_slices(x_train)
train_db = train_db.shuffle(batchsz * 5).batch(batchsz)
test_db = tf.data.Dataset.from_tensor_slices(x_test)
test_db = test_db.batch(batchsz)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

z_dim = 10


class VAE(keras.Model):

    def __init__(self):  # create the layers
        super(VAE, self).__init__()  # input dim is 784

        # Encoder: a shared hidden layer followed by two heads,
        # one for the mean and one for the log variance
        self.fc1 = layers.Dense(128)    # shared layer before the latent code
        self.fc2 = layers.Dense(z_dim)  # mean branch: fc1 -> fc2
        self.fc3 = layers.Dense(z_dim)  # variance branch: fc1 -> fc3

        # Decoder
        self.fc4 = layers.Dense(128)
        self.fc5 = layers.Dense(784)  # last layer maps back to 784

    def encoder(self, x):
        h = tf.nn.relu(self.fc1(x))
        # get mean
        mu = self.fc2(h)
        # get log variance: unconstrained, ranges over (-inf, +inf)
        log_var = self.fc3(h)

        return mu, log_var

    def decoder(self, z):
        out = tf.nn.relu(self.fc4(z))
        out = self.fc5(out)

        return out

    def reparameterize(self, mu, log_var):
        # sampling from N(mu, sigma^2) directly is not differentiable w.r.t.
        # mu and sigma, so rewrite the sample as mu + sigma * eps with
        # eps ~ N(0, 1); gradients then flow through mu and log_var
        eps = tf.random.normal(log_var.shape)
        std = tf.exp(log_var * 0.5)  # log_var = log(sigma^2), so sigma = exp(log_var / 2)
        z = mu + std * eps
        return z

    def call(self, inputs, training=None):  # forward pass
        # [b, 784] => [b, z_dim], [b, z_dim]: the two heads give mean and log variance
        mu, log_var = self.encoder(inputs)
        # reparameterization trick
        z = self.reparameterize(mu, log_var)

        x_hat = self.decoder(z)

        return x_hat, mu, log_var


model = VAE()
# a concrete batch size is used here because reparameterize calls
# tf.random.normal(log_var.shape), which needs a fully defined shape at build time
model.build(input_shape=(4, 784))
optimizer = tf.optimizers.Adam(lr)

os.makedirs('vae_images', exist_ok=True)  # output directory must exist

for epoch in range(1000):

    for step, x in enumerate(train_db):

        x = tf.reshape(x, [-1, 784])

        with tf.GradientTape() as tape:
            x_rec_logits, mu, log_var = model(x)

            rec_loss = tf.nn.sigmoid_cross_entropy_with_logits(labels=x, logits=x_rec_logits)
            rec_loss = tf.reduce_sum(rec_loss) / x.shape[0]

            # KL divergence between N(mu, sigma^2) and the standard normal N(0, 1).
            # For two univariate Gaussians,
            # KL(p, q) = log(sigma2/sigma1) + (sigma1^2 + (mu1 - mu2)^2) / (2 * sigma2^2) - 1/2,
            # which with mu2 = 0, sigma2 = 1 reduces to the expression below.
            # https://stats.stackexchange.com/questions/7440/kl-divergence-between-two-univariate-gaussians
            kl_div = -0.5 * (log_var + 1 - mu**2 - tf.exp(log_var))
            kl_div = tf.reduce_sum(kl_div) / x.shape[0]  # average over the batch

            loss = rec_loss + 1. * kl_div  # reconstruction loss plus KL penalty

        grads = tape.gradient(loss, model.trainable_variables)  # compute gradients
        optimizer.apply_gradients(zip(grads, model.trainable_variables))

        if step % 100 == 0:
            print(epoch, step, 'kl div:', float(kl_div), 'rec loss:', float(rec_loss))

    # evaluation 1: generation. Feed random latent vectors to the decoder;
    # it has learned the latent distribution, so it can generate new images.
    z = tf.random.normal((batchsz, z_dim))
    logits = model.decoder(z)
    x_hat = tf.sigmoid(logits)  # squash logits into [0, 1]
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.  # scale back to 0-255
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/sampled_epoch%d.png' % epoch)

    # evaluation 2: reconstruction of test images
    x = next(iter(test_db))
    x = tf.reshape(x, [-1, 784])
    x_hat_logits, _, _ = model(x)
    x_hat = tf.sigmoid(x_hat_logits)
    x_hat = tf.reshape(x_hat, [-1, 28, 28]).numpy() * 255.
    x_hat = x_hat.astype(np.uint8)
    save_images(x_hat, 'vae_images/rec_epoch%d.png' % epoch)
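Because the decoder maps the whole latent space to images, nearby codes decode to similar images, which allows interpolating between samples. A minimal sketch, assuming the trained model and z_dim from the listing above (the 10-step schedule is arbitrary):

z0 = tf.random.normal((1, z_dim))
z1 = tf.random.normal((1, z_dim))

frames = []
for i in range(10):
    t = i / 9.                          # interpolation weight in [0, 1]
    z = (1. - t) * z0 + t * z1          # linear interpolation in latent space
    img = tf.sigmoid(model.decoder(z))  # decode the interpolated code
    frames.append(tf.reshape(img, [28, 28]).numpy())

# frames now holds 10 images morphing from decode(z0) to decode(z1)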