Stanford CS20 Study Notes


Lecture Note 2

Tensorboard  P3

Data Structures  P4

Math Operations  P6

Data Types  P7

  tf native && python native

tensorflow && numpy  P9

Variables  P10-14

  variables must be initialized/assigned before use (see the short example below)

placeholder  P15-16
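
A minimal sketch of both points above (my own toy values, not from the lecture): a Variable must be initialized before it can be read, and a placeholder gets its value through feed_dict at run time.

import tensorflow as tf

W = tf.Variable(10, name='W')               # holds state; must be initialized before reading
a = tf.placeholder(tf.float32, shape=[3])   # gets its value fed in at run time

with tf.Session() as sess:
    sess.run(W.initializer)                 # or: sess.run(tf.global_variables_initializer())
    print(sess.run(W))                                        # 10
    print(sess.run(a + 1, feed_dict={a: [1.0, 2.0, 3.0]}))    # [2. 3. 4.]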


Lecture Note 3

An example of logistic regression  P3

- How to define a loss function?  P4-6

- tf.data for loading data  P6-9

- Optimizer  P9-13

- eg: logistic on MNIST  P14


Lecture Note 4

Eager: makes TensorFlow convenient to use directly from Python (short example at the end of this section)

e.g. slides P19-P23: no need for tf.Session().run() anymore

Automatic differentiation  P25-28

Differences from the traditional graph-mode TF commands  P32

usage  P37
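
A tiny sketch of what the eager notes mean in practice (my own example, written against the tf.contrib.eager API the course used; newer TF versions expose this differently):

import tensorflow as tf
import tensorflow.contrib.eager as tfe

tfe.enable_eager_execution()     # call once at program start

x = tf.constant([[1.0, 2.0]])
w = tf.constant([[3.0], [4.0]])
print(tf.matmul(x, w))           # executes immediately and prints the value, no tf.Session().run()

def square(v):
    return v * v

grad_fn = tfe.gradients_function(square)   # automatic differentiation of a plain Python function
print(grad_fn(3.0))                        # [6.0]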


Assignment 1

1. Commonly used TensorFlow operations

 """
Simple exercises to get used to TensorFlow API
You should thoroughly test your code.
TensorFlow's official documentation should be your best friend here
CS20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu
Created by Chip Huyen (chiphuyen@cs.stanford.edu)
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']=''

import tensorflow as tf

sess = tf.InteractiveSession()
###############################################################################
# 1a: Create two random 0-d tensors x and y of any distribution.
# Create a TensorFlow object that returns x + y if x > y, and x - y otherwise.
# Hint: look up tf.cond()
# I do the first problem for you
###############################################################################
x = tf.random_uniform([])  # Empty array as shape creates a scalar.
y = tf.random_uniform([])
out = tf.cond(tf.greater(x, y), lambda: x + y, lambda: x - y)
print(sess.run(out))

###############################################################################
# 1b: Create two 0-d tensors x and y randomly selected from the range [-1, 1).
# Return x + y if x < y, x - y if x > y, 0 otherwise.
# Hint: Look up tf.case().
###############################################################################
# YOUR CODE
x = tf.random_uniform([], -1, 1)
y = tf.random_uniform([], -1, 1)
xdy = lambda x,y: x-y
xpy = lambda x,y: x+y
res = tf.case({tf.less(x,y): lambda: xpy(x,y), tf.greater(x,y): lambda: xdy(x,y)}, default=lambda: 0.00, exclusive=True)
print(sess.run(res))

###############################################################################
# 1c: Create the tensor x of the value [[0, -2, -1], [0, 1, 2]]
# and y as a tensor of zeros with the same shape as x.
# Return a boolean tensor that yields Trues if x equals y element-wise.
# Hint: Look up tf.equal().
###############################################################################
# YOUR CODE
x = tf.constant([[0, -2, -1], [0, 1, 2]])
y = tf.zeros_like(x)
res = tf.equal(x, y)
print(sess.run(res))

###############################################################################
# 1d: Create the tensor x of value
# [29.05088806, 27.61298943, 31.19073486, 29.35532951,
# 30.97266006, 26.67541885, 38.08450317, 20.74983215,
# 34.94445419, 34.45999146, 29.06485367, 36.01657104,
# 27.88236427, 20.56035233, 30.20379066, 29.51215172,
# 33.71149445, 28.59134293, 36.05556488, 28.66994858].
# Get the indices of elements in x whose values are greater than 30.
# Hint: Use tf.where().
# Then extract elements whose values are greater than 30.
# Hint: Use tf.gather().
###############################################################################
# YOUR CODE
x = tf.constant([[29.05088806, 27.61298943, 31.19073486, 29.35532951],
                 [30.97266006, 26.67541885, 38.08450317, 20.74983215],
                 [34.94445419, 34.45999146, 29.06485367, 36.01657104],
                 [27.88236427, 20.56035233, 30.20379066, 29.51215172],
                 [33.71149445, 28.59134293, 36.05556488, 28.66994858]])
h1 = tf.where(tf.greater(x, 30))   # indices of elements > 30
print(sess.run(h1))
h2 = tf.gather_nd(x, h1)           # the elements themselves (x is 2-d here, hence gather_nd)
print(sess.run(h2))

###############################################################################
# 1e: Create a diagonal 2-d tensor of size 6 x 6 with the diagonal values of 1,
# 2, ..., 6
# Hint: Use tf.range() and tf.diag().
###############################################################################
# YOUR CODE
ran = tf.range(1, 7, 1)
dig = tf.diag(ran)
print(sess.run(dig))

###############################################################################
# 1f: Create a random 2-d tensor of size 10 x 10 from any distribution.
# Calculate its determinant.
# Hint: Look at tf.matrix_determinant().
###############################################################################
# YOUR CODE
x = tf.random_uniform((10, 10))
res = tf.matrix_determinant(x)
print(sess.run(res))

###############################################################################
# 1g: Create tensor x with value [5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9].
# Return the unique elements in x
# Hint: use tf.unique(). Keep in mind that tf.unique() returns a tuple.
###############################################################################
# YOUR CODE
x = tf.constant([5, 2, 3, 5, 10, 6, 2, 3, 4, 2, 1, 1, 0, 9])
y, idx = tf.unique(x)
print(sess.run(y))

###############################################################################
# 1h: Create two tensors x and y of shape 300 from any normal distribution,
# as long as they are from the same distribution.
# Use tf.cond() to return:
# - The mean squared error of (x - y) if the average of all elements in (x - y) is negative, or
# - The sum of absolute value of all elements in the tensor (x - y) otherwise.
# Hint: see the Huber loss function in the lecture slides 3.
###############################################################################
# YOUR CODE
x = tf.random_normal([300])
y = tf.random_normal([300])
res = tf.cond(tf.reduce_mean(x - y) < 0,
              lambda: tf.reduce_mean(tf.square(x - y)),
              lambda: tf.reduce_sum(tf.abs(x - y)))
print(sess.run(res))

2. Logistic regression in TensorFlow

 """ Starter code for simple logistic regression model for MNIST
with tf.data module
MNIST dataset: yann.lecun.com/exdb/mnist/
Created by Chip Huyen (chiphuyen@cs.stanford.edu)
CS20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu
Lecture 03
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']=''

import numpy as np
import tensorflow as tf
import time

import utils

# Define parameters for the model
learning_rate = 0.01
batch_size = 128
n_epochs = 50
n_train = 60000
n_test = 10000

# Step 1: Read in data
mnist_folder = 'data/mnist'
utils.download_mnist(mnist_folder)
train, val, test = utils.read_mnist(mnist_folder, flatten=True)

# Step 2: Create datasets and iterator
# create training Dataset and batch it
train_data = tf.data.Dataset.from_tensor_slices(train)
train_data = train_data.shuffle(30000) # if you want to shuffle your data
train_data = train_data.batch(batch_size)

# create testing Dataset and batch it
test_data = None
#############################
########## TO DO ############
test_data = tf.data.Dataset.from_tensor_slices(test)
test_data = test_data.batch(batch_size)
#############################

# create one iterator and initialize it with different datasets
iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                           train_data.output_shapes)
img, label = iterator.get_next()

train_init = iterator.make_initializer(train_data)   # initializer for train_data
test_init = iterator.make_initializer(test_data)     # initializer for test_data

# Step 3: create weights and bias
# w is initialized to random variables with mean of 0, stddev of 0.01
# b is initialized to 0
# shape of w depends on the dimension of X and Y so that Y = tf.matmul(X, w)
# shape of b depends on Y
w, b = None, None
#############################
########## TO DO ############
w = tf.get_variable("W", shape=(784, 10), initializer=tf.random_normal_initializer(mean=0, stddev=0.01))
b = tf.get_variable("b", shape=(1, 10), initializer=tf.zeros_initializer())
#############################

# Step 4: build model
# the model that returns the logits.
# this logits will be later passed through softmax layer
logits = None
#############################
########## TO DO ############
logits = tf.matmul(img, w) + b
#############################

# Step 5: define loss function
# use cross entropy of softmax of logits as the loss function
loss = None
#############################
########## TO DO ############
entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=label)
loss = tf.reduce_mean(entropy)
#############################

# Step 6: define optimizer
# using Adam Optimizer with pre-defined learning rate to minimize loss
optimizer = None
#############################
# REF: https://blog.csdn.net/mao_xiao_feng/article/details/53382790
########## TO DO ############
optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)
#############################

# Step 7: calculate accuracy with test set
preds = tf.nn.softmax(logits)
correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(label, 1))
accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

writer = tf.summary.FileWriter('./graphs/logreg', tf.get_default_graph())
with tf.Session() as sess:
    start_time = time.time()
    sess.run(tf.global_variables_initializer())

    # train the model n_epochs times
    for i in range(n_epochs):
        sess.run(train_init)   # drawing samples from train_data
        total_loss = 0
        n_batches = 0
        try:
            while True:
                _, l = sess.run([optimizer, loss])
                total_loss += l
                n_batches += 1
        except tf.errors.OutOfRangeError:
            pass
        print('Average loss epoch {0}: {1}'.format(i, total_loss/n_batches))
    print('Total time: {0} seconds'.format(time.time() - start_time))

    # test the model
    sess.run(test_init)        # drawing samples from test_data
    total_correct_preds = 0
    try:
        while True:
            accuracy_batch = sess.run(accuracy)
            total_correct_preds += accuracy_batch
    except tf.errors.OutOfRangeError:
        pass

    print('Accuracy {0}'.format(total_correct_preds/n_test))
writer.close()

Lecture Note 5

word embedding: Representing a word by means of its neighbors


softmax: predicts probabilities  Note P3
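
For reference (my own notation, so double-check against the note): in skip-gram the softmax predicts the probability of a context word o given a center word c as

  p(o | c) = exp(u_o · v_c) / Σ_w exp(u_w · v_c)

where v_c is the center-word vector, u_o is the context-word vector, and the sum runs over the whole vocabulary; that denominator is what makes the plain softmax expensive for large vocabularies.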

Model introduction

  Note P4-7, P9-14

The general pattern of TensorFlow programming  Note P7-8 (a minimal sketch follows the list below)

1. Import data (use tf.data or placeholder)
2. Define weights
3. Define model
4. Define loss function
5. Define optimizer
6. Train the model
  6.1 Initialize all model variables
  6.2 Initialize iterator/feed in the training data
  6.3 Execute the model on training data
  6.4 Compute cost
  6.5 Adjust parameter to minimize the cost
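
A minimal sketch of the six steps on a toy linear regression (my own made-up data, using the placeholder route from step 1):

import numpy as np
import tensorflow as tf

# 1. import data (placeholder version; tf.data works too)
X = tf.placeholder(tf.float32, name='X')
Y = tf.placeholder(tf.float32, name='Y')
# 2. define weights
w = tf.get_variable('w', initializer=tf.constant(0.0))
b = tf.get_variable('b', initializer=tf.constant(0.0))
# 3. define model
Y_pred = w * X + b
# 4. define loss function
loss = tf.square(Y - Y_pred, name='loss')
# 5. define optimizer
opt = tf.train.GradientDescentOptimizer(learning_rate=0.01).minimize(loss)
# 6. train the model
data = [(x, 2 * x + 1) for x in np.linspace(0, 1, 50)]    # samples from y = 2x + 1
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())           # 6.1 initialize variables
    for epoch in range(100):                              # 6.2-6.5 feed data, run, compute cost, adjust
        for x_val, y_val in data:
            sess.run(opt, feed_dict={X: x_val, Y: y_val})
    print(sess.run([w, b]))                               # should approach [2.0, 1.0]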

Name Scope: used to group ops in TensorBoard  Note P15

Variable scope: for reusing variable values across different networks (e.g., sharing weights between NNs)  Note P17-19 (see the sketch below)
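
A minimal sketch of the reuse mechanism (my own names, not the course code): without reuse_variables() the second call below would raise an error because shared/w already exists; with it, both outputs share the same weights.

import tensorflow as tf

def linear(x):
    w = tf.get_variable('w', shape=[3, 4], initializer=tf.random_normal_initializer())
    return tf.matmul(x, w)

x1 = tf.placeholder(tf.float32, [None, 3])
x2 = tf.placeholder(tf.float32, [None, 3])

with tf.variable_scope('shared') as scope:
    out1 = linear(x1)            # creates shared/w
    scope.reuse_variables()
    out2 = linear(x2)            # reuses shared/w instead of creating a new variable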

collection  Note P20

tf.saver  Note P21-24

tf.summary: for tracking how variables (e.g., loss) change in TensorBoard  Note P25-27

randomization  Note P27-29

autodiff  Note P29


Lecture Note 6

CNN

input image: width * height * depth (image width / height / color channels [e.g. RGB])   e.g. Note P2

3 main types of layers:  Conv Layer / Pooling Layer / Fully-Connected Layer

1. Conv Layer    Note P6 figure

  depth: the number of filters (kernels)

  stride: the step size the filter moves across the input each time

  zero-padding: pad the border of the input with zeros so that the input/output sizes stay the same before and after convolution

  summary: Note P9 (quick check of the output-size formula below)
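
A quick sanity check of how these three parameters set the output size (the standard formula: output = (W - F + 2P) / S + 1 for input width W, filter size F, padding P, stride S):

def conv_output_size(W, F, P, S):
    # output width of a conv layer along one spatial dimension
    return (W - F + 2 * P) // S + 1

print(conv_output_size(W=7, F=3, P=0, S=1))   # 5
print(conv_output_size(W=7, F=3, P=1, S=1))   # 7  (padding keeps the size)
print(conv_output_size(W=7, F=3, P=0, S=2))   # 3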

2. Pooling Layer    Note P13

  Used to downsample the representation, usually placed after a conv layer; it also helps reduce overfitting

3. Normalization Layer    Note P14

4. FC(Fully-Connected) Layer    Note P14

5. Convert FC to Conv Layer    Note P14

  (haven't figured this one out yet qwq)

6. Conv Network Architecture    Note P16


Lecture Note 7

1. tf.nn.conv2d    Note P3

2. Define a layer in a function so it can be reused

  Conv layer  P4

  Max pooling  P6

  Fully connected  P7

3. A simpler way: tf.layers    Note P9


Assignment 2

1. CNN for MNIST

 """ Using convolutional net on MNIST dataset of handwritten digits
MNIST dataset: http://yann.lecun.com/exdb/mnist/
CS 20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu
Chip Huyen (chiphuyen@cs.stanford.edu)
Lecture 07
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']=''
import time

import tensorflow as tf

import utils


def conv_relu(inputs, filters, k_size, stride, padding, scope_name):
    '''
    A method that does convolution + relu on inputs
    '''
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_channels = inputs.shape[-1]
        kernel = tf.get_variable('kernel',
                                 [k_size, k_size, in_channels, filters],
                                 initializer=tf.truncated_normal_initializer())
        biases = tf.get_variable('biases', [filters],
                                 initializer=tf.random_normal_initializer())
        conv = tf.nn.conv2d(inputs, kernel, strides=[1, stride, stride, 1], padding=padding)
    return tf.nn.relu(conv + biases, name=scope.name)


def maxpool(inputs, ksize, stride, padding='VALID', scope_name='pool'):
    '''A method that does max pooling on inputs'''
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        pool = tf.nn.max_pool(inputs,
                              ksize=[1, ksize, ksize, 1],
                              strides=[1, stride, stride, 1],
                              padding=padding)
    return pool


def fully_connected(inputs, out_dim, scope_name='fc'):
    '''
    A fully connected linear layer on inputs
    '''
    with tf.variable_scope(scope_name, reuse=tf.AUTO_REUSE) as scope:
        in_dim = inputs.shape[-1]
        W = tf.get_variable('weights',
                            [in_dim, out_dim],
                            initializer=tf.truncated_normal_initializer())
        b = tf.get_variable('biases',
                            [out_dim],
                            initializer=tf.constant_initializer(0.0))
        out = tf.matmul(inputs, W) + b
    return out


class ConvNet(object):
    def __init__(self):
        self.lr = 0.001
        self.batch_size = 128
        self.keep_prob = tf.constant(0.75)
        self.gstep = tf.Variable(0, dtype=tf.int32,
                                 trainable=False, name='global_step')
        self.n_classes = 10
        self.skip_step = 20
        self.n_test = 10000
        self.training = False

    def get_data(self):
        with tf.name_scope('data'):
            train_data, test_data = utils.get_mnist_dataset(self.batch_size)

            iterator = tf.data.Iterator.from_structure(train_data.output_types,
                                                       train_data.output_shapes)
            img, self.label = iterator.get_next()
            self.img = tf.reshape(img, shape=[-1, 28, 28, 1])
            # reshape the image to make it work with tf.nn.conv2d

            self.train_init = iterator.make_initializer(train_data)  # initializer for train_data
            self.test_init = iterator.make_initializer(test_data)    # initializer for test_data

    def inference_by_functions(self):
        '''
        Build the model according to the description we've shown in class
        Define the model by using the functions above
        '''
        conv1 = conv_relu(inputs=self.img,
                          filters=32,
                          k_size=5,
                          stride=1,
                          padding='SAME',
                          scope_name='conv1')
        pool1 = maxpool(conv1, 2, 2, 'VALID', 'pool1')
        conv2 = conv_relu(inputs=pool1,
                          filters=64,
                          k_size=5,
                          stride=1,
                          padding='SAME',
                          scope_name='conv2')
        pool2 = maxpool(conv2, 2, 2, 'VALID', 'pool2')
        feature_dim = pool2.shape[1] * pool2.shape[2] * pool2.shape[3]
        pool2 = tf.reshape(pool2, [-1, feature_dim])
        fc = tf.nn.relu(fully_connected(pool2, 1024, 'fc'))
        dropout = tf.layers.dropout(fc, self.keep_prob, name='dropout')
        self.logits = fully_connected(dropout, self.n_classes, 'logits')

    def inference_by_layers(self):
        '''
        Build the model according to the description we've shown in class
        Define the model by using tf.layers
        '''
        conv1 = tf.layers.conv2d(inputs=self.img,
                                 filters=32,
                                 kernel_size=[5, 5],
                                 padding='SAME',
                                 activation=tf.nn.relu,
                                 name='conv1')
        pool1 = tf.layers.max_pooling2d(inputs=conv1,
                                        pool_size=[2, 2],
                                        strides=2,
                                        name='pool1')
        conv2 = tf.layers.conv2d(inputs=pool1,
                                 filters=64,
                                 kernel_size=[5, 5],
                                 padding='SAME',
                                 activation=tf.nn.relu,
                                 name='conv2')
        pool2 = tf.layers.max_pooling2d(inputs=conv2,
                                        pool_size=[2, 2],
                                        strides=2,
                                        name='pool2')
        feature_dim = pool2.shape[1] * pool2.shape[2] * pool2.shape[3]
        pool2 = tf.reshape(pool2, [-1, feature_dim])
        fc = tf.layers.dense(pool2, 1024, activation=tf.nn.relu, name='fc')
        dropout = tf.layers.dropout(fc,
                                    self.keep_prob,
                                    training=self.training,
                                    name='dropout')
        self.logits = tf.layers.dense(dropout, self.n_classes, name='logits')

    def loss(self):
        '''
        define loss function
        use softmax cross entropy with logits as the loss function
        tf.nn.softmax_cross_entropy_with_logits
        softmax is applied internally
        don't forget to compute mean across all samples in a batch
        '''
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=self.logits, labels=self.label)
            self.loss = tf.reduce_mean(cross_entropy)

    def optimize(self):
        '''
        Define training op
        using Adam Gradient Descent to minimize cost
        Don't forget to use global step
        '''
        optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.loss, global_step=self.gstep)
        self.opt = optimizer

    def summary(self):
        '''
        Create summaries to write on TensorBoard
        Remember to track both training loss and test accuracy
        '''
        with tf.name_scope('summaries'):
            tf.summary.scalar('loss', self.loss)
            tf.summary.scalar('accuracy', self.accuracy)
            tf.summary.histogram('histogram loss', self.loss)
            self.summary_op = tf.summary.merge_all()

    def eval(self):
        '''
        Count the number of right predictions in a batch
        '''
        with tf.name_scope('predict'):
            preds = tf.nn.softmax(self.logits)
            correct_preds = tf.equal(tf.argmax(preds, 1), tf.argmax(self.label, 1))
            self.accuracy = tf.reduce_sum(tf.cast(correct_preds, tf.float32))

    def build(self):
        '''
        Build the computation graph
        '''
        self.get_data()
        self.inference_by_layers()
        self.loss()
        self.optimize()
        self.eval()
        self.summary()

    def train_one_epoch(self, sess, saver, init, writer, epoch, step):
        start_time = time.time()
        sess.run(init)
        total_loss = 0
        n_batches = 0
        try:
            while True:
                _, l, summaries = sess.run([self.opt, self.loss, self.summary_op])
                writer.add_summary(summaries, global_step=step)
                if (step + 1) % self.skip_step == 0:
                    print('Loss at step {0}: {1}'.format(step, l))
                step += 1
                total_loss += l
                n_batches += 1
        except tf.errors.OutOfRangeError:
            pass
        saver.save(sess, 'checkpoints/convnet_starter/mnist-convnet', step)
        print('Average loss at epoch {0}: {1}'.format(epoch, total_loss/n_batches))
        print('Took: {0} seconds'.format(time.time() - start_time))
        return step

    def eval_once(self, sess, init, writer, epoch, step):
        start_time = time.time()
        sess.run(init)
        total_correct_preds = 0
        try:
            while True:
                accuracy_batch, summaries = sess.run([self.accuracy, self.summary_op])
                writer.add_summary(summaries, global_step=step)
                total_correct_preds += accuracy_batch
        except tf.errors.OutOfRangeError:
            pass

        print('Accuracy at epoch {0}: {1} '.format(epoch, total_correct_preds/self.n_test))
        print('Took: {0} seconds'.format(time.time() - start_time))

    def train(self, n_epochs):
        '''
        The train function alternates between training one epoch and evaluating
        '''
        utils.safe_mkdir('checkpoints')
        utils.safe_mkdir('checkpoints/convnet_starter')
        writer = tf.summary.FileWriter('./graphs/convnet_starter', tf.get_default_graph())

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/convnet_starter/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            step = self.gstep.eval()

            for epoch in range(n_epochs):
                step = self.train_one_epoch(sess, saver, self.train_init, writer, epoch, step)
                self.eval_once(sess, self.test_init, writer, epoch, step)
        writer.close()


if __name__ == '__main__':
    model = ConvNet()
    model.build()
    model.train(n_epochs=15)

2. style transfer

 """ Implementation in TensorFlow of the paper
A Neural Algorithm of Artistic Style (Gatys et al., 2016)

Created by Chip Huyen (chiphuyen@cs.stanford.edu)
CS20: "TensorFlow for Deep Learning Research"
cs20.stanford.edu

For more details, please read the assignment handout:
https://docs.google.com/document/d/1FpueD-3mScnD0SJQDtwmOb1FrSwo1NGowkXzMwPoLH4/edit?usp=sharing
"""
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']=''

import time

import numpy as np
import tensorflow as tf

import load_vgg
import utils


def setup():
    utils.safe_mkdir('checkpoints')
    utils.safe_mkdir('checkpoints/style_transfer')
    utils.safe_mkdir('outputs')
    utils.safe_mkdir('graphs')


class StyleTransfer(object):
    def __init__(self, content_img, style_img, img_width, img_height):
        '''
        img_width and img_height are the dimensions we expect from the generated image.
        We will resize input content image and input style image to match this dimension.
        Feel free to alter any hyperparameter here and see how it affects your training.
        '''
        self.img_width = img_width
        self.img_height = img_height
        self.content_img = utils.get_resized_image(content_img, img_width, img_height)
        self.style_img = utils.get_resized_image(style_img, img_width, img_height)
        self.initial_img = utils.generate_noise_image(self.content_img, img_width, img_height)

        ## create global step (gstep) and hyperparameters for the model
        self.content_layer = 'conv4_2'
        self.style_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1']
        # content_w, style_w: corresponding weights for content loss and style loss
        self.content_w = 0.01
        self.style_w = 1
        # style_layer_w: weights for different style layers. deep layers have more weights
        self.style_layer_w = [0.5, 1.0, 1.5, 3.0, 4.0]
        self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
        self.lr = 2.0

    def create_input(self):
        '''
        We will use one input_img as a placeholder for the content image,
        style image, and generated image, because:
            1. they have the same dimension
            2. we have to extract the same set of features from them
        We use a variable instead of a placeholder because we're, at the same time,
        training the generated image to get the desirable result.
        Note: image height corresponds to number of rows, not columns.
        '''
        with tf.variable_scope('input') as scope:
            self.input_img = tf.get_variable('in_img',
                                             shape=([1, self.img_height, self.img_width, 3]),
                                             dtype=tf.float32,
                                             initializer=tf.zeros_initializer())

    def load_vgg(self):
        '''
        Load the saved model parameters of VGG-19, using the input_img
        as the input to compute the output at each layer of vgg.

        During training, VGG-19 mean-centered all images and found the mean pixels
        to be [123.68, 116.779, 103.939] along RGB dimensions. We have to subtract
        this mean from our images.
        '''
        self.vgg = load_vgg.VGG(self.input_img)
        self.vgg.load()
        self.content_img -= self.vgg.mean_pixels
        self.style_img -= self.vgg.mean_pixels

    def _content_loss(self, P, F):
        ''' Calculate the loss between the feature representation of the
        content image and the generated image.
        Inputs:
            P: content representation of the content image
            F: content representation of the generated image
        Read the assignment handout for more details

        Note: Don't use the coefficient 0.5 as defined in the paper.
        Use the coefficient defined in the assignment handout.
        '''
        self.content_loss = tf.reduce_sum((F - P) ** 2) / (4.0 * P.size)

    def _gram_matrix(self, F, N, M):
        """ Create and return the gram matrix for tensor F
        Hint: you'll first have to reshape F
        """
        F = tf.reshape(F, (M, N))
        return tf.matmul(tf.transpose(F), F)

    def _single_style_loss(self, a, g):
""" Calculate the style loss at a certain layer
Inputs:
a is the feature representation of the style image at that layer
g is the feature representation of the generated image at that layer
Output:
the style loss at a certain layer (which is E_l in the paper) Hint: 1. you'll have to use the function _gram_matrix()
2. we'll use the same coefficient for style loss as in the paper
3. a and g are feature representation, not gram matrices
"""
N = a.shape[3] # number of filters
M = a.shape[1] * a.shape[2] # height times width of the feature map
G = self._gram_matrix(g, N, M)
A = self._gram_matrix(a, N, M)
return tf.reduce_sum((G-A) ** 2 / (4*N*N*M*M)) def _style_loss(self, A):
""" Calculate the total style loss as a weighted sum
of style losses at all style layers
Hint: you'll have to use _single_style_loss()
"""
n_layers = len(self.style_layers)
E = [self._single_style_loss(A[i], getattr(self.vgg, self.style_layers[i])) for i in range(n_layers)]
self.style_loss = sum([self.style_layer_w[i] * E[i] for i in range(n_layers)]) def losses(self):
        with tf.variable_scope('losses') as scope:
            with tf.Session() as sess:
                # assign content image to the input variable
                sess.run(self.input_img.assign(self.content_img))
                gen_img_content = getattr(self.vgg, self.content_layer)
                content_img_content = sess.run(gen_img_content)
            self._content_loss(content_img_content, gen_img_content)

            with tf.Session() as sess:
                sess.run(self.input_img.assign(self.style_img))
                style_layers = sess.run([getattr(self.vgg, layer) for layer in self.style_layers])
            self._style_loss(style_layers)

            self.total_loss = self.content_w * self.content_loss + self.style_w * self.style_loss

    def optimize(self):
        optimizer = tf.train.AdamOptimizer(self.lr).minimize(self.total_loss, global_step=self.gstep)
        self.opt = optimizer

    def create_summary(self):
        with tf.name_scope('summaries'):
            tf.summary.scalar('content loss', self.content_loss)
            tf.summary.scalar('style loss', self.style_loss)
            tf.summary.scalar('total loss', self.total_loss)
            tf.summary.histogram('histogram content loss', self.content_loss)
            tf.summary.histogram('histogram style loss', self.style_loss)
            tf.summary.histogram('histogram total loss', self.total_loss)
            self.summary_op = tf.summary.merge_all()

    def build(self):
        self.create_input()
        self.load_vgg()
        self.losses()
        self.optimize()
        self.create_summary()

    def train(self, n_iters):
        skip_step = 1
        with tf.Session() as sess:
            ## 1. initialize your variables
            ## 2. create writer to write your graph
            sess.run(tf.global_variables_initializer())
            writer = tf.summary.FileWriter('./graphs/style_transfer', tf.get_default_graph())
            sess.run(self.input_img.assign(self.initial_img))

            ## 3. create a saver object
            ## 4. check if a checkpoint exists, restore the variables
            saver = tf.train.Saver()
            ckpt = tf.train.get_checkpoint_state(os.path.dirname('checkpoints/checkpoint'))
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)

            initial_step = self.gstep.eval()

            start_time = time.time()
            for index in range(initial_step, n_iters):
                if index >= 5 and index < 20:
                    skip_step = 10
                elif index >= 20:
                    skip_step = 20

                sess.run(self.opt)
                if (index + 1) % skip_step == 0:
                    ## obtain generated image, loss, and summary
                    gen_image, total_loss, summary = sess.run([self.input_img, self.total_loss, self.summary_op])

                    # add back the mean pixels we subtracted before
                    gen_image = gen_image + self.vgg.mean_pixels
                    writer.add_summary(summary, global_step=index)
                    print('Step {}\n Sum: {:5.1f}'.format(index + 1, np.sum(gen_image)))
                    print(' Loss: {:5.1f}'.format(total_loss))
                    print(' Took: {} seconds'.format(time.time() - start_time))
                    start_time = time.time()

                    filename = 'outputs/%d.png' % (index)
                    utils.save_image(filename, gen_image)

                    if (index + 1) % 20 == 0:
                        # save the variables into a checkpoint
                        saver.save(sess, 'checkpoints/style_transfer', index)


if __name__ == '__main__':
    setup()
    machine = StyleTransfer('content/deadpool.jpg', 'styles/harlequin.jpg', 333, 250)
    machine.build()
    machine.train(300)