Table of Contents
Nesterov Momentum (also known as Nesterov Accelerated Gradient)
Adam (Adaptive Moment Estimation)
Code implementation:
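The script below (TensorFlow 1.x) trains a bidirectional GRU with an attention layer on the IMDB sentiment data set and minimizes the loss with tf.train.AdamOptimizer at a learning rate of 1e-3. As a quick reference for the Adam (Adaptive Moment Estimation) entry above, the standard Adam update (Kingma & Ba, 2014) keeps bias-corrected exponential moving averages of the gradient and its element-wise square and scales each parameter's step accordingly:

\[
\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1-\beta_1)\, g_t \\
v_t &= \beta_2 v_{t-1} + (1-\beta_2)\, g_t^2 \\
\hat m_t &= m_t/(1-\beta_1^t), \qquad \hat v_t = v_t/(1-\beta_2^t) \\
\theta_t &= \theta_{t-1} - \alpha\, \hat m_t / \left(\sqrt{\hat v_t} + \epsilon\right)
\end{aligned}
\]

TensorFlow's AdamOptimizer uses the common defaults beta1 = 0.9, beta2 = 0.999, epsilon = 1e-8.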
from __future__ import print_function, division

import numpy as np
import tensorflow as tf
from keras.datasets import imdb
from tensorflow.contrib.rnn import GRUCell
from tensorflow.python.ops.rnn import bidirectional_dynamic_rnn as bi_rnn
from tqdm import tqdm

from attention import attention
from utils import get_vocabulary_size, fit_in_vocabulary, zero_pad, batch_generator

NUM_WORDS = 10000      # keep only the 10,000 most frequent words
INDEX_FROM = 3         # word indices start from 3 (lower indices are reserved for special tokens)
SEQUENCE_LENGTH = 250  # sequence length after padding/truncation
EMBEDDING_DIM = 100    # dimension of the word embeddings
HIDDEN_SIZE = 150      # number of RNN hidden units per direction
ATTENTION_SIZE = 50    # size of the attention layer
KEEP_PROB = 0.8
BATCH_SIZE = 256       # batch size
NUM_EPOCHS = 3         # model easily overfits without pre-trained word embeddings, hence only a few epochs
DELTA = 0.5
MODEL_PATH = './model'

# Load the IMDB data set, keeping the NUM_WORDS most frequent words
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=NUM_WORDS, index_from=INDEX_FROM)

# Sequence pre-processing
vocabulary_size = get_vocabulary_size(X_train)  # size of the vocabulary
X_test = fit_in_vocabulary(X_test, vocabulary_size)
X_train = zero_pad(X_train, SEQUENCE_LENGTH)
X_test = zero_pad(X_test, SEQUENCE_LENGTH)

# Different placeholders
with tf.name_scope('Inputs'):
    batch_ph = tf.placeholder(tf.int32, [None, SEQUENCE_LENGTH], name='batch_ph')
    target_ph = tf.placeholder(tf.float32, [None], name='target_ph')
    seq_len_ph = tf.placeholder(tf.int32, [None], name='seq_len_ph')
    keep_prob_ph = tf.placeholder(tf.float32, name='keep_prob_ph')

# Embedding layer
with tf.name_scope('Embedding_layer'):
    embeddings_var = tf.Variable(tf.random_uniform([vocabulary_size, EMBEDDING_DIM], -1.0, 1.0), trainable=True)
    tf.summary.histogram('embeddings_var', embeddings_var)
    batch_embedded = tf.nn.embedding_lookup(embeddings_var, batch_ph)

# (Bi-)RNN layer(-s)
rnn_outputs, _ = bi_rnn(GRUCell(HIDDEN_SIZE), GRUCell(HIDDEN_SIZE),
                        inputs=batch_embedded, sequence_length=seq_len_ph, dtype=tf.float32)
tf.summary.histogram('RNN_outputs', rnn_outputs)

# Attention layer
with tf.name_scope('Attention_layer'):
    attention_output, alphas = attention(rnn_outputs, ATTENTION_SIZE, return_alphas=True)
    tf.summary.histogram('alphas', alphas)

# Dropout
drop = tf.nn.dropout(attention_output, keep_prob_ph)

# Fully connected layer
with tf.name_scope('Fully_connected_layer'):
    W = tf.Variable(tf.truncated_normal([HIDDEN_SIZE * 2, 1], stddev=0.1))  # hidden size is multiplied by 2 for the Bi-RNN
    b = tf.Variable(tf.constant(0., shape=[1]))
    y_hat = tf.nn.xw_plus_b(drop, W, b)
    y_hat = tf.squeeze(y_hat)
    tf.summary.histogram('W', W)

with tf.name_scope('Metrics'):
    # Cross-entropy loss and optimizer initialization
    loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_hat, labels=target_ph))
    tf.summary.scalar('loss', loss)
    optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(loss)

    # Accuracy metric
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), target_ph), tf.float32))
    tf.summary.scalar('accuracy', accuracy)

merged = tf.summary.merge_all()

# Batch generators
train_batch_generator = batch_generator(X_train, y_train, BATCH_SIZE)
test_batch_generator = batch_generator(X_test, y_test, BATCH_SIZE)

train_writer = tf.summary.FileWriter('./logdir/train', accuracy.graph)
test_writer = tf.summary.FileWriter('./logdir/test', accuracy.graph)

# session_conf = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))

# Create a Saver object. A frequently used argument is max_to_keep, which sets how many checkpoints
# are kept on disk; the default is max_to_keep=5, i.e. the 5 most recent models. To keep a checkpoint
# for every epoch, set max_to_keep to None or 0.
saver = tf.train.Saver()

if __name__ == "__main__":
    # with tf.Session(config=session_conf) as sess:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print("Start learning...")
        for epoch in range(NUM_EPOCHS):
            loss_train = 0
            loss_test = 0
            accuracy_train = 0
            accuracy_test = 0

            print("epoch: {}\t".format(epoch), end="")

            # Training
            num_batches = X_train.shape[0] // BATCH_SIZE  # number of batches per epoch
            for b in tqdm(range(num_batches)):
                x_batch, y_batch = next(train_batch_generator)
                seq_len = np.array([list(x).index(0) + 1 for x in x_batch])  # actual lengths of sequences
                loss_tr, acc, _, summary = sess.run([loss, accuracy, optimizer, merged],
                                                    feed_dict={batch_ph: x_batch,
                                                               target_ph: y_batch,
                                                               seq_len_ph: seq_len,
                                                               keep_prob_ph: KEEP_PROB})
                accuracy_train += acc
                loss_train = loss_tr * DELTA + loss_train * (1 - DELTA)
                train_writer.add_summary(summary, b + num_batches * epoch)
            accuracy_train /= num_batches  # accuracy on the training set

            # Testing
            num_batches = X_test.shape[0] // BATCH_SIZE
            for b in tqdm(range(num_batches)):
                x_batch, y_batch = next(test_batch_generator)
                seq_len = np.array([list(x).index(0) + 1 for x in x_batch])  # actual lengths of sequences
                loss_test_batch, acc, summary = sess.run([loss, accuracy, merged],
                                                         feed_dict={batch_ph: x_batch,
                                                                    target_ph: y_batch,
                                                                    seq_len_ph: seq_len,
                                                                    keep_prob_ph: 1.0})
                accuracy_test += acc
                loss_test += loss_test_batch
                test_writer.add_summary(summary, b + num_batches * epoch)
            accuracy_test /= num_batches
            loss_test /= num_batches

            # Print loss and accuracy on the training and test sets
            print("loss: {:.3f}, val_loss: {:.3f}, acc: {:.3f}, val_acc: {:.3f}".format(
                loss_train, loss_test, accuracy_train, accuracy_test
            ))
        train_writer.close()
        test_writer.close()
        # saver.save(sess, MODEL_PATH, global_step=step) would append the training step to the checkpoint name
        saver.save(sess, MODEL_PATH)
        print("Run 'tensorboard --logdir=./logdir' to checkout tensorboard logs.")
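The attention layer used above is imported from a local attention.py that is not listed in this post. The following is a minimal sketch of a compatible implementation, assuming the additive (Yang et al.-style) attention commonly paired with this Bi-RNN setup; the function name, arguments, and variable names are simply chosen to match the call attention(rnn_outputs, ATTENTION_SIZE, return_alphas=True):

import tensorflow as tf

def attention(inputs, attention_size, return_alphas=False):
    # inputs is the (fw_outputs, bw_outputs) tuple returned by bidirectional_dynamic_rnn,
    # each of shape [batch, time, HIDDEN_SIZE]; concatenate along the feature axis.
    if isinstance(inputs, tuple):
        inputs = tf.concat(inputs, 2)
    hidden_size = inputs.shape[2].value  # 2 * HIDDEN_SIZE for the Bi-RNN

    # Trainable parameters of the one-layer scoring network
    w_omega = tf.Variable(tf.random_normal([hidden_size, attention_size], stddev=0.1))
    b_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))
    u_omega = tf.Variable(tf.random_normal([attention_size], stddev=0.1))

    v = tf.tanh(tf.tensordot(inputs, w_omega, axes=1) + b_omega)  # [batch, time, attention_size]
    vu = tf.tensordot(v, u_omega, axes=1)                         # [batch, time] unnormalized scores
    alphas = tf.nn.softmax(vu)                                    # attention weights over time steps

    # Weighted sum of the RNN outputs: [batch, 2 * HIDDEN_SIZE]
    output = tf.reduce_sum(inputs * tf.expand_dims(alphas, -1), 1)
    return (output, alphas) if return_alphas else output

The output dimension is 2 * HIDDEN_SIZE, which matches the [HIDDEN_SIZE * 2, 1] weight matrix of the fully connected layer in the script.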