TextCNN

# coding: utf-8
import pickle
import logging
import tensorflow as tf
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.INFO)

class TextCNN(object):
    """
    A CNN for text classification.
    Uses an embedding layer, followed by convolutional, max-pooling and softmax layers.
    """
    def __init__(self, config):
        self.lr = config['lr']
        self.batch_size = config['batch_size']
        # vocabulary size
        self.vocab_size = config['vocab_size']
        self.num_classes = config['num_classes']
        self.keep_prob = config['keep_prob']
        # dimensionality of the word embeddings
        self.embedding_size = config['embedding_size']
        # filter (kernel) sizes, given as a list
        self.filter_sizes = config['filter_sizes']
        # maximum sentence length (in tokens)
        self.sentence_length = config['sentence_length']
        # number of filters per filter size
        self.num_filters = config['num_filters']

    def add_placeholders(self):
        # token ids, shape (batch_size, sentence_length)
        self.X = tf.placeholder(tf.int32, [None, self.sentence_length], name='X')
        # class labels, shape (batch_size,)
        self.y = tf.placeholder(tf.int32, [None], name='y')

    def inference(self):
        with tf.variable_scope('embedding_layer'):
            # loading embedding weights
            with open('Text_cnn/embedding_matrix.pkl','rb') as f:
                embedding_weights = pickle.load(f)
            # non-static: the pre-trained embeddings are fine-tuned during training
            self.W = tf.Variable(embedding_weights, trainable=True, name='embedding_weights', dtype='float32')
            # embedding_chars has shape (None, sentence_length, embedding_size)
            self.embedding_chars = tf.nn.embedding_lookup(self.W, self.X)
            # conv2d expects a 4-D NHWC tensor, so add a channel dimension of 1:
            # (None, sentence_length, embedding_size, 1)
            self.embedding_chars_expanded = tf.expand_dims(self.embedding_chars, -1)
        with tf.variable_scope('convolution_pooling_layer'):
            pooled_outputs = []  # pooled feature maps, one per filter size
            for i, filter_size in enumerate(self.filter_sizes):
                filter_shape = [filter_size, self.embedding_size, 1, self.num_filters]
                # truncated normal init: values further than two stddevs from the mean are re-drawn
                W = tf.get_variable('W'+str(i), shape=filter_shape,
                                    initializer=tf.truncated_normal_initializer(stddev=0.1))
                b = tf.get_variable('b'+str(i), shape=[self.num_filters],
                                    initializer=tf.zeros_initializer())
                conv = tf.nn.conv2d(self.embedding_chars_expanded, W, strides=[1,1,1,1],
                                    padding='VALID', name='conv'+str(i))
                # apply nonlinearity
                h = tf.nn.relu(tf.add(conv, b))
                # max pooling
                pooled = tf.nn.max_pool(h, ksize=[1, self.sentence_length - filter_size + 1, 1, 1],
                    strides=[1, 1, 1, 1], padding='VALID', name="pool")
                # pooled has shape (?, 1, 1, num_filters)
                pooled_outputs.append(pooled)
            # combine all the pooled features
            self.feature_length = self.num_filters * len(self.filter_sizes)
            self.h_pool = tf.concat(pooled_outputs, 3)
            # flatten to shape (?, feature_length)
            self.h_pool_flat = tf.reshape(self.h_pool, [-1, self.feature_length])
        # add dropout before softmax layer
        with tf.variable_scope('dropout_layer'):
            # shape of [None, feature_length]
            self.features = tf.nn.dropout(self.h_pool_flat, self.keep_prob)
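            # NOTE: keep_prob is a fixed Python float here, so dropout stays
            # active at inference time; feeding it through a placeholder
            # (keep_prob=1.0 at eval) would be a common alternative.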
        # fully-connection layer
        with tf.variable_scope('fully_connection_layer'):
            # Xavier initialization keeps the gradient scale roughly the same across layers
            W = tf.get_variable('W', shape=[self.feature_length, self.num_classes],
                                initializer=tf.contrib.layers.xavier_initializer())
            # bias initialized to a small constant
            b = tf.get_variable('b', shape=[self.num_classes],
                                initializer=tf.constant_initializer(0.1))
            # logits, shape (None, num_classes)
            self.y_out = tf.matmul(self.features, W) + b
            # predicted class probabilities
            self.y_prob = tf.nn.softmax(self.y_out)

    def add_loss(self):
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=self.y, logits=self.y_out)
        # average the per-example cross-entropy over the batch
        self.loss = tf.reduce_mean(loss)
        # record the loss for TensorBoard
        tf.summary.scalar('loss', self.loss)

    def add_metric(self):
        # binary task: predict class 1 when its probability exceeds 0.5
        self.y_pred = self.y_prob[:, 1] > 0.5
        self.precision, self.precision_op = tf.metrics.precision(self.y, self.y_pred)
        self.recall, self.recall_op = tf.metrics.recall(self.y, self.y_pred)
        # add precision and recall to summary
        tf.summary.scalar('precision', self.precision)
        tf.summary.scalar('recall', self.recall)
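        # NOTE: tf.metrics.precision/recall create local variables, so run
        # tf.local_variables_initializer() before evaluating these ops.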

    def train(self):
        # global_step counts training iterations; minimize() increments it by one per step
        self.global_step = tf.Variable(0, trainable=False)
        # define optimizer
        optimizer = tf.train.AdamOptimizer(self.lr)
        # ops registered in UPDATE_OPS (e.g. batch-norm moving averages) must run
        # before the train op; the collection is empty for this model, but the
        # dependency keeps the code safe to extend
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            self.train_op = optimizer.minimize(self.loss, global_step=self.global_step)

    def build_graph(self):
        """build graph for model"""
        self.add_placeholders()
        self.inference()
        self.add_loss()
        self.add_metric()
        self.train()
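
The snippet below is a minimal training sketch showing how the class might be driven. The config values and the X_batch / y_batch arrays are hypothetical placeholders, and it assumes the pickled embedding matrix at Text_cnn/embedding_matrix.pkl matches vocab_size and embedding_size.

# Minimal training sketch (TensorFlow 1.x); values are illustrative, not the author's settings.
config = {
    'lr': 1e-3,
    'batch_size': 64,
    'vocab_size': 50000,       # must match the pickled embedding matrix
    'num_classes': 2,
    'keep_prob': 0.5,
    'embedding_size': 100,     # must match the pickled embedding matrix
    'filter_sizes': [3, 4, 5],
    'sentence_length': 100,
    'num_filters': 300,
}

model = TextCNN(config)
model.build_graph()

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())  # required by tf.metrics.*
    # X_batch: int32 array of token ids, shape (batch_size, sentence_length)
    # y_batch: int32 array of labels, shape (batch_size,)
    _, step_loss = sess.run([model.train_op, model.loss],
                            feed_dict={model.X: X_batch, model.y: y_batch})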

 
