Implementing CNNs, RNNs (attention-based bidirectional RNNs), and their fusion in Keras

April 24, 2018, 10:50:34 · AI_盲

Copyright notice: This is an original post by the author, licensed under CC 4.0 BY-SA. Please include the original source link and this notice when reposting.

Original link: https://blog.csdn.net/xwd18280820053/article/details/80060544

This post uses CNNs and RNNs for binary classification of time-series data.

Binary classification of time series with a CNN


 
```python
from keras import metrics
from keras.models import Sequential
from keras.layers import (Conv1D, BatchNormalization, Activation,
                          GlobalAveragePooling1D, Dropout, Dense)

max_lenth = 23      # sequence length (also used in the RNN sections below)
max_features = 12   # features per time step

model = Sequential()
model.add(Conv1D(128, 3, padding='same', input_shape=(max_lenth, max_features)))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv1D(256, 3))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Conv1D(128, 3))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(GlobalAveragePooling1D())  # global pooling over the time dimension
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[metrics.binary_crossentropy])
```
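To check that this model trains end to end, here is a minimal sketch with random data; the sample count, batch size, and epoch count are placeholders, not values from the post:

```python
import numpy as np

# toy data matching the input shape above: 1000 sequences of
# max_lenth=23 time steps with max_features=12 features each
x_train = np.random.randn(1000, max_lenth, max_features)
y_train = np.random.randint(0, 2, size=(1000, 1))

model.fit(x_train, y_train, batch_size=32, epochs=3, validation_split=0.2)
```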

 

Binary classification of time series with a two-layer RNN


 
```python
import numpy as np
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import GRU
import keras
from keras import metrics, regularizers
from keras.callbacks import EarlyStopping
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold  # sklearn.cross_validation is deprecated/removed
from keras import backend as K
import my_callbacks  # user-defined module providing RocAucMetricCallback (see sketch below)
from keras.layers.normalization import BatchNormalization
import keras.backend.tensorflow_backend as KTF

max_lenth = 23
max_features = 12
training_iters = 2000
train_batch_size = 800
test_batch_size = 800
n_hidden_units = 64
lr = 0.0003

cb = [
    my_callbacks.RocAucMetricCallback(),  # include it before EarlyStopping!
    EarlyStopping(monitor='roc_auc_val', patience=200, verbose=2, mode='max')
]

model = Sequential()
model.add(keras.layers.core.Masking(mask_value=0., input_shape=(max_lenth, max_features)))
model.add(GRU(units=n_hidden_units, activation='selu', kernel_initializer='orthogonal',
              recurrent_initializer='orthogonal', bias_initializer='zeros',
              kernel_regularizer=regularizers.l2(0.01),
              recurrent_regularizer=regularizers.l2(0.01),
              dropout=0.5, recurrent_dropout=0.0, implementation=1,
              return_sequences=True))  # must be True when stacking recurrent layers
model.add(GRU(units=n_hidden_units, activation='selu', kernel_initializer='orthogonal',
              recurrent_initializer='orthogonal', bias_initializer='zeros',
              kernel_regularizer=regularizers.l2(0.01),
              recurrent_regularizer=regularizers.l2(0.01),
              dropout=0.5, recurrent_dropout=0.0, implementation=1,
              return_sequences=False))  # the last recurrent layer returns only the final output
model.add(Dropout(0.5))

model.add(Dense(1))
model.add(BatchNormalization())
model.add(keras.layers.core.Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[metrics.binary_crossentropy])

# x_train, y_train, x_test, y_test and class_weight are assumed to be prepared upstream
model.fit(x_train, y_train, batch_size=train_batch_size, epochs=training_iters, verbose=2,
          callbacks=cb, validation_split=0.2,
          shuffle=True, class_weight=class_weight, sample_weight=None, initial_epoch=0)
pred_y = model.predict(x_test, batch_size=test_batch_size)
score = roc_auc_score(y_test, pred_y)
```
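The post does not include the my_callbacks module. A minimal sketch of what RocAucMetricCallback might look like, assuming it scores the validation split each epoch and publishes the result as roc_auc_val for EarlyStopping to monitor (the details are guesses, not the author's code):

```python
from keras.callbacks import Callback
from sklearn.metrics import roc_auc_score

class RocAucMetricCallback(Callback):
    """Hypothetical sketch: adds 'roc_auc_val' to the epoch logs."""
    def on_epoch_end(self, epoch, logs=None):
        logs = logs if logs is not None else {}
        # self.validation_data is populated by fit() in Keras 2.0.x
        x_val, y_val = self.validation_data[0], self.validation_data[1]
        logs['roc_auc_val'] = roc_auc_score(y_val, self.model.predict(x_val))
```

Because EarlyStopping reads roc_auc_val from the same logs dict, this callback has to run first, which is why it is listed before EarlyStopping in cb.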

Bidirectional RNN with an attention mechanism (here, attention means multiplying each time step's output by a learned weight and summing them to form the final output)
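In symbols, for RNN outputs $h_t$, the layer below (following Yang et al.) learns a projection $(W, b)$ and a context vector $u$, then returns an attention-weighted sum over time steps:

```latex
u_t = \tanh(W h_t + b), \qquad
\alpha_t = \frac{\exp(u_t^{\top} u)}{\sum_s \exp(u_s^{\top} u)}, \qquad
\text{output} = \sum_t \alpha_t h_t
```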


 
```python
from keras import backend as K
from keras.layers import Layer
from keras import initializers, regularizers, constraints


def dot_product(x, kernel):
    """
    Wrapper for dot product operation, in order to be compatible with both
    Theano and TensorFlow.
    Args:
        x (): input
        kernel (): weights
    Returns:
    """
    if K.backend() == 'tensorflow':
        return K.squeeze(K.dot(x, K.expand_dims(kernel)), axis=-1)
    else:
        return K.dot(x, kernel)


class AttentionWithContext(Layer):
    """
    Attention operation, with a context/query vector, for temporal data.
    Supports Masking.
    Follows the work of Yang et al. [https://www.cs.cmu.edu/~diyiy/docs/naacl16.pdf]
    "Hierarchical Attention Networks for Document Classification"
    by using a context vector to assist the attention.
    # Input shape
        3D tensor with shape: `(samples, steps, features)`.
    # Output shape
        2D tensor with shape: `(samples, features)`.
    How to use:
    Just put it on top of an RNN layer (GRU/LSTM/SimpleRNN) with return_sequences=True.
    The dimensions are inferred based on the output shape of the RNN.
    Note: The layer has been tested with Keras 2.0.6
    Example:
        model.add(LSTM(64, return_sequences=True))
        model.add(AttentionWithContext())
        # next add a Dense layer (for classification/regression) or whatever...
    """

    def __init__(self,
                 W_regularizer=None, u_regularizer=None, b_regularizer=None,
                 W_constraint=None, u_constraint=None, b_constraint=None,
                 bias=True, **kwargs):

        self.supports_masking = True
        self.init = initializers.get('glorot_uniform')

        self.W_regularizer = regularizers.get(W_regularizer)
        self.u_regularizer = regularizers.get(u_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.u_constraint = constraints.get(u_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        super(AttentionWithContext, self).__init__(**kwargs)

    def build(self, input_shape):
        assert len(input_shape) == 3

        self.W = self.add_weight((input_shape[-1], input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_W'.format(self.name),
                                 regularizer=self.W_regularizer,
                                 constraint=self.W_constraint)
        if self.bias:
            self.b = self.add_weight((input_shape[-1],),
                                     initializer='zero',
                                     name='{}_b'.format(self.name),
                                     regularizer=self.b_regularizer,
                                     constraint=self.b_constraint)

        self.u = self.add_weight((input_shape[-1],),
                                 initializer=self.init,
                                 name='{}_u'.format(self.name),
                                 regularizer=self.u_regularizer,
                                 constraint=self.u_constraint)

        super(AttentionWithContext, self).build(input_shape)

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        uit = dot_product(x, self.W)

        if self.bias:
            uit += self.b

        uit = K.tanh(uit)
        ait = dot_product(uit, self.u)

        a = K.exp(ait)

        # apply mask after the exp. will be re-normalized next
        if mask is not None:
            # Cast the mask to floatX to avoid float64 upcasting in theano
            a *= K.cast(mask, K.floatx())

        # in some cases, especially in the early stages of training, the sum may be
        # almost zero and this results in NaNs. A workaround is to add a very small
        # positive number epsilon to the sum.
        # a /= K.cast(K.sum(a, axis=1, keepdims=True), K.floatx())
        a /= K.cast(K.sum(a, axis=1, keepdims=True) + K.epsilon(), K.floatx())

        a = K.expand_dims(a)
        weighted_input = x * a
        return K.sum(weighted_input, axis=1)

    def compute_output_shape(self, input_shape):
        return input_shape[0], input_shape[-1]
```
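A quick way to confirm the layer's contract, (samples, steps, features) in and (samples, features) out, is a tiny probe model; the layer sizes here are arbitrary:

```python
from keras.models import Sequential
from keras.layers import GRU

probe = Sequential()
probe.add(GRU(8, return_sequences=True, input_shape=(5, 4)))
probe.add(AttentionWithContext())
print(probe.output_shape)  # (None, 8): the time dimension has been collapsed
```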


 
```python
from keras.layers import Bidirectional

model = Sequential()
model.add(keras.layers.core.Masking(mask_value=0., input_shape=(max_lenth, max_features)))
model.add(Bidirectional(GRU(units=n_hidden_units, activation='selu', kernel_initializer='orthogonal',
                            recurrent_initializer='orthogonal', bias_initializer='zeros',
                            kernel_regularizer=regularizers.l2(0.01),
                            recurrent_regularizer=regularizers.l2(0.01),
                            dropout=0.5, recurrent_dropout=0.0, implementation=1,
                            return_sequences=True),  # must be True when stacking recurrent layers
                        merge_mode='concat'))
model.add(Bidirectional(GRU(units=n_hidden_units, activation='selu', kernel_initializer='orthogonal',
                            recurrent_initializer='orthogonal', bias_initializer='zeros',
                            kernel_regularizer=regularizers.l2(0.01),
                            recurrent_regularizer=regularizers.l2(0.01),
                            dropout=0.5, recurrent_dropout=0.0, implementation=1,
                            return_sequences=True),  # the attention layer consumes the full sequence
                        merge_mode='concat'))
model.add(Dropout(0.5))
model.add(AttentionWithContext())
model.add(Dense(1))
model.add(BatchNormalization())
model.add(keras.layers.core.Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=[metrics.binary_crossentropy])
```

CNN-RNN fusion


 
```python
from keras.layers import Flatten, Reshape
# Merge is the Keras 1 API; in early Keras 2 releases it survives under keras.legacy.layers
# (a functional-API alternative is sketched after this block)
from keras.legacy.layers import Merge


class NonMasking(Layer):
    def __init__(self, **kwargs):
        self.supports_masking = True
        super(NonMasking, self).__init__(**kwargs)

    def build(self, input_shape):
        input_shape = input_shape

    def compute_mask(self, input, input_mask=None):
        # do not pass the mask to the next layers
        return None

    def call(self, x, mask=None):
        return x

    def get_output_shape_for(self, input_shape):
        return input_shape


## RNN branch (left_hidden_units is assumed to be defined, like n_hidden_units above)
model_left = Sequential()
model_left.add(keras.layers.core.Masking(mask_value=0., input_shape=(max_lenth, max_features)))  # handles variable-length sequences
model_left.add(GRU(units=left_hidden_units, activation='relu', kernel_initializer='orthogonal',
                   recurrent_initializer='orthogonal', bias_initializer='zeros',
                   kernel_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.5, recurrent_dropout=0.0, implementation=1,
                   return_sequences=True))  # must be True when stacking recurrent layers
model_left.add(GRU(units=left_hidden_units, activation='relu', kernel_initializer='orthogonal',
                   recurrent_initializer='orthogonal', bias_initializer='zeros',
                   kernel_regularizer=regularizers.l2(0.01),
                   recurrent_regularizer=regularizers.l2(0.01),
                   dropout=0.5, recurrent_dropout=0.0, implementation=1,
                   return_sequences=True))
model_left.add(NonMasking())  # Flatten() does not support masking, so unmask here
model_left.add(Flatten())

## FCN branch
model_right = Sequential()
model_right.add(Conv1D(128, 3, padding='same', input_shape=(max_lenth, max_features)))
model_right.add(BatchNormalization())
model_right.add(Activation('relu'))
model_right.add(Conv1D(256, 3))
model_right.add(BatchNormalization())
model_right.add(Activation('relu'))
model_right.add(Conv1D(128, 3))
model_right.add(BatchNormalization())
model_right.add(Activation('relu'))
model_right.add(GlobalAveragePooling1D())
model_right.add(Reshape((1, 1, -1)))
model_right.add(Flatten())

model = Sequential()
model.add(Merge([model_left, model_right], mode='concat'))

model.add(Dense(128))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(BatchNormalization())
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit([left_x_train, right_x_train], y_train, batch_size=train_batch_size, epochs=training_iters, verbose=2,
          callbacks=cb, validation_split=0.2,  # cb is already a list, so pass it directly
          shuffle=True, class_weight=None, sample_weight=None, initial_epoch=0)
pred_y = model.predict([left_x_test, right_x_test], batch_size=test_batch_size)
score = roc_auc_score(y_test, pred_y)
```
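Merge over Sequential branches is the legacy Keras 1 pattern. A sketch of the same fusion head in the Keras 2 functional API, reusing the two branches above as callables (variable names are the ones already defined in this section):

```python
from keras.models import Model
from keras.layers import Input, concatenate

left_in = Input(shape=(max_lenth, max_features))
right_in = Input(shape=(max_lenth, max_features))

# call the Sequential branches on the new inputs and concatenate their outputs
merged = concatenate([model_left(left_in), model_right(right_in)])

x = Dense(128)(merged)
x = Dropout(0.5)(x)
x = Dense(1)(x)
x = BatchNormalization()(x)
out = Activation('sigmoid')(x)

model = Model(inputs=[left_in, right_in], outputs=out)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
```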
