# Core code
# (2) Build the localization network of the spatial transformer (ST).
# TODO: try more conv layers, and do max pooling on the X- and Y-axes
# respectively.
#
# Alternatives that were present only as disabled (commented-out) code:
#   * a 2x2 MaxPooling2D as the very first layer (carrying `input_shape`),
#     followed by the 32-filter 5x5 convolution;
#   * a Dropout(0.2) after each of the first two ReLU activations.
locnet = Sequential()

# `input_shape` is defined outside this excerpt.
# Conv2D is used instead of the legacy `Convolution2D` alias so this network
# is spelled the same way as the classifier built further down.
locnet.add(Conv2D(32, (5, 5), input_shape=input_shape))
locnet.add(Activation('relu'))
locnet.add(MaxPooling2D(pool_size=(2, 2)))

locnet.add(Conv2D(64, (5, 5)))
locnet.add(Activation('relu'))

locnet.add(Conv2D(64, (3, 3)))
locnet.add(Activation('relu'))
locnet.add(MaxPooling2D(pool_size=(2, 2)))

locnet.add(Flatten())
locnet.add(Dense(50))
locnet.add(Activation('relu'))

# Six outputs -- presumably the 2x3 affine transform parameters consumed by
# the SpatialTransformer layer (TODO confirm against its implementation).
# `weights` is defined outside this excerpt; presumably it initialises this
# layer to the identity transform -- verify where it is built.
locnet.add(Dense(6, weights=weights))

# summary() prints the layer table itself and returns None, so wrapping it
# in print() would emit an extra "None" line after the table.
locnet.summary()
# (3) Build the classification CNN.
#
# The first layer is the spatial transformer, which wraps `locnet` as its
# localization network and is configured with output_size=(30, 30).  It is
# followed by three 3x3 ReLU convolutions (32, 64, 64 filters), each with
# Dropout(0.5), two 2x2 max-pools, a 256-unit dense layer and a softmax over
# `nb_classes` outputs.  `input_shape` and `nb_classes` are defined outside
# this excerpt.
#
# The original code also carried, as commented-out lines, an alternative
# convolution stack (32 filters with padding='same', then 64 filters, each
# with a separate ReLU and 2x2 max-pool, ending in Dropout(0.5)) and one more
# 64-filter convolution + Dropout(0.5) before Flatten.
# Original author note: "E: removed first 3 dropout layers".
model = Sequential([
    SpatialTransformer(
        localization_net=locnet,
        output_size=(30, 30),
        input_shape=input_shape,
    ),

    # Convolutional feature extractor.
    Conv2D(32, kernel_size=(3, 3), activation='relu'),
    Dropout(0.5),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Dropout(0.5),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, kernel_size=(3, 3), activation='relu'),
    Dropout(0.5),
    MaxPooling2D(pool_size=(2, 2)),

    # Classifier head.  Dropout is applied to the dense pre-activations,
    # i.e. before the ReLU, exactly as in the original layer order.
    Flatten(),
    Dense(256),
    Dropout(0.5),
    Activation('relu'),
    Dense(nb_classes),
    Activation('softmax'),
])