Transformer+CNN

from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import time
import numpy as np
import matplotlib.pyplot as plt
import sys
from tensorflow import keras
import os
from tensorflow import nn
import math

# Low-level runtime configuration: require at least one GPU and let
# TensorFlow grow its memory allocation on the first one on demand.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert physical_devices, "Not enough GPU hardware devices available"
tf.config.experimental.set_memory_growth(device=physical_devices[0], enable=True)

# Optional: pin the process to one GPU by PCI bus order.
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # NOTE(review): index "0" selects the first GPU

def positional_encoding(pos, d_model):
    '''
    Build the sinusoidal positional-encoding table.

    :param pos: number of positions (rows) to encode
    :param d_model: width of the hidden representation
    :return: float32 tensor of shape [1, pos, d_model]; the leading axis lets
             the table broadcast over the batch dimension
    '''
    def get_angles(position, i):
        # `i` is the feature index; i // 2 maps the pair (2k, 2k+1) onto the
        # same frequency. Result shape: [pos, d_model].
        # Builtin float() replaces np.float, which was removed in NumPy 1.24.
        return position / np.power(10000., 2. * (i // 2.) / float(d_model))

    angle_rates = get_angles(np.arange(pos)[:, np.newaxis],
                             np.arange(d_model)[np.newaxis, :])
    # Even feature indices get sin, odd ones get cos. The two halves are
    # concatenated (all sin columns first, then all cos columns), not
    # interleaved.
    pe_sin = np.sin(angle_rates[:, 0::2])
    pe_cos = np.cos(angle_rates[:, 1::2])
    pos_encoding = np.concatenate([pe_sin, pe_cos], axis=-1)
    pos_encoding = tf.cast(pos_encoding[np.newaxis, ...], tf.float32)
    return pos_encoding

'''*************** 第一部分: Scaled dot-product attention ***************'''
def my_mask(inputs):
    '''
    Apply a causal (look-ahead) mask to attention scores.

    Position (q, k) is kept when k <= q (lower triangle including the
    diagonal); every other position is replaced by a large negative number so
    that it vanishes after softmax.

    :param inputs: score tensor whose last two axes are (T_q, T_k)
    :return: tensor of the same shape and dtype as `inputs`
    '''
    # Lower-triangular boolean mask built from the dynamic (T_q, T_k) shape.
    # An explicit boolean mask is used instead of "multiply by the mask and
    # test for 0", because that test also wiped out genuine zero scores
    # inside the visible triangle.
    visible = tf.linalg.band_part(tf.ones(tf.shape(inputs)[-2:]), -1, 0) > 0
    padding_num = tf.cast(-2 ** 32 + 1.1, inputs.dtype)
    # tf.where broadcasts the 2-D mask over any leading batch/head axes.
    outputs = tf.where(visible, inputs, padding_num)
    return outputs

def scaled_dot_product_attention(q, k, v, mask = None):
    '''
    attention(Q, K, V) = softmax(Q * K^T / sqrt(dk)) * V

    :param q: query tensor, last axis is the per-head depth
    :param k: key tensor
    :param v: value tensor
    :param mask: any non-None value switches on the causal mask from my_mask
    :return: (attended values, attention weights)
    '''
    # Scale the raw Q.K^T scores by sqrt(depth of q).
    depth = tf.cast(tf.shape(q)[-1], tf.float32)
    logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(depth)

    if mask is not None:
        print('有mask')
        logits = my_mask(logits)

    # Softmax over the last axis (keys); masked positions end up near 0.
    attention_weights = tf.nn.softmax(logits)
    return tf.matmul(attention_weights, v), attention_weights

'''*************** 第二部分: Multi-Head Attention ***************'''
'''
multi-head attention包含4部分: - 线性层与分头 - 缩放点积注意力 - 头连接 - 末尾线性层
每个多头注意块有三个输入; Q(查询),K(密钥),V(值)。 它们通过第一层线性层并分成多个头。
注意:点积注意力时需要使用mask, 多头输出需要使用tf.transpose调整各维度。
Q,K和V不是一个单独的注意头,而是分成多个头,因为它允许模型共同参与来自不同表征空间的不同信息。
在拆分之后,每个头部具有降低的维度,总计算成本与具有全维度的单个头部注意力相同。
'''
class MultiHeadAttention(tf.keras.layers.Layer):
    '''
    Multi-head attention layer.

    Q, K and V are each passed through their own Dense projection, split into
    `num_heads` heads of size d_model // num_heads, run through scaled
    dot-product attention, merged back and passed through a final Dense layer.

    :param d_model: model width; must be divisible by num_heads
    :param num_heads: number of attention heads
    '''
    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        # d_model must split evenly across the heads.
        assert d_model % num_heads == 0
        # Width of a single head.
        self.depth = d_model // num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        '''Reshape [batch, seq_len, d_model] to [batch, num_heads, seq_len, depth].'''
        x = tf.reshape(x, [batch_size, -1, self.num_heads, self.depth])
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, q, k, v, mask = None):
        '''
        :param q: queries, [batch, seq_len_q, features]
        :param k: keys, [batch, seq_len_k, features]
        :param v: values, [batch, seq_len_k, features]
        :param mask: forwarded to scaled_dot_product_attention
        :return: (output [batch, seq_len_q, d_model], attention weights
                 [batch, num_heads, seq_len_q, seq_len_k])
        '''
        batch_size = tf.shape(q)[0]
        # Each input gets its own projection. Previously all three went
        # through self.wq, which tied the Q/K/V projections together and left
        # wk / wv unused.
        # NOTE(review): checkpoints written by the old code were produced
        # without wk / wv ever being called, so they may not restore cleanly
        # into this layer - retrain or verify before reusing them.
        q = self.wq(q)  # [batch, seq_len_q, d_model]
        k = self.wk(k)  # [batch, seq_len_k, d_model]
        v = self.wv(v)  # [batch, seq_len_k, d_model]
        # Split into heads: [batch, num_heads, seq_len, depth].
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)
        # scaled_attention: [batch, num_heads, seq_len_q, depth]
        # attention_weights: [batch, num_heads, seq_len_q, seq_len_k]
        scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
        # Move the head axis back next to depth, then merge the two.
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        # Final linear projection.
        output = self.dense(concat_attention)
        return output, attention_weights

class LayerNormalization(tf.keras.layers.Layer):
    '''
    Layer normalisation over the last axis with a learnable scale (gamma)
    and shift (beta). `epsilon` is added to the standard deviation.
    '''
    def __init__(self, epsilon=1e-8, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        # One gamma / beta entry per feature of the last axis.
        feature_shape = input_shape[-1:]
        self.gamma = self.add_weight(
            name='gamma',
            shape=feature_shape,
            initializer=tf.ones_initializer(),
            trainable=True,
        )
        self.beta = self.add_weight(
            name='beta',
            shape=feature_shape,
            initializer=tf.zeros_initializer(),
            trainable=True,
        )
        super().build(input_shape)

    def call(self, x):
        # Statistics are taken per position, across the feature axis.
        mean = tf.keras.backend.mean(x, axis=-1, keepdims=True)
        std = tf.keras.backend.std(x, axis=-1, keepdims=True)
        centered = x - mean
        return self.gamma * centered / (std + self.epsilon) + self.beta

def point_wise_feed_forward(d_model, diff):
    '''
    Two-layer position-wise feed-forward network.

    :param d_model: output width
    :param diff: width of the hidden ReLU layer
    :return: a tf.keras.Sequential of Dense(diff, relu) -> Dense(d_model)
    '''
    hidden = tf.keras.layers.Dense(diff, activation=tf.nn.relu)
    projection = tf.keras.layers.Dense(d_model)
    return tf.keras.Sequential([hidden, projection])
'''encoder layer:
每个编码层包含以下子层 - Multi-head attention(带掩码) - Point wise feed forward networks
每个子层中都有残差连接,并最后通过一个归一化层(LayerNorm)。残差连接有助于避免深度网络中的梯度消失问题。
每个子层输出是LayerNorm(x + Sublayer(x)),规范化是在d_model维的向量上。Transformer一共有n个编码层。
'''
class EncoderLayer(tf.keras.layers.Layer):
    '''
    One encoder block: self-attention and a feed-forward network, each
    followed by dropout, a residual connection and layer normalisation.
    '''
    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward(d_model, dff)
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training):
        # Self-attention sub-layer (Q = K = V = inputs, no mask).
        attended, _ = self.mha(inputs, inputs, inputs)
        attended = self.dropout1(attended, training=training)
        normed_attention = self.layernorm1(inputs + attended)

        # Feed-forward sub-layer.
        transformed = self.ffn(normed_attention)
        transformed = self.dropout2(transformed, training=training)
        return self.layernorm2(normed_attention + transformed)

class Encoder(tf.keras.layers.Layer):
    '''
    Stack of `num_layers` EncoderLayer blocks.

    The input is used directly as the embedding (no Embedding / Dense is
    applied), scaled by sqrt(d_model), and summed with the positional
    encoding before dropout and the encoder layers.
    '''
    def __init__(self, d_model, num_layers, num_heads, dff,
               max_seq_len, dropout_rate=0.1):
        super().__init__()
        # Input projection; created here but not applied in call().
        self.indata = tf.keras.layers.Dense(d_model)
        self.num_layers = num_layers
        self.d_model = d_model
        # Positional table, shape [1, max_seq_len, d_model].
        self.pos_encoding = positional_encoding(max_seq_len, d_model)
        self.encoder_layer = [
            EncoderLayer(d_model, num_heads, dff, dropout_rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training):
        # Axis 1 of the input is the sequence length.
        seq_len = inputs.shape[1]
        scale = tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        emb = inputs * scale + self.pos_encoding[:, :seq_len, :]
        x = self.dropout(emb, training=training)
        for layer in self.encoder_layer:
            x = layer(x, training)
        return x

class DecoderLayer(tf.keras.layers.Layer):
    '''
    One decoder block: masked self-attention, attention over the encoder
    output, and a feed-forward network. Each sub-layer is followed by
    dropout, a residual connection and layer normalisation.
    '''
    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super().__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward(d_model, dff)
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.layernorm3 = LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout3 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, encoder_out, training):
        # Masked self-attention: Q = K = V = inputs.
        self_att, att_weight1 = self.mha1(inputs, inputs, inputs,mask = True)
        self_att = self.dropout1(self_att, training=training)
        self_att = self.layernorm1(inputs + self_att)

        # Encoder-decoder attention: Q comes from the decoder,
        # K = V = encoder_out.
        cross_att, att_weight2 = self.mha2(self_att, encoder_out, encoder_out)
        cross_att = self.dropout2(cross_att, training=training)
        cross_att = self.layernorm2(self_att + cross_att)

        # Feed-forward sub-layer.
        ffn_out = self.dropout3(self.ffn(cross_att), training=training)
        output = self.layernorm3(cross_att + ffn_out)
        return output, att_weight1, att_weight2

class Decoder(tf.keras.layers.Layer):
    '''
    Stack of `num_layers` DecoderLayer blocks.

    Inputs are projected to d_model with a Dense layer; the positional
    encoding is built but currently not added to the embedding.
    '''
    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
        super().__init__()
        self.seq_len = tf.shape
        self.indata = tf.keras.layers.Dense(d_model)
        self.d_model = d_model
        self.num_layers = num_layers
        self.pos_encoding = positional_encoding(max_seq_len, d_model)
        self.decoder_layers = [
            DecoderLayer(d_model, num_heads, dff, dropout_rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, encoder_out, training):
        # Only needed by the (disabled) positional-encoding path.
        seq_len = inputs.shape[1]
        attention_weights = {}

        # Scaling by sqrt(d_model) and adding pos_encoding are disabled here.
        emb = self.indata(inputs)
        x = self.dropout(emb, training=training)

        for layer_no, layer in enumerate(self.decoder_layers, start=1):
            x, att1, att2 = layer(x, encoder_out, training)
            attention_weights['decoder_layer{}_att_w1'.format(layer_no)] = att1
            attention_weights['decoder_layer{}_att_w2'.format(layer_no)] = att2
        return x, attention_weights

def deinput_padding(seq_len,dim,batch_size):
    '''
    Build a dummy decoder input: all zeros except the first time step,
    which is filled with ones.

    :return: float64 array of shape (batch_size, seq_len, dim)
    '''
    template = np.zeros((seq_len, dim))
    template[0] = 1
    # Stack `batch_size` copies of the template along a new leading axis.
    return np.repeat(template[np.newaxis, ...], batch_size, axis=0)

# ---- Hyper-parameters ----
learn_rate = 2e-4
epochs = 5000
bat = 10  # batch size

# Dataset archive; other sizes used during experiments:
# './8000_np_img.npz', './2000_np_img.npz', './500_np_img.npz'
path = './5000_np_img.npz'

# ---- Network widths ----
base_dim = 8
mid_dim = 10
dense_dim = 100
# Channel multipliers for the six CNN stages: 1, 2, 4, 8, 16, 32.
time_list = [2 ** stage for stage in range(6)]

unit = 40
def c_b(chanel,kernel_size,stride = 1 ,padding ='valid'):
    '''
    Conv block: Conv2D -> BatchNormalization -> LeakyReLU.

    :param chanel: number of convolution filters
    :param kernel_size: convolution kernel size
    :param stride: convolution stride
    :param padding: Conv2D padding mode
    :return: keras.Sequential wrapping the three layers
    '''
    conv = tf.keras.layers.Conv2D(
        chanel,
        kernel_size=kernel_size,
        strides=stride,
        padding=padding,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
    )
    norm = tf.keras.layers.BatchNormalization()
    activation = keras.layers.LeakyReLU()
    return keras.Sequential([conv, norm, activation])
class Transformer(tf.keras.Model):
    '''
    CNN + Transformer-encoder regressor.

    call() reshapes the input to (-1, 15, 101, 101, 1), runs `mycnn` and
    `myDense1` on every time step through TimeDistributed, normalises and
    projects the per-step features, feeds them to the Encoder, and reduces
    the encoder output with Dense(1) -> Flatten -> Dense(1).
    The decoder path is disabled (commented out).
    '''
    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate=0.1):
        super(Transformer, self).__init__()
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        # (An earlier, shallower conv + max-pool CNN variant was removed here.)
        # Per-frame CNN: six stages of three conv blocks each. The third
        # block of every stage uses stride 2; the channel count of a stage is
        # base_dim * time_list[stage]. Global average pooling collapses the
        # spatial axes at the end.
        self.mycnn = tf.keras.Sequential([
            c_b(base_dim * time_list[0], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[0], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[0], [3, 3], stride=2, padding='SAME'),

            c_b(base_dim * time_list[1], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[1], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[1], [3, 3], stride=2, padding='SAME'),

            c_b(base_dim * time_list[2], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[2], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[2], [3, 3], stride=2, padding='SAME'),

            c_b(base_dim * time_list[3], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[3], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[3], [3, 3], stride=2, padding='SAME'),

            c_b(base_dim * time_list[4], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[4], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[4], [3, 3], stride=2, padding='SAME'),

            c_b(base_dim * time_list[5], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[5], [3, 3], stride=1, padding='SAME'),
            c_b(base_dim * time_list[5], [3, 3], stride=2, padding='SAME'),

            # tf.keras.layers.Flatten()

            tf.keras.layers.GlobalAveragePooling2D()
        ])

        # Per-frame MLP mapping the pooled CNN features to d_model.
        self.myDense1 = tf.keras.Sequential([
            tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),
            tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),

            tf.keras.layers.Dense(d_model)
        ])
        self.encoder = Encoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
        self.emb = tf.keras.layers.Dense(d_model)
        # self.decoder = Decoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
        # dim_dense reduces each encoder step to one value; final_layer maps
        # the flattened sequence to a single output.
        self.dim_dense = tf.keras.layers.Dense(1)
        self.final_layer = tf.keras.layers.Dense(1)
        self.flat = tf.keras.layers.Flatten()
    def call(self, inputs):
        # Cast to float32 and view the input as 15 single-channel 101x101
        # frames per sample.
        inputs = tf.cast(inputs, dtype=tf.float32)
        inputs = tf.reshape(inputs, (-1, 15, 101, 101, 1))

        # NOTE(review): the TimeDistributed wrappers are instantiated on every
        # call; consider creating them once in __init__.
        inputs = tf.keras.layers.TimeDistributed(self.mycnn)(inputs)

        out = tf.keras.layers.TimeDistributed(self.myDense1)(inputs)
        inputs = out
        inputs = self.layernorm1(inputs)
        inputs = self.emb(inputs)
        print('trains_inputs:',inputs)
        inputs = self.layernorm2(inputs)
        print('layerhoutrains_inputs:', inputs)
        # Encoder pass. No explicit `training` argument is passed here.
        encoder_output = self.encoder(inputs)
        # Reduce every step to a scalar, flatten the steps, regress one value.
        encoder_output = self.dim_dense(encoder_output)
        print('encoder_output:',encoder_output.shape)
        encoder_output = self.flat(encoder_output)
        print('encoder_output:', encoder_output.shape)
        final_out = self.final_layer(encoder_output)

        # ------------------------- Decoder (disabled) -------------------------
        # decode_input = deinput_padding(s_dim,in_dim,bat_size)
        # decoder_output, att_weights = self.decoder(decode_input, encoder_output, True)
        # final_out = self.final_layer(decoder_output)
        # final_out = final_out[:,-2,:]
        # -----------------------------------------------------------------------
        return final_out

# transformer测试
# sample_transformer = Transformer(num_layers=2, d_model=8, num_heads=4, dff=200, max_seq_len=30)
# temp_input = tf.random.uniform((10,30, 100))
# trana_out = sample_transformer(temp_input)
# print('trana_out.shape:',trana_out.shape)
# print(trana_out)
# sys.exit(2)


#   加载数据

def split_data(x_data,y_data,amount):
    '''
    Randomly split paired samples into a training and a test part.

    :param x_data: array indexed by sample along axis 0
    :param y_data: array with one entry per sample along axis 0
    :param amount: fraction of the samples that goes to the training part
    :return: (train_x, train_y, test_x, test_y)
    '''
    sample_count = x_data.shape[0]
    # One shared shuffled order keeps every x row paired with its y row.
    indices = list(np.random.permutation(sample_count))
    print('indices:',indices,type(indices))
    cut = int(sample_count * amount)
    train_idx, test_idx = indices[:cut], indices[cut:]
    return x_data[train_idx], y_data[train_idx], x_data[test_idx], y_data[test_idx]

# Load the archive. Keys read below: 'Img_data' (images), 'Rain_data' (targets).
data = np.load(path)
timelength = 15

# Keep the last `timelength` of the 15 frames and force the layout
# (samples, timelength, 4, 101, 101).
train_imgs = data['Img_data'].astype(np.float32)[:, 15 - timelength:, :, :, :]
train_imgs = np.reshape(train_imgs, (-1, timelength, 4, 101, 101))
print('train_imgs.shape:',train_imgs.shape)

# Scale the images by 1/255.
train_imgs = train_imgs / 255.

# Rainfall targets as a column vector of shape (samples, 1).
train_rain = np.reshape(data['Rain_data'].astype(np.float32), (-1, 1))

# 99.9% of the samples are used for training; use 0.8 for an 80/20 split.
train_imgs,train_rain,test_imgs,test_rain = split_data(train_imgs,train_rain,0.999)

# Bucket the rainfall into three classes:
#   0 = below 5, 1 = from 5 to 15 inclusive, 2 = above 15.
type_train = np.where(train_rain > 15,2,1)
type_train = np.where(train_rain < 5 ,0,type_train)

# Count the samples per class. The first occurrence must count as 1;
# it used to be stored as 0, which left every class one sample short.
num_dict = {}
for i in type_train:
    raintype = i[0]
    num_dict[raintype] = num_dict.get(raintype, 0) + 1

# Turn the counts into fractions of the training set.
for key,value in num_dict.items():
    value = value / train_imgs.shape[0]
    num_dict[key] = value
    print('key:{},value:{}'.format(key,value))
# .get() avoids a KeyError when the split contains no class-0 sample.
print('num_dict[0]:',num_dict.get(0, 0.0))


# One-hot class labels, shape (samples, 3).
onehot_train = tf.one_hot(type_train,depth=3)
onehot_train = tf.reshape(onehot_train,(-1,3))
#   Split the images by height level. Returned keys:
#   ['high0', 'high1', 'high2', 'high3'];
#   every value has shape (b, timelength, 101, 101).
def get_high_img(or_img):
    '''Return a dict mapping 'high<i>' to the frames of height level i (axis 2).'''
    high_dic = {}
    for level in range(4):
        high_key = f'high{level}'
        level_frames = or_img[:, :, level, :, :]
        high_dic[high_key] = np.reshape(level_frames, (-1, timelength, 101, 101))
        print('high_key:',high_key)
    return high_dic

# Height level used for training and evaluation.
which_high = 'high3'
train_high_dic = get_high_img(train_imgs)
train_high0_img = train_high_dic[which_high]

print('train_high0_img.shape:',train_high0_img.shape)
print('high0_img max:{},min:{}'.format(np.max(train_high0_img),np.min(train_high0_img)))

test_high_dic = get_high_img(test_imgs)
test_high0_img = test_high_dic[which_high]

# Batched input pipelines; only the training set is shuffled.
train_db = tf.data.Dataset.from_tensor_slices((train_high0_img, train_rain)).shuffle(500).batch(bat)
test_db = tf.data.Dataset.from_tensor_slices((test_high0_img, test_rain)).batch(bat)

early_stoping = EarlyStopping(monitor='val_loss',patience=120)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras releases reject.
opt = tf.keras.optimizers.Adam(learning_rate=learn_rate,clipnorm=0.1)
# Alternatives tried:
# opt = tf.keras.optimizers.Adam(learning_rate=learn_rate)
# opt = tf.keras.optimizers.SGD(learning_rate=learn_rate)
# opt = tf.keras.optimizers.RMSprop(learning_rate=learn_rate)
my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)

###################   Optional: resume from saved weights   ###################
# model_name = './my_save_model/trans_model_3/transmodel_3.ckpt'
# my_model.load_weights(model_name)
###############################################################################

# my_model = My_ConvLSTM(unit)
my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
# `callbacks` is documented as a list of callbacks, so wrap the single one.
my_model.fit(train_db,validation_data=test_db,epochs=200, validation_freq=1,callbacks=[early_stoping])
# my_model.fit(train_db,validation_data=test_db,epochs=epochs, validation_freq=1,steps_per_epoch=train_imgs.shape[0]//bat)


#   Save the trained weights.
model_name = './my_save_model/trans_model_3/transmodel_3.ckpt'
# model_name = 'my_model1.ckpt'
# Make sure the target directory exists before writing the checkpoint.
os.makedirs(os.path.dirname(model_name), exist_ok=True)
my_model.save_weights(model_name)
print('保存完成')
del my_model
#   Rebuild the model, reload the weights and evaluate on the test split.
my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
my_model.load_weights(model_name)
my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
print('加载完成')
my_model.evaluate(test_db)

# for x, y in train_db:
#     print('x.shape;',x.shape)
#     out = my_model(x)
#     print('out:',out)
#     print('y:',y)
#
# print('------------     test        ----------------------------------')
# for x, y in test_db:
#     # print('x.shape;',x.shape)
#     out = my_model(x)
#
#     print('out:',out)
#     print('y:',y)
# #
# #     # loss = tf.losses.MSE(y, out)
# #     # # loss = my_loss(y, out)
# #     # loss = tf.reduce_mean(loss)
# #     # test_loss.append(float(loss))
# #     # test_loss = tf.reduce_mean(test_loss)
# # print()

 

上一篇:tensorflow(三十七):卷积神经网络——CIFAR100与VGG实战


下一篇:经典卷积网络VGG,GoogLeNet,Inception