from tensorflow.keras.callbacks import EarlyStopping
import tensorflow as tf
import time
import numpy as np
import matplotlib.pyplot as plt
import sys
from tensorflow import keras
import os
from tensorflow import nn
import math

# Low-level device configuration: require a GPU and let TensorFlow grow its
# memory allocation on demand instead of reserving everything up front.
physical_devices = tf.config.experimental.list_physical_devices('GPU')
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
tf.config.experimental.set_memory_growth(physical_devices[0], True)
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
# os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # select a specific GPU


def my_loss(y_true, y_pred):
    """Piecewise-weighted squared error.

    The squared error is halved where it is below 25 and (after that step)
    doubled where the target exceeds 30.

    :param y_true: ground-truth tensor.
    :param y_pred: predicted tensor, broadcastable against ``y_true``.
    :return: element-wise loss tensor (not reduced here).
    """
    sub = y_true - y_pred
    sub = tf.square(sub)
    # Piecewise weighting of the loss.
    sub = tf.where(sub < 25., sub * 0.5, sub)
    sub = tf.where(y_true > 30., sub * 2, sub)
    # sub = tf.where(y_pred < 0., sub * 100, sub)
    return sub


def positional_encoding(pos, d_model):
    '''
    Build a sinusoidal positional encoding.

    :param pos: number of positions to encode (the sequence axis).
    :param d_model: hidden-state width (equivalent to num_units).
    :return: float32 tensor of shape [1, pos, d_model]; the leading axis is
        there to broadcast against the batch dimension. The sine columns
        (computed from even indices) are concatenated before the cosine
        columns (computed from odd indices) rather than interleaved.
    '''
    def get_angles(position, i):
        # `i` plays the role of 2i / 2i+1 in the usual formula.
        # Result shape is [position_num, d_model].
        # Builtin float() replaces np.float, which NumPy 1.24 removed.
        return position / np.power(10000., 2. * (i // 2.) / float(d_model))

    angle_rates = get_angles(np.arange(pos)[:, np.newaxis],
                             np.arange(d_model)[np.newaxis, :])
    # Even indices are encoded with sin, odd indices with cos.
    pe_sin = np.sin(angle_rates[:, 0::2])
    pe_cos = np.cos(angle_rates[:, 1::2])
    pos_encoding = np.concatenate([pe_sin, pe_cos], axis=-1)
    pos_encoding = tf.cast(pos_encoding[np.newaxis, ...], tf.float32)
    return pos_encoding


# Part 1: scaled dot-product attention (original section banner kept below).
'''*************** 第一部分: Scaled dot-product attention ***************'''


def my_mask(inputs):
    """Mask attention scores with a lower-triangular matrix.

    A lower-triangular matrix of ones, sized from the last dimension of
    ``inputs``, is multiplied into the scores; every entry of the product
    that equals 0 is then replaced by a large negative constant so that a
    following softmax drives it towards 0.

    NOTE(review): the mask is square (last_dim x last_dim), so this
    presumably expects seq_len_q == seq_len_k -- confirm against callers.
    NOTE(review): a score that is exactly 0 inside the kept triangle is also
    replaced, because the test is on the product rather than on the mask.

    :param inputs: attention score tensor.
    :return: tensor of the same shape with masked entries set to a large
        negative value.
    """
    s_dim = inputs.shape[-1]
    mask = tf.ones((s_dim, s_dim))  # (T_q, T_k)
    # Keep the lower triangle (including the diagonal) and zero the rest.
    mask = tf.linalg.LinearOperatorLowerTriangular(mask).to_dense()
    # Evaluates to -(2 ** 32) + 1.1 because ** binds tighter than unary minus.
    padding_num = -2 ** 32 + 1.1
    mask_data = tf.multiply(mask, inputs)
    outputs = tf.where(tf.equal(mask_data, 0.), padding_num, mask_data)
    return outputs


def scaled_dot_product_attention(q, k, v, mask=None):
    '''attention(Q, K, V) = softmax(Q * K^T / sqrt(dk)) * V

    :param q: query tensor.
    :param k: key tensor.
    :param v: value tensor.
    :param mask: any non-None value enables the lower-triangular mask from
        ``my_mask``; the value itself is not used.
    :return: tuple ``(outputs, attention_weights)``.
    '''
    # Multiply query by key.
    matmul_qk = tf.matmul(q, k, transpose_b=True)
    # Scale by sqrt(dk), where dk is the size of the last axis of q.
    dk = tf.cast(tf.shape(q)[-1], tf.float32)
    scaled_attention = matmul_qk / tf.math.sqrt(dk)
    # Apply the mask when requested.
    if mask is not None:
        print('有mask')
        scaled_attention = my_mask(scaled_attention)
    # Softmax over the last axis yields the attention weights; masked
    # positions end up close to 0.
    attention_weights = tf.nn.softmax(scaled_attention)
    # Weight the values.
    outputs = tf.matmul(attention_weights, v)
    return outputs, attention_weights


# Part 2: multi-head attention (original section banner kept below).
'''*************** 第二部分: Multi-Head Attention ***************'''

# English summary of the original note kept in the string below:
# multi-head attention consists of (1) linear layers and head splitting,
# (2) scaled dot-product attention, (3) head concatenation and (4) a final
# linear layer. Each block takes Q (query), K (key) and V (value); they pass
# through the first linear layers and are split into several heads. The
# dot-product attention may need a mask, and the multi-head output has its
# axes rearranged with tf.transpose. Splitting lets the model attend jointly
# to information from different representation sub-spaces; each head has a
# reduced width, so the total cost matches single-head full-width attention.
''' multi-head attention包含3部分: - 线性层与分头 - 缩放点积注意力 - 头连接 - 末尾线性层 每个多头注意块有三个输入; Q(查询),K(密钥),V(值)。 它们通过第一层线性层并分成多个头。 注意:点积注意力时需要使用mask, 多头输出需要使用tf.transpose调整各维度。 Q,K和V不是一个单独的注意头,而是分成多个头,因为它允许模型共同参与来自不同表征空间的不同信息。 
在拆分之后,每个头部具有降低的维度,总计算成本与具有全维度的单个头部注意力相同。 '''


class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head attention with separate Q/K/V projections.

    :param d_model: model width; must be divisible by ``num_heads``.
    :param num_heads: number of attention heads.
    """

    def __init__(self, d_model, num_heads):
        super(MultiHeadAttention, self).__init__()
        self.num_heads = num_heads
        self.d_model = d_model
        # d_model must split evenly across the heads.
        assert d_model % num_heads == 0
        # Per-head width after the split.
        self.depth = d_model // num_heads
        self.wq = tf.keras.layers.Dense(d_model)
        self.wk = tf.keras.layers.Dense(d_model)
        self.wv = tf.keras.layers.Dense(d_model)
        self.dense = tf.keras.layers.Dense(d_model)

    def split_heads(self, x, batch_size):
        """Reshape [batch, seq_len, d_model] to [batch, heads, seq_len, depth]."""
        x = tf.reshape(x, [batch_size, -1, self.num_heads, self.depth])
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, q, k, v, mask=None):
        """Run multi-head attention.

        :return: tuple ``(output, attention_weights)``; ``output`` has the
            sequence length of ``q`` and width ``d_model``.
        """
        batch_size = tf.shape(q)[0]
        # Input projections. K and V use their own layers; the previous code
        # sent all three inputs through `wq`, leaving `wk`/`wv` unused.
        # NOTE: checkpoints written before this fix contain no wk/wv weights.
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)
        # Split into heads: [batch_size, num_heads, seq_len, depth].
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)
        # scaled_attention:  [batch_size, num_heads, seq_len_q, depth]
        # attention_weights: [batch_size, num_heads, seq_len_q, seq_len_k]
        scaled_attention, attention_weights = scaled_dot_product_attention(
            q, k, v, mask)
        # Move the head axis back behind the sequence axis.
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        # Merge the heads: [batch_size, seq_len_q, d_model].
        concat_attention = tf.reshape(scaled_attention,
                                      (batch_size, -1, self.d_model))
        # Final linear projection.
        output = self.dense(concat_attention)
        return output, attention_weights


class LayerNormalization(tf.keras.layers.Layer):
    """Layer normalisation over the last axis with learnable gamma/beta.

    :param epsilon: constant added to the standard deviation before dividing.
    """

    def __init__(self, epsilon=1e-8, **kwargs):
        super(LayerNormalization, self).__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        self.gamma = self.add_weight(name='gamma',
                                     shape=input_shape[-1:],
                                     initializer=tf.ones_initializer(),
                                     trainable=True)
        self.beta = self.add_weight(name='beta',
                                    shape=input_shape[-1:],
                                    initializer=tf.zeros_initializer(),
                                    trainable=True)
        super(LayerNormalization, self).build(input_shape)

    def call(self, x):
        # Statistics are computed over the last axis only.
        mean = tf.keras.backend.mean(x, axis=-1, keepdims=True)
        std = tf.keras.backend.std(x, axis=-1, keepdims=True)
        return self.gamma * (x - mean) / (std + self.epsilon) + self.beta


def point_wise_feed_forward(d_model, diff):
    """Return a two-layer MLP: Dense(diff, relu) followed by Dense(d_model)."""
    return tf.keras.Sequential([
        tf.keras.layers.Dense(diff, activation=tf.nn.relu),
        tf.keras.layers.Dense(d_model)
    ])


# English summary of the original note kept in the string below:
# each encoder layer contains a multi-head attention sub-layer and a
# point-wise feed-forward network. Every sub-layer has a residual connection
# followed by normalisation, i.e. LayerNorm(x + Sublayer(x)) over the d_model
# axis; residual connections help against vanishing gradients. The
# transformer stacks n encoder layers.
'''encoder layer: 每个编码层包含以下子层 - Multi-head attention(带掩码) - Point wise feed forward networks 每个子层中都有残差连接,并最后通过一个正则化层。残差连接有助于避免深度网络中的梯度消失问题。 每个子层输出是LayerNorm(x + Sublayer(x)),规范化是在d_model维的向量上。Transformer一共有n个编码层。 '''


class EncoderLayer(tf.keras.layers.Layer):
    """One encoder block: self-attention and feed-forward, each with a
    dropout, a residual connection and layer normalisation."""

    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super(EncoderLayer, self).__init__()
        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward(d_model, dff)
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training):
        # Self-attention: Q = K = V = inputs, no mask.
        att_output, _ = self.mha(inputs, inputs, inputs)
        att_output = self.dropout1(att_output, training=training)
        output1 = self.layernorm1(inputs + att_output)
        # Feed-forward network.
        ffn_output = self.ffn(output1)
        ffn_output = self.dropout2(ffn_output, training=training)
        output2 = self.layernorm2(output1 + ffn_output)
        return output2


class Encoder(tf.keras.layers.Layer):
    """Stack of ``num_layers`` encoder blocks over already-embedded inputs.

    The inputs are scaled by sqrt(d_model), summed with the positional
    encoding, passed through dropout and then through each encoder layer.
    """

    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len,
                 dropout_rate=0.1):
        super(Encoder, self).__init__()
        # Created but not applied in call() (its use there is commented out).
        self.indata = tf.keras.layers.Dense(d_model)
        self.num_layers = num_layers
        self.d_model = d_model
        # self.emb = tf.keras.layers.Embedding(5000, d_model)
        # Shape [1, max_seq_len, d_model].
        self.pos_encoding = positional_encoding(max_seq_len, d_model)
        self.encoder_layer = [
            EncoderLayer(d_model, num_heads, dff, dropout_rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, training):
        # Length of the sequence axis actually present in the input.
        seq_len = inputs.shape[1]
        # word_embedding = self.emb(inputs)
        # word_embedding = self.indata(inputs)
        word_embedding = inputs
        word_embedding *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        emb = word_embedding + self.pos_encoding[:, :seq_len, :]
        x = self.dropout(emb, training=training)
        for i in range(self.num_layers):
            x = self.encoder_layer[i](x, training)
        return x


class DecoderLayer(tf.keras.layers.Layer):
    """One decoder block: masked self-attention, encoder-decoder attention
    and a feed-forward network, each with dropout, residual and layer norm."""

    def __init__(self, d_model, num_heads, dff, dropout_rate=0.1):
        super(DecoderLayer, self).__init__()
        self.mha1 = MultiHeadAttention(d_model, num_heads)
        self.mha2 = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward(d_model, dff)
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        self.layernorm3 = LayerNormalization()
        self.dropout1 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout2 = tf.keras.layers.Dropout(dropout_rate)
        self.dropout3 = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, encoder_out, training):
        # Masked multi-head self-attention: Q = K = V = inputs.
        att_out1, att_weight1 = self.mha1(inputs, inputs, inputs, mask=True)
        att_out1 = self.dropout1(att_out1, training=training)
        att_out1 = self.layernorm1(inputs + att_out1)
        # Encoder-decoder attention: Q = att_out1, K = V = encoder_out.
        att_out2, att_weight2 = self.mha2(att_out1, encoder_out, encoder_out)
        att_out2 = self.dropout2(att_out2, training=training)
        att_out2 = self.layernorm2(att_out1 + att_out2)
        # att_out2 = self.layernorm2(att_out2)
        # Feed-forward network.
        ffn_out = self.ffn(att_out2)
        ffn_out = self.dropout3(ffn_out, training=training)
        output = self.layernorm3(att_out2 + ffn_out)
        return output, att_weight1, att_weight2


class Decoder(tf.keras.layers.Layer):
    """Stack of ``num_layers`` decoder blocks.

    The input is projected to ``d_model`` by a dense layer; the positional
    encoding is built but its addition is currently commented out in call().
    """

    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len,
                 dropout_rate=0.1):
        super(Decoder, self).__init__()
        # NOTE(review): stores the tf.shape function itself and is not read
        # anywhere in this file.
        self.seq_len = tf.shape
        self.indata = tf.keras.layers.Dense(d_model)
        self.d_model = d_model
        self.num_layers = num_layers
        self.pos_encoding = positional_encoding(max_seq_len, d_model)
        self.decoder_layers = [
            DecoderLayer(d_model, num_heads, dff, dropout_rate)
            for _ in range(num_layers)
        ]
        self.dropout = tf.keras.layers.Dropout(dropout_rate)

    def call(self, inputs, encoder_out, training):
        seq_len = inputs.shape[1]
        attention_weights = {}
        word_embedding = self.indata(inputs)
        # word_embedding *= tf.math.sqrt(tf.cast(self.d_model, tf.float32))
        # emb = word_embedding + self.pos_encoding[:, :seq_len, :]
        emb = word_embedding
        x = self.dropout(emb, training=training)
        for i in range(self.num_layers):
            x, att1, att2 = self.decoder_layers[i](x, encoder_out, training)
            attention_weights['decoder_layer{}_att_w1'.format(i+1)] = att1
            attention_weights['decoder_layer{}_att_w2'.format(i + 1)] = att2
        return x, attention_weights


def deinput_padding(seq_len, dim, batch_size):
    """Build a decoder start input of shape (batch_size, seq_len, dim).

    Every sample is all zeros except the first sequence position, which is
    filled with ones.
    """
    zero = np.zeros((seq_len, dim))
    zero[0, :] = 1
    zero = np.tile(zero, (batch_size, 1, 1))
    return zero


# Hyper-parameters
learn_rate = 2e-4
# learn_rate = 2.0e-4
epochs = 200
bat = 10
pt = 120  # patience
# path = './8000_np_img.npz'
path = './5000_np_img.npz'
# path = './2000_np_img.npz'
# path = './500_np_img.npz'

base_dim = 8
mid_dim = 10
dense_dim = 100
time_list = [1, 2, 4, 8, 16, 32]
unit = 40


def c_b(chanel, kernel_size, stride=1, padding='valid'):
    """Return a Conv2D -> BatchNormalization -> LeakyReLU block.

    :param chanel: number of convolution filters.
    :param kernel_size: convolution kernel size.
    :param stride: convolution stride.
    :param padding: convolution padding mode.
    """
    initial = tf.keras.initializers.TruncatedNormal(stddev=0.02)
    result = keras.Sequential([
        tf.keras.layers.Conv2D(chanel, kernel_size=kernel_size,
                               strides=stride, padding=padding,
                               kernel_initializer=initial),
        tf.keras.layers.BatchNormalization(),
        keras.layers.LeakyReLU()
    ])
    return result


class Transformer(tf.keras.Model):
    """Per-frame CNN features followed by a transformer encoder and a
    scalar regression head.

    The decoder path is disabled (commented out); only the encoder is used.
    """

    def __init__(self, d_model, num_layers, num_heads, dff, max_seq_len,
                 dropout_rate=0.1):
        super(Transformer, self).__init__()
        self.layernorm1 = LayerNormalization()
        self.layernorm2 = LayerNormalization()
        # An earlier, smaller CNN (five c_b + MaxPool2D stages with 6/12/24/
        # 48/48 filters followed by Flatten) was replaced by the stack below.
        #
        # One stage per multiplier in time_list: two stride-1 blocks followed
        # by a stride-2 block, all with base_dim * multiplier filters. This is
        # the same layer sequence as the 18 explicit c_b(...) entries used
        # previously.
        cnn_layers = []
        for multiplier in time_list[:6]:
            channels = base_dim * multiplier
            cnn_layers.extend([
                c_b(channels, [3, 3], stride=1, padding='SAME'),
                c_b(channels, [3, 3], stride=1, padding='SAME'),
                c_b(channels, [3, 3], stride=2, padding='SAME'),
            ])
        # tf.keras.layers.Flatten()
        cnn_layers.append(tf.keras.layers.GlobalAveragePooling2D())
        self.mycnn = tf.keras.Sequential(cnn_layers)
        self.myDense1 = tf.keras.Sequential([
            # tf.keras.layers.Dense(10, activation='relu'),
            tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),
            tf.keras.layers.Dense(dense_dim, activation=tf.nn.relu),
            tf.keras.layers.Dense(d_model)
            # tf.keras.layers.Dense(3)
        ])
        self.encoder = Encoder(d_model, num_layers, num_heads, dff,
                               max_seq_len, dropout_rate)
        self.emb = tf.keras.layers.Dense(d_model)
        # self.decoder = Decoder(d_model, num_layers, num_heads, dff, max_seq_len, dropout_rate)
        self.dim_dense = tf.keras.layers.Dense(1)
        self.final_layer = tf.keras.layers.Dense(1)
        self.flat = tf.keras.layers.Flatten()

    def call(self, inputs):
        """Map a batch of image sequences to one value per sample.

        ``inputs`` is cast to float32 and reshaped to
        (-1, 15, 101, 101, 1), so it must hold 15 frames of 101x101 pixels
        per sample.
        """
        inputs = tf.cast(inputs, dtype=tf.float32)
        inputs = tf.reshape(inputs, (-1, 15, 101, 101, 1))
        # Apply the CNN and the dense stack to every frame independently.
        inputs = tf.keras.layers.TimeDistributed(self.mycnn)(inputs)
        out = tf.keras.layers.TimeDistributed(self.myDense1)(inputs)
        inputs = out
        inputs = self.layernorm1(inputs)
        inputs = self.emb(inputs)
        print('trains_inputs:',inputs)
        # Encoder pass; output shape [batch_size, seq_len_input, d_model].
        inputs = self.layernorm2(inputs)
        print('layerhoutrains_inputs:', inputs)
        encoder_output = self.encoder(inputs)
        # Collapse d_model to 1, flatten the sequence axis, then regress.
        encoder_output = self.dim_dense(encoder_output)
        print('encoder_output:',encoder_output.shape)
        encoder_output = self.flat(encoder_output)
        print('encoder_output:', encoder_output.shape)
        final_out = self.final_layer(encoder_output)
        # ------------------------- disabled decoder path -------------------
        # decode_input = deinput_padding(s_dim,in_dim,bat_size)
        # decoder_output, att_weights = self.decoder(decode_input, encoder_output, True)
        # # Final projection to the output layer,
        # # shape=[batch_size, seq_len_target, target_vocab_size]
        # final_out = self.final_layer(decoder_output)
        # final_out = final_out[:,-2,:]
        # --------------------------------------------------------------------
        return final_out


# Transformer smoke test
# sample_transformer = Transformer(num_layers=2, d_model=8, num_heads=4, dff=200, max_seq_len=30)
# temp_input = tf.random.uniform((10,30, 100))
# trana_out = sample_transformer(temp_input)
# print('trana_out.shape:',trana_out.shape)
# print(trana_out)
# sys.exit(2)


# Data loading
def split_data(x_data, y_data, amount):
    """Randomly split samples into a train and a test part.

    :param x_data: array indexed by sample along axis 0.
    :param y_data: array with the same number of samples as ``x_data``.
    :param amount: fraction of samples assigned to the training part.
    :return: ``(train_x, train_y, test_x, test_y)``.
    """
    all_sample = x_data.shape[0]
    indices = np.random.permutation(all_sample)
    indices = list(indices)
    print('indices:',indices,type(indices))
    split_index = int(all_sample * amount)
    train_x = x_data[indices[:split_index]]
    train_y = y_data[indices[:split_index]]
    test_x = x_data[indices[split_index:]]
    test_y = y_data[indices[split_index:]]
    return train_x, train_y, test_x, test_y


data = np.load(path)
timelength = 15
# The archive was written with Img_data=result, Samid_data=result_id,
# Rain_data=result_rain.
# Per the original note, Img_data has shape (N, 15, 4, 101, 101).
train_imgs = data['Img_data'].astype(np.float32)
# train_imgs = np.reshape(train_imgs[:,14,:,:,:],(-1,4,101,101))
# Keep the last `timelength` frames of every sample.
train_imgs = np.reshape(train_imgs[:, 15-timelength:, :, :, :],
                        (-1, timelength, 4, 101, 101))
print('train_imgs.shape:',train_imgs.shape)
# Scale the images by 1/255 (maps 8-bit pixel values to [0, 1]).
train_imgs = train_imgs / 255.
train_rain = data['Rain_data'].astype(np.float32)
train_rain = np.reshape(train_rain, (-1, 1))
train_imgs, train_rain, test_imgs, test_rain = split_data(
    train_imgs, train_rain, 0.8)

# Rainfall classes: 0 for < 5, 2 for > 15, 1 otherwise
# (light / moderate / heavy rain).
type_train = np.where(train_rain > 15, 2, 1)
type_train = np.where(train_rain < 5, 0, type_train)
# Share of each class in the training set.
num_dict = {}
for i in type_train:
    raintype = i[0]
    # Count the first occurrence as 1; it was previously stored as 0, which
    # made every class count one too low.
    num_dict[raintype] = num_dict.get(raintype, 0) + 1
for key, value in num_dict.items():
    value = value / train_imgs.shape[0]
    num_dict[key] = value
    print('key:{},value:{}'.format(key,value))
# .get avoids a KeyError when no training sample falls into class 0.
print('num_dict[0]:',num_dict.get(0, 0.0))
onehot_train = tf.one_hot(type_train, depth=3)
onehot_train = tf.reshape(onehot_train, (-1, 3))


def get_high_img(or_img):
    """Split images by height channel.

    :param or_img: array indexed as [sample, time, height, row, col] with at
        least 4 entries on the height axis.
    :return: dict with keys 'high0'..'high3'; each value is reshaped to
        (-1, timelength, 101, 101).
    """
    high_dic = {}
    for i in range(4):
        high_key = 'high{}'.format(i)
        # high_img = or_img[:,i,:,:]
        # high_dic[high_key] = np.reshape(high_img,(-1,101,101))
        high_img = or_img[:, :, i, :, :]
        high_dic[high_key] = np.reshape(high_img, (-1, timelength, 101, 101))
        print('high_key:',high_key)
    return high_dic


which_high = 'high3'
train_high_dic = get_high_img(train_imgs)
train_high0_img = train_high_dic[which_high]
print('train_high0_img.shape:',train_high0_img.shape)
print('high0_img max:{},min:{}'.format(np.max(train_high0_img),np.min(train_high0_img)))
test_high_dic = get_high_img(test_imgs)
test_high0_img = test_high_dic[which_high]

train_db = tf.data.Dataset.from_tensor_slices(
    (train_high0_img, train_rain)).shuffle(500).batch(bat)
# train_db = tf.data.Dataset.from_tensor_slices((train_high0_img, train_rain)).shuffle(500).repeat()
test_db = tf.data.Dataset.from_tensor_slices(
    (test_high0_img, test_rain)).batch(bat)

early_stoping = EarlyStopping(monitor='val_loss', patience=pt)
# `learning_rate` is the supported keyword; `lr` is a deprecated alias.
opt = tf.keras.optimizers.Adam(learning_rate=learn_rate, clipnorm=0.1)
# opt = tf.keras.optimizers.Adam(lr=learn_rate)
# opt = tf.keras.optimizers.SGD(lr=learn_rate)
# opt = tf.keras.optimizers.RMSprop(lr=learn_rate)
my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10,
                       max_seq_len=15)
# my_model = My_ConvLSTM(unit)

# ------------------------------ load model ------------------------------
# model_name = './my_save_model/trans_model_LOSS1/transmodel_LOSS1.ckpt'
# my_model.load_weights(model_name)
# -------------------------------------------------------------------------

# my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
my_model.compile(optimizer=opt, loss=my_loss)
# my_model.fit(train_high0_img,train_rain,validation_data=(test_high0_img,test_rain),epochs=epochs, validation_freq=1,batch_size=bat)
# Model.fit documents `callbacks` as a list of callback instances.
my_model.fit(train_db, validation_data=test_db, epochs=epochs,
             validation_freq=1, callbacks=[early_stoping])
# my_model.fit(train_db,validation_data=test_db,epochs=epochs, validation_freq=1,steps_per_epoch=train_imgs.shape[0]//bat)

# Save the model weights
model_name = './my_save_model/trans_model_LOSS_3/transmodel_LOSS_3.ckpt'
print('Save_model_name',model_name)
# model_name = 'my_model1.ckpt'
my_model.save_weights(model_name)
print('保存完成')
del (my_model)

# # Reload the model
# my_model = Transformer(num_layers=1, d_model=10, num_heads=2, dff=10, max_seq_len=15)
# model_name = model_name
# my_model.load_weights(model_name)
# my_model.compile(optimizer=opt,loss=tf.keras.losses.MSE)
# print('加载完成')
# my_model.evaluate(test_db)