以下模型的损失函数在训练过程中爆炸。帮助调试将不胜感激!
def getModel():
num_minutes_in_timeseries = 1740
etd_num_features = 5
etd_input = keras.Input(shape = (num_minutes_in_timeseries, etd_num_features), name = 'etd_input')
# Convolution for ETD
etd_conv1 = layers.Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(etd_input)
etd_conv2 = layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(etd_conv1)
# Concat with original
concat_f_o = layers.concatenate([etd_conv2, etd_input], axis=-1)
# Rest of Features
lstm1 = layers.LSTM(69, activation='relu')(concat_f_o)
lstm1_dropout = layers.Dropout(0.2)(lstm1)
dense1= layers.Dense(69, activation='relu')(lstm1_dropout)
dense2= layers.Dense(35, activation='relu')(dense1)
etd_out = layers.Dense(1, activation='relu')(dense2)
model = keras.Model(inputs = etd_input, outputs = etd_out)
optimizer = keras.optimizers.Adam(learning_rate=0.0005, clipvalue=0.5)
model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])
return model
我尝试过调整学习率、损失函数、三重检查输入数据、尝试不同的体系结构以及其他几个,并且在训练过程中损失总是不同的。
有趣的是,如果像下面这样移除 LSTM 层,那么模型会很好地收敛。
def getModel():
num_minutes_in_timeseries = 1740
etd_num_features = 5
etd_input = keras.Input(shape = (num_minutes_in_timeseries, etd_num_features), name = 'etd_input')
# Convolution for ETD
etd_conv1 = layers.Conv1D(filters=32, kernel_size=3, activation='relu', padding='same')(etd_input)
etd_conv2 = layers.Conv1D(filters=64, kernel_size=3, activation='relu', padding='same')(etd_conv1)
# Concat with original
concat_f_o = layers.concatenate([etd_conv2, etd_input], axis=-1)
# Rest of Features
#lstm1 = layers.LSTM(69, activation='relu')(concat_f_o)
#lstm1_dropout = layers.Dropout(0.2)(lstm1)
dense1= layers.Dense(69, activation='relu')(concat_f_o)
dense2= layers.Dense(35, activation='relu')(dense1)
etd_out = layers.Dense(1, activation='relu')(dense2)
model = keras.Model(inputs = etd_input, outputs = etd_out)
optimizer = keras.optimizers.Adam(learning_rate=0.0005, clipvalue=0.5)
model.compile(optimizer=optimizer, loss='mae', metrics=['mae'])
return model