我知道使用 trainable = False,我可以冻结特定层的所有权重。但我也想研究每一层的组件(kernel、recurrent_kernel 和 bias)。我想将特定层的 kernel 和 recurrent_kernel 分别设置为 trainable = False 或 trainable = True。我该怎么办?
我尝试了以下代码,但出现错误。谁能向我建议如何使用标准 Keras 使 kernel 和 recurrent_kernel Trainable = False ?
# Grab the first (LSTM) layer of the transfer model.
lstm_layer = modelTL.layers[0]
# Attempt to freeze only the input kernel of that layer.
# NOTE(review): in tf.keras 2.x `tf.Variable.trainable` is a read-only
# property, so this assignment raises "AttributeError: can't set attribute".
lstm_layer.cell.kernel.trainable = False
#Error : lstm_layer.cell.kernel.trainable = False
AttributeError: can't set attribute
构建图层后,您可以把各个权重变量分离出来,并分别将它们设置为可训练(trainable)或不可训练。
import tensorflow as tf

# Build the layer with a 3-D shape (batch, timesteps, features) so its weight
# variables are created before we touch them.  A 2-D shape such as (2, 4)
# would silently build the cell with the batch size as the feature dimension.
layer = tf.keras.layers.LSTM(8)
layer.build(sequences_shape=(2, 3, 4))

# Keras 3 LSTM layers own four variables: kernel, recurrent_kernel, bias and
# the dropout seed-generator state.
kernel, recurrent_kernel, bias, seed = layer.variables

# `Variable.trainable` is a writable property in Keras 3, so individual
# weights can be frozen while the rest of the layer keeps training.
kernel.trainable = False
recurrent_kernel.trainable = False
你能检查一下吗?为什么即使我冻结第一层的 kernel 和 recurrent_kernel 权重,这些参数在模型摘要(summary)中仍然显示为可训练?
# Imports for the reproducible training script below.
import random
import numpy as np
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Dense, LSTM
from keras.models import load_model
# Seed every RNG involved (NumPy, TensorFlow, Python) so both the base and
# transfer runs are reproducible.
np.random.seed(1)
tf.random.set_seed(1)
random.seed(1)
# --- base model: train a small stacked-LSTM regressor on random data ---
# (the original used a bare no-op string literal 'base' as a section label)
train_x = np.random.rand(100, 4, 8)  # (samples, timesteps, features)
train_y = np.random.rand(100, 4)
test_x = np.random.rand(30, 4, 8)
test_y = np.random.rand(30, 4)

model = Sequential()
# First LSTM returns full sequences so the second LSTM can consume them.
model.add(LSTM(units=256, activation='relu',
               input_shape=(train_x.shape[1], train_x.shape[2]),
               return_sequences=True, use_bias=True))
model.add(LSTM(units=512, activation='relu'))
# Linear head producing the 4 regression targets.
model.add(Dense(activation='linear', units=4, use_bias=True))
model.compile(loss='mse', optimizer='Nadam', metrics=['mse', 'mae'])
history = model.fit(train_x, train_y, validation_data=(test_x, test_y),
                    epochs=3, verbose=1, batch_size=64)
# Persist the trained weights for the transfer-learning stage.
model.save("source_model.h5")
# --- transfer: clone the pretrained layers and freeze selected weights ---
train_x1 = np.random.rand(200, 4, 8)
train_y1 = np.random.rand(200, 4)
test_x1 = np.random.rand(60, 4, 8)
test_y1 = np.random.rand(60, 4)

pretrained_model = load_model('source_model.h5')

# Rebuild the architecture layer by layer under new names.
modelTL = Sequential()
for layer in pretrained_model.layers:
    new_layer = layer.__class__.from_config(layer.get_config())
    new_layer._name = f'trainable_{layer.name}'
    modelTL.add(new_layer)

# Build the clone so every layer creates its variables, then copy the
# pretrained weights — from_config() only copies the architecture, so
# without set_weights() no knowledge is actually transferred.
modelTL.build(input_shape=(None, train_x1.shape[1], train_x1.shape[2]))
modelTL.set_weights(pretrained_model.get_weights())

# Freeze kernel and recurrent_kernel of the first LSTM through the public,
# writable `trainable` property.  Poking the private `_trainable` attribute
# (as the original did) is not reflected in trainable_weights and therefore
# not in model.summary().  Using the cell's named attributes also avoids the
# fragile 3-way unpack of `.variables` (Keras 3 LSTMs own a 4th seed
# variable — see the answer snippet above).
cell = modelTL.layers[0].cell
cell.kernel.trainable = False
cell.recurrent_kernel.trainable = False

print(f"Kernel Trainable: {cell.kernel.trainable}")
print(f"Recurrent Kernel Trainable: {cell.recurrent_kernel.trainable}")
# Original printed the wrong label ("Recurrent Kernel") for the bias flag.
print(f"Bias Trainable: {cell.bias.trainable}")

# Compile AFTER freezing so the optimizer only tracks trainable weights.
modelTL.compile(loss='mse', optimizer='Nadam', metrics=['mse', 'mae'])
historyTL = modelTL.fit(train_x1, train_y1, validation_data=(test_x1, test_y1),
                        epochs=3, verbose=1, batch_size=64)
modelTL.summary()