我正在尝试创建一个返回 2 个输出的模型:第一个是预测,第二个是注意力图。我为此使用 CIFAR-10 数据集。问题在于:对于有 2 个输出的模型,fit 方法无法直接配合默认损失函数工作,因此我必须创建一个自定义损失函数,但我无法弄清楚该如何正确编写它。我已经为我的损失函数尝试了以下方法:
def custom_loss(y_true, y_pred_with_data):
    """Compute sparse categorical cross-entropy on the prediction output only.

    Args:
        y_true: Integer class labels, shape (batch,) or (batch, 1).
        y_pred_with_data: The model output handed in by Keras. Intended to be
            the (predictions, attention_map) pair, with element 0 being the
            softmax tensor of shape (batch, num_classes).

    Returns:
        Per-sample loss tensor of shape (batch,).
    """
    # NOTE(review): inside fit(), Keras invokes the loss once per model
    # output, passing each output tensor separately — so here
    # y_pred_with_data is a single tensor and [0] slices the batch axis,
    # which is exactly what produced the reported
    # "Dimension size must be evenly divisible by 10" error. Selecting the
    # output via a per-output loss dict in compile() is the reliable fix.
    y_pred = y_pred_with_data[0]
    loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
    # The original snippet computed the loss but never returned it, which
    # makes the loss function yield None; return it explicitly.
    return loss
我正在运行以下代码来创建和拟合模型:-
def teacher_model_generator(input_shape, num_classes):
    """Build a teacher model with two outputs: class predictions and the
    CBAM attention feature map.

    Args:
        input_shape: Input image shape, e.g. (32, 32, 3).
        num_classes: Number of classes for the softmax prediction head.

    Returns:
        tf.keras.Model whose outputs are ['predictions', cbam feature map].
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    # cbam() is defined elsewhere; per the model summary it returns an
    # attention-refined feature map of shape (batch, 32, 32, 64) — confirm
    # against its definition.
    cbam_output = cbam(x)
    x = GlobalAveragePooling2D()(cbam_output)
    # Name the prediction head so compile() can target it with a per-output
    # loss dict, e.g. loss={'predictions': SparseCategoricalCrossentropy()},
    # instead of a custom loss that tries to unpack both outputs.
    x = Dense(num_classes, activation='softmax', name='predictions')(x)
    # Return the outputs as a list — the conventional Keras multi-output
    # form; callers that index outputs positionally still work.
    return tf.keras.Model(inputs=inputs, outputs=[x, cbam_output],
                          name='teacher_model')
# CIFAR-10 image dimensions: 32x32 RGB.
input_shape = (32, 32, 3)
num_classes = 10
# Distillation temperature; defined here but not used in this snippet.
temperature = 10
# Instantiate teacher model
teacher_model = teacher_model_generator(input_shape, num_classes)
# NOTE(review): the model has two outputs, but a single callable loss is
# passed here — Keras applies it to EACH output separately, including the
# attention map, which is what triggers the shape error during fit().
# A per-output loss dict keyed by output layer name avoids this.
teacher_model.compile(optimizer='adam', loss=custom_loss)
epochs = 50
# Train model
# x_train / y_train are assumed to be the CIFAR-10 training arrays loaded
# elsewhere — not shown in this snippet.
teacher_model.fit(x_train, y_train, epochs=epochs, batch_size=64)
我的教师模型的架构如下:-
Model: "teacher_model"
__________________________________________________________________________________________________
Layer (type) Output Shape Param # Connected to
==================================================================================================
input_1 (InputLayer) [(None, 32, 32, 3)] 0 []
conv2d (Conv2D) (None, 32, 32, 64) 1792 ['input_1[0][0]']
global_average_pooling2d ( (None, 64) 0 ['conv2d[0][0]']
GlobalAveragePooling2D)
global_max_pooling2d (Glob (None, 64) 0 ['conv2d[0][0]']
alMaxPooling2D)
dense (Dense) (None, 8) 512 ['global_average_pooling2d[0][
0]',
'global_max_pooling2d[0][0]']
dense_1 (Dense) (None, 64) 512 ['dense[0][0]',
'dense[1][0]']
tf.__operators__.add (TFOp (None, 64) 0 ['dense_1[0][0]',
Lambda) 'dense_1[1][0]']
activation (Activation) (None, 64) 0 ['tf.__operators__.add[0][0]']
multiply (Multiply) (None, 32, 32, 64) 0 ['conv2d[0][0]',
'activation[0][0]']
tf.math.reduce_mean (TFOpL (None, 32, 64) 0 ['multiply[0][0]']
ambda)
tf.math.reduce_mean_1 (TFO (None, 32, 64) 0 ['multiply[0][0]']
pLambda)
tf.expand_dims (TFOpLambda (None, 1, 32, 64) 0 ['tf.math.reduce_mean[0][0]']
)
tf.expand_dims_1 (TFOpLamb (None, 1, 32, 64) 0 ['tf.math.reduce_mean_1[0][0]'
da) ]
concatenate (Concatenate) (None, 1, 32, 128) 0 ['tf.expand_dims[0][0]',
'tf.expand_dims_1[0][0]']
conv2d_1 (Conv2D) (None, 1, 32, 1) 6273 ['concatenate[0][0]']
multiply_1 (Multiply) (None, 32, 32, 64) 0 ['multiply[0][0]',
'conv2d_1[0][0]']
tf.__operators__.add_1 (TF (None, 32, 32, 64) 0 ['conv2d[0][0]',
OpLambda) 'multiply_1[0][0]']
global_average_pooling2d_1 (None, 64) 0 ['tf.__operators__.add_1[0][0]
(GlobalAveragePooling2D) ']
dense_2 (Dense) (None, 10) 650 ['global_average_pooling2d_1[0
][0]']
==================================================================================================
Total params: 9739 (38.04 KB)
Trainable params: 9739 (38.04 KB)
Non-trainable params: 0 (0.00 Byte)
__________________________________________________________________________________________________
目前,我收到以下错误:-
Epoch 1/50
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[18], line 14
12 epochs = 50
13 # Train your model
---> 14 teacher_model.fit(x_train, y_train, epochs=epochs, batch_size=64)
File ~\AppData\Roaming\Python\Python311\site-packages\keras\src\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_fileekxla2qb.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
File ~\AppData\Local\Temp\__autograph_generated_fileeyasu_it.py:11, in outer_factory.<locals>.inner_factory.<locals>.tf__custom_loss(y_true, y_pred_with_data)
9 retval_ = ag__.UndefinedReturnValue()
10 y_pred = ag__.ld(y_pred_with_data)[0]
---> 11 loss = ag__.converted_call(ag__.ld(tf).keras.losses.sparse_categorical_crossentropy, (ag__.ld(y_true), ag__.ld(y_pred)), None, fscope)
12 try:
13 do_return = True
ValueError: in user code:
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\engine\training.py", line 1401, in train_function *
return step_function(self, iterator)
File "C:\Users\21112\AppData\Local\Temp\ipykernel_12124\1897110577.py", line 6, in custom_loss *
loss = tf.keras.losses.sparse_categorical_crossentropy(y_true, y_pred)
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\losses.py", line 2454, in sparse_categorical_crossentropy
return backend.sparse_categorical_crossentropy(
File "C:\Users\21112\AppData\Roaming\Python\Python311\site-packages\keras\src\backend.py", line 5775, in sparse_categorical_crossentropy
res = tf.nn.sparse_softmax_cross_entropy_with_logits(
ValueError: Dimension size must be evenly divisible by 10 but is 1024 for '{{node custom_loss_1/SparseSoftmaxCrossEntropyWithLogits/Reshape_2}} = Reshape[T=DT_FLOAT, Tshape=DT_INT32](custom_loss_1/SparseSoftmaxCrossEntropyWithLogits/SparseSoftmaxCrossEntropyWithLogits, custom_loss_1/SparseSoftmaxCrossEntropyWithLogits/Shape)' with input shapes: [1024], [2] and with input tensors computed as partial shapes: input[1] = [?,10].
通常,在多输出模型中,您尝试仅计算特定输出的损失。这是一种方法:您可以在编译方法中指定具有所需损失方法的目标层。例如:
loss = {
"out_a": keras.losses.CategoricalCrossentropy(),
"out_b": keras.losses.SparseCategoricalCrossentropy(),
"out_c": keras.losses.MeanSquaredError()
}
这是一个玩具示例。
# Load MNIST; the test labels are discarded since only training is shown.
(xtrain, ytrain), (xtest, _) = keras.datasets.mnist.load_data()
# Keep a 10k subset to make the toy example fast.
xtrain = xtrain[:10000]
ytrain = ytrain[:10000]
# Target for head 'out_a': one-hot digit class (10 classes).
y_out_a = keras.utils.to_categorical(ytrain, num_classes=10)
# Target for head 'out_b': one-hot parity flag (1 = even digit, 0 = odd).
y_out_b = keras.utils.to_categorical((ytrain % 2 == 0).astype(int), num_classes=2)
def teacher_model_generator(input_shape, num_classes):
    """Build a two-headed toy model: a class head and a 2-way parity head.

    Args:
        input_shape: Image shape, e.g. (28, 28, 1).
        num_classes: Width of the 'out_a' softmax head. (The original body
            accepted this parameter but hard-coded 10; it is now used, which
            is backward-compatible since callers pass 10.)

    Returns:
        keras.Model with named outputs 'out_a' (num_classes-way softmax)
        and 'out_b' (2-way softmax).
    """
    inputs = Input(shape=input_shape)
    x = Conv2D(64, (3, 3), padding='same', activation='relu')(inputs)
    x = GlobalAveragePooling2D()(x)
    # Naming each output layer lets compile() attach a loss per head via a
    # dict keyed by these names.
    out_a = keras.layers.Dense(num_classes, activation='softmax', name='out_a')(x)
    out_b = keras.layers.Dense(2, activation='softmax', name='out_b')(x)
    return keras.Model(
        inputs=inputs, outputs=[out_a, out_b], name='teacher_model'
    )
# MNIST image dimensions (single grayscale channel).
input_shape = (28, 28, 1)
num_classes = 10
teacher_model = teacher_model_generator(input_shape, num_classes)
teacher_model.compile(
    # Only 'out_a' gets a loss; 'out_b' contributes no gradient and therefore
    # never trains — consistent with the log below, where out_b_acc stays
    # frozen at 0.4930 across all epochs. This demonstrates computing a loss
    # for a chosen subset of outputs in a multi-output model.
    loss = {
        "out_a": tf.keras.losses.CategoricalCrossentropy(),
    },
    # A single metric string is broadcast to every output ('out_a_acc',
    # 'out_b_acc' in the log).
    metrics = 'acc',
    optimizer = keras.optimizers.Adam(learning_rate=0.001)
)
# Targets are passed as a list matching the model's output order
# [out_a, out_b]. NOTE(review): xtrain is raw uint8 and unscaled —
# normalizing to [0, 1] would likely train better, but is omitted in this
# toy example.
teacher_model.fit(xtrain, [y_out_a, y_out_b,], epochs=3)
Epoch 1/3
loss: 2.2846 - out_a_loss: 2.2846 - out_a_acc: 0.2191 - out_b_acc: 0.4930
Epoch 2/3
loss: 1.7500 - out_a_loss: 1.7500 - out_a_acc: 0.3483 - out_b_acc: 0.4930
Epoch 3/3
loss: 1.6229 - out_a_loss: 1.6229 - out_a_acc: 0.3922 - out_b_acc: 0.4930
这是使用 Keras 的高级 API。但如果您愿意使用一些中级 API(例如自定义训练循环或重写 train_step),您可以获得更多控制权。