多任务学习对狗图像进行分类

Question

我正在尝试训练一个多任务分类模型（MobileNet）。简单来说，这是一个单一模型：输入一张狗的图像，同时对其颜色和品种进行分类。我有两个数据集（颜色和品种），每个数据集只包含按类别划分的目录，目录内是属于该类别的图像。这里是数据集的一个示例子集。最终，我需要涵盖 5 个颜色类别和 10 个品种。

当我运行它时，出现以下错误：

logits and labels must be broadcastable

该如何解决？

这是我的代码：

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Set the directories for the color and breed datasets
# (each directory holds one sub-folder per class, as described in the question)
color_data_dir = './color_dataset'
breed_data_dir = './breed_dataset'

# Define the input shape for the MobileNet model
input_shape = (224, 224, 3)

# Create an input layer for clarity and potential customization
input_layer = Input(shape=input_shape)

# Load the MobileNetV2 model (excluding the top fully connected layers)
base_model = MobileNetV2(include_top=False, input_tensor=input_layer)

# Add task-specific fully connected layers for color classification
color_branch = GlobalAveragePooling2D()(base_model.output)
# NOTE(review): 12 output units here, but the question targets 5 color classes
# and the run shown below finds only 3 class folders; the unit count has to
# equal the width of the one-hot labels or the loss cannot be computed.
color_branch = Dense(12, activation='softmax', name='color_output')(color_branch)

# Add task-specific fully connected layers for breed classification
breed_branch = GlobalAveragePooling2D()(base_model.output)
# NOTE(review): 6 output units vs. 3 breed classes found in the run below
# (the error reports logits_size=[17,6] against labels_size=[6,3]).
breed_branch = Dense(6, activation='softmax', name='breed_output')(breed_branch)

# Create the multi-task model with both branches
model = Model(inputs=input_layer, outputs=[color_branch, breed_branch])

# Compile the model with appropriate loss functions for each task;
# the loss dict is keyed by the names given to the two output layers above
model.compile(loss={'color_output': 'categorical_crossentropy', 'breed_output': 'categorical_crossentropy'},
              optimizer='adam', metrics=['accuracy'])

# Set up data generator for the combined color and breed datasets
# (only pixel rescaling by 1/255 is configured; the same generator object
# creates both directory iterators inside MultiTaskDataGenerator)
data_generator = ImageDataGenerator(rescale=1.0/255.0)

class MultiTaskDataGenerator(tf.keras.utils.Sequence):
    """Keras Sequence that pairs up batches from the color and breed datasets.

    Two directory iterators are created from the module-level `data_generator`,
    one per dataset. Batch `index` of this sequence combines batch `index` of
    each iterator.

    NOTE(review): the images of both datasets are concatenated into one batch,
    but each label array only covers the images of its own dataset. Every
    model output therefore sees more predictions than labels, which matches
    the reported "logits and labels must be broadcastable" error.
    """

    def __init__(self, color_data_dir, breed_data_dir, batch_size, target_size):
        """Create one directory iterator per dataset.

        Args:
            color_data_dir: root directory of the color dataset.
            breed_data_dir: root directory of the breed dataset.
            batch_size: batch size used for each of the two iterators.
            target_size: shape whose first two entries are used as the image
                size passed to `flow_from_directory`.
        """
        self.color_data_dir = color_data_dir
        self.breed_data_dir = breed_data_dir
        self.batch_size = batch_size
        self.target_size = target_size

        # class_mode='categorical' is requested for both iterators, so labels
        # come back as 2D one-hot arrays (labels_size=[6,3] in the error).
        self.color_data = data_generator.flow_from_directory(
            directory=color_data_dir,
            target_size=target_size[:2],
            batch_size=batch_size,
            class_mode='categorical'
        )

        self.breed_data = data_generator.flow_from_directory(
            directory=breed_data_dir,
            target_size=target_size[:2],
            batch_size=batch_size,
            class_mode='categorical'
        )

    def __len__(self):
        """Return the number of batches: the shorter of the two iterators."""
        return min(len(self.color_data), len(self.breed_data))

    def __getitem__(self, index):
        """Return `(images, [color_labels, breed_labels])` for batch `index`."""
        color_batch, color_labels = self.color_data[index]
        breed_batch, breed_labels = self.breed_data[index]

        # Concatenate the batches along the batch dimension.
        # NOTE(review): the labels are NOT concatenated or padded, so the image
        # batch holds len(color_batch) + len(breed_batch) samples while each
        # label array only has the samples of its own dataset.
        return tf.concat([color_batch, breed_batch], axis=0), [color_labels, breed_labels]

# Create an instance of the custom data generator
# (input_shape is passed as target_size; the generator only uses its first two entries)
data_gen = MultiTaskDataGenerator(color_data_dir, breed_data_dir, batch_size=32, target_size=input_shape)

# Train the model on both tasks simultaneously using the custom data generator.
# Per the stack trace below, this is the call that raises the InvalidArgumentError.
model.fit(
    data_gen,
    epochs=1
)

这是完整的堆栈跟踪：

To enable the following instructions: SSE SSE2 SSE3 SSE4.1 SSE4.2 AVX AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
Found 11 images belonging to 3 classes.
Found 6 images belonging to 3 classes.
Traceback (most recent call last):
  File "\test\train.py", line 73, in <module>
    model.fit(
  File "\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
    raise e.with_traceback(filtered_tb) from None
  File "\Python\Python310\site-packages\tensorflow\python\eager\execute.py", line 53, in quick_execute
    tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,
tensorflow.python.framework.errors_impl.InvalidArgumentError: Graph execution error:

Detected at node 'categorical_crossentropy_1/softmax_cross_entropy_with_logits' defined at (most recent call last):
    File "\test\train.py", line 73, in <module>
      model.fit(
    File "\Python\Python310\site-packages\keras\src\utils\traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "\Python\Python310\site-packages\keras\src\engine\training.py", line 1783, in fit
      tmp_logs = self.train_function(iterator)
    File "\Python\Python310\site-packages\keras\src\engine\training.py", line 1377, in train_function
      return step_function(self, iterator)
    File "\Python\Python310\site-packages\keras\src\engine\training.py", line 1360, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "\Python\Python310\site-packages\keras\src\engine\training.py", line 1349, in run_step
      outputs = model.train_step(data)
    File "\Python\Python310\site-packages\keras\src\engine\training.py", line 1127, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "\Python\Python310\site-packages\keras\src\engine\training.py", line 1185, in compute_loss
      return self.compiled_loss(
    File "\Python\Python310\site-packages\keras\src\engine\compile_utils.py", line 277, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "\Python\Python310\site-packages\keras\src\losses.py", line 143, in __call__
      losses = call_fn(y_true, y_pred)
    File "\Python\Python310\site-packages\keras\src\losses.py", line 270, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "\Python\Python310\site-packages\keras\src\losses.py", line 2221, in categorical_crossentropy
      return backend.categorical_crossentropy(
    File "\Python\Python310\site-packages\keras\src\backend.py", line 5581, in categorical_crossentropy
      return tf.nn.softmax_cross_entropy_with_logits(
Node: 'categorical_crossentropy_1/softmax_cross_entropy_with_logits'
logits and labels must be broadcastable: logits_size=[17,6] labels_size=[6,3]
         [[{{node categorical_crossentropy_1/softmax_cross_entropy_with_logits}}]] [Op:__inference_train_function_21323]

Answer 1

正如评论中提到的，您可能需要重新组织数据集。此任务的理想数据格式如下：

df.head()
image_path  breed_category   color_category
a.png         bulldog            black
b.png         germanshepherd     white
...

基于这种格式，我们可以构建一个数据加载器，为每个样本同时生成两个对应的标签。但在您的情况下，您有两个数据集，各自包含不同的类别，样本也互不相同（每张图像只有其中一种标签）。例如：

df.head()
image_path  breed_category   color_category
a.png           NaN             black
b.png         germanshepherd     NaN
...

据我所知，对于这类建模来说，这种设置并不常见。让它跑通并非不可能，但会带来不必要的训练复杂性。例如，您的 __getitem__ 将两批图像沿批次维度拼接，却分别返回两组标签，因此模型输出与标签的形状不一致（报错中 logits 为 [17,6]，labels 为 [6,3]）：

    def __getitem__(self, index):
        """Return batch `index` as `(images, [color_labels, breed_labels])`.

        NOTE: the two image batches are stacked along axis 0 while each label
        array still covers only its own dataset, so every model output gets
        more predictions than labels. This is what triggers the reported
        "logits and labels must be broadcastable" error.
        """
        color_batch, color_labels = self.color_data[index]
        breed_batch, breed_labels = self.breed_data[index]

        # Concatenate the batches along the batch dimension. The returned tuple
        # is parenthesized so it can span several lines; a bare trailing comma
        # followed by an indented continuation line does not parse.
        return (
            tf.concat([color_batch, breed_batch], axis=0),
            [color_labels, breed_labels],
        )

更简单的做法是分别训练两个模型，然后用这两个模型构建一个多输出模型。方法如下：

# Directory iterator over the color dataset. `data_generator` and
# `input_shape` are the objects defined in the question's script.
# class_mode='categorical' requests 2D one-hot labels.
color_data = data_generator.flow_from_directory(
    directory=color_data_dir,
    target_size=input_shape[:2],
    batch_size=16,
    class_mode='categorical'
)

# Same configuration for the breed dataset; each dataset now has its own
# independent iterator instead of being merged into one batch.
breed_data = data_generator.flow_from_directory(
    directory=breed_data_dir,
    target_size=input_shape[:2],
    batch_size=16,
    class_mode='categorical'
)
Found 11 images belonging to 4 classes.
Found 13 images belonging to 4 classes.

def encoder(input_layer):
    """Build the MobileNetV2 feature extractor on top of `input_layer`.

    The top classification layers are left out (`include_top=False`). The
    Keras model object itself is returned, so callers read the feature map
    through its `.output` attribute.
    """
    return MobileNetV2(include_top=False, input_tensor=input_layer)

def color_layer(backbone, num_classes=1):
    """Attach the color classification head to `backbone`.

    `backbone.output` is globally average-pooled and passed to a softmax
    `Dense` layer named 'color_output' with `num_classes` units. The output
    tensor of that layer is returned.

    NOTE: a softmax over the default single unit always yields 1.0, so
    callers should pass the real number of color classes.
    """
    pooled = GlobalAveragePooling2D()(backbone.output)
    head = Dense(num_classes, activation='softmax', name='color_output')
    return head(pooled)

def breed_layer(backbone, num_classes=1):
    """Attach the breed classification head to `backbone`.

    `backbone.output` is globally average-pooled and passed to a softmax
    `Dense` layer named 'breed_output' with `num_classes` units. The output
    tensor of that layer is returned.

    NOTE: a softmax over the default single unit always yields 1.0, so
    callers should pass the real number of breed classes.
    """
    features = GlobalAveragePooling2D()(backbone.output)
    classifier = Dense(num_classes, activation='softmax', name='breed_output')
    return classifier(features)

编码器

# Single input tensor used by both the color model and the breed model below
input_layer = Input(shape=input_shape)
# MobileNetV2 backbone (without its top classifier) wired to that input
backbone = encoder(input_layer)

颜色特征模型。

# Color head on the backbone; 4 units to match the 4 class folders
# reported by flow_from_directory above
color_branch = color_layer(backbone, num_classes=4)
color_model = Model(inputs=input_layer, outputs=color_branch)
# The loss dict is keyed by the name of the output layer ('color_output')
color_model.compile(
    loss={'color_output': 'categorical_crossentropy'},
    optimizer='adam', 
    metrics=['accuracy']
)
color_model.fit(color_data, epochs=4) # OK
# Save the trained color model under the path 'color_model'
color_model.save('color_model')

品种特征模型。

# We can freeze the backbone here, since both datasets contain the same
# type of object. This step is optional.
# (The color model, which uses the same backbone object, has already been
# trained and saved at this point.)
backbone.trainable = False

# Breed head on the same backbone; 4 units to match the 4 breed class folders
breed_branch = breed_layer(backbone, num_classes=4)
breed_model = Model(inputs=input_layer, outputs=breed_branch)
# The loss dict is keyed by the name of the output layer ('breed_output')
breed_model.compile(
    loss={'breed_output': 'categorical_crossentropy'},
    optimizer='adam', 
    metrics=['accuracy']
)
breed_model.fit(breed_data, epochs=4) # OK
# Save the trained breed model under the path 'breed_model'
breed_model.save('breed_model')

现在，推理时可以这样做：

# Reload the two trained models from the paths they were saved to
color_model = tf.keras.models.load_model('color_model')
breed_model = tf.keras.models.load_model('breed_model')

def inference_model():
    """Combine the two trained classifiers into one two-output model.

    A fresh `Input` of shape `input_shape` is fed to the module-level
    `color_model` and `breed_model`. The returned `Model` produces
    `[color_output, breed_output]` for that input.
    """
    shared_input = Input(shape=input_shape)
    # Color first, then breed: the order fixes the order of the outputs
    predictions = [net(shared_input) for net in (color_model, breed_model)]
    return Model(inputs=shared_input, outputs=predictions)
    
# Build the combined two-output model from the reloaded classifiers
pred_model = inference_model()

# Take one batch of images from the breed iterator (its labels are ignored)
x, _ = next(iter(breed_data))
x[0].shape # (224, 224, 3)
# x[0][None, ...] adds a leading batch axis so a single image can be passed;
# a is the color prediction, b the breed prediction
a, b = pred_model(x[0][None, ...])
# argmax over the last axis gives the predicted class index for each task
a.numpy().argmax(-1) # array([0])
b.numpy().argmax(-1) # array([3])

这种做法的代价是，您需要分别训练两个模型，推理时也要运行两个模型。不过，推理流程可以进一步优化为单一模型：例如，我们可以分别保存编码器以及颜色和品种两个分支；推理时先用编码器提取输出特征，再将其分别传入颜色和品种分支的顶层。

多任务学习对狗图像进行分类

问题描述投票：0回答：1

1个回答

最新问题

多任务学习对狗图像进行分类

问题描述 投票：0回答：1

1个回答

最新问题

问题描述投票：0回答：1