使用 SSD 和 mobilenetv2 进行对象检测时“目标”和“输出形状”不匹配

问题描述 投票:0回答:1

我尝试使用 ssd 和 mobilenetv2 训练自定义对象检测模型,尽管 ssd_loss 函数似乎与模型的输出形状不匹配。我该如何解决它?

模型准备

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Conv2D, Reshape, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Define the input layer
input_layer = Input(shape=(224, 224, 3))

# Load the MobileNetV2 model, excluding the top layers
base_model = MobileNetV2(input_tensor=input_layer, include_top=False, weights='imagenet')

# Add custom layers for SSD
x = base_model.output
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)

# Define the number of classes and bounding box coordinates
num_classes = 2  # Number of classes 
num_bboxes = 4  # xmin, ymin, xmax, ymax

# Classification head
class_output = Conv2D(num_classes, (1, 1), activation='sigmoid', name='class_output')(x)

# Bounding box regression head
bbox_output = Conv2D(num_bboxes, (1, 1), activation='linear', name='bbox_output')(x)

# Reshape the outputs
class_output = Reshape((-1, num_classes))(class_output)
bbox_output = Reshape((-1, num_bboxes))(bbox_output)

# Concatenate the outputs
output = Concatenate(axis=-1)([class_output, bbox_output])

# Create the model
model = Model(inputs=input_layer, outputs=output)

# Print the model summary
model.summary()

模型编译:


from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError

def ssd_loss(y_true, y_pred):
    num_classes = 2  # Number of classes (e.g., Trophozoite, WBC)
    num_bboxes = 4  # xmin, ymin, xmax, ymax
    
    class_pred = y_pred[..., :num_classes]
    bbox_pred = y_pred[..., num_classes:num_classes + num_bboxes]
    
    class_loss = BinaryCrossentropy()(y_true[..., :num_classes], class_pred)
    bbox_loss = MeanSquaredError()(y_true[..., num_classes:], bbox_pred)
    
    return class_loss + bbox_loss


# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss=ssd_loss)


# Training Callbacks
callbacks = [
    tf.keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss'),
    tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss')
]

# Train the Model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=30,
    callbacks=callbacks
)

从 tfrecords 解析数据:

import tensorflow as tf

def parse_tf_example(example_proto):
    # Define the feature description dictionary
    feature_description = {
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/format': tf.io.FixedLenFeature([], tf.string),
        'image/object/bbox/xmin': tf.io.FixedLenFeature([], tf.float32),
        'image/object/bbox/ymin': tf.io.FixedLenFeature([], tf.float32),
        'image/object/bbox/xmax': tf.io.FixedLenFeature([], tf.float32),
        'image/object/bbox/ymax': tf.io.FixedLenFeature([], tf.float32),
        'image/object/class/label': tf.io.FixedLenFeature([], tf.int64)
    }
    parsed_features = tf.io.parse_single_example(example_proto, feature_description)
    image = tf.image.decode_jpeg(parsed_features['image/encoded'], channels=3)
    image = tf.image.resize(image, [224, 224])
    image = tf.cast(image, tf.float32) / 255.0
    
    label = tf.one_hot(parsed_features['image/object/class/label'], depth=2)  # One-hot encode the class label
    bbox = [parsed_features['image/object/bbox/xmin'],
            parsed_features['image/object/bbox/ymin'],
            parsed_features['image/object/bbox/xmax'],
            parsed_features['image/object/bbox/ymax']]
    
    # Combine label and bbox into a single tensor
    combined_label = tf.concat([label, bbox], axis=-1)
    
    # Reshape the combined label to match the model's output shape
    combined_label = tf.reshape(combined_label, [-1, 6])
    
    return image, combined_label

# Function to load TFRecords and create a dataset
def load_tfrecords(tfrecord_path):
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
    parsed_dataset = raw_dataset.map(parse_tf_example, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    return parsed_dataset

# Load the train and validation datasets
train_dataset = load_tfrecords('train.tfrecord')
val_dataset = load_tfrecords('val.tfrecord')

# Batch and prefetch the datasets
train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)

我已经尝试编译和训练代码,但为什么我在图像的形状中得到“1”

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[17], line 27
     21 callbacks = [
     22     tf.keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss'),
     23     tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss')
     24 ]
     26 # Train the Model
---> 27 history = model.fit(
     28     train_dataset,
     29     validation_data=val_dataset,
     30     epochs=30,
     31     callbacks=callbacks
     32 )

File c:\Users\achma\AppData\Local\Programs\Python\Python312\Lib\site-packages\keras\src\utils\traceback_utils.py:122, in filter_traceback.<locals>.error_handler(*args, **kwargs)
    119     filtered_tb = _process_traceback_frames(e.__traceback__)
    120     # To get the full stack trace, call:
    121     # `keras.config.disable_traceback_filtering()`
--> 122     raise e.with_traceback(filtered_tb) from None
    123 finally:
    124     del filtered_tb

Cell In[17], line 10
      7 class_pred = y_pred[..., :num_classes]
...
---> 10 class_loss = BinaryCrossentropy()(y_true[..., :num_classes], class_pred)
     11 bbox_loss = MeanSquaredError()(y_true[..., num_classes:], bbox_pred)
     13 return class_loss + bbox_loss

ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 1, 2), output.shape=(None, 49, 2)
tensorflow machine-learning classification object-detection mobilenet
1个回答
0
投票

您应该在代码中检查您为 fit() 提供的目标尺寸以及模型输出的尺寸(为什么是 49)。你的train_dataset是如何定义的?为什么不使用一个密集层作为模型的最后一层?

© www.soinside.com 2019 - 2024. All rights reserved.