我尝试使用 ssd 和 mobilenetv2 训练自定义对象检测模型,尽管 ssd_loss 函数似乎与模型的输出形状不匹配。我该如何解决它?
模型准备
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Input, Conv2D, Reshape, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
# Define the input layer
input_layer = Input(shape=(224, 224, 3))
# Load the MobileNetV2 model, excluding the top layers
base_model = MobileNetV2(input_tensor=input_layer, include_top=False, weights='imagenet')
# Add custom layers for SSD
x = base_model.output
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
x = Conv2D(256, (3, 3), activation='relu', padding='same')(x)
# Define the number of classes and bounding box coordinates
num_classes = 2 # Number of classes
num_bboxes = 4 # xmin, ymin, xmax, ymax
# Classification head
class_output = Conv2D(num_classes, (1, 1), activation='sigmoid', name='class_output')(x)
# Bounding box regression head
bbox_output = Conv2D(num_bboxes, (1, 1), activation='linear', name='bbox_output')(x)
# Reshape the outputs
class_output = Reshape((-1, num_classes))(class_output)
bbox_output = Reshape((-1, num_bboxes))(bbox_output)
# Concatenate the outputs
output = Concatenate(axis=-1)([class_output, bbox_output])
# Create the model
model = Model(inputs=input_layer, outputs=output)
# Print the model summary
model.summary()
模型编译:
from tensorflow.keras.losses import BinaryCrossentropy, MeanSquaredError
def ssd_loss(y_true, y_pred):
num_classes = 2 # Number of classes (e.g., Trophozoite, WBC)
num_bboxes = 4 # xmin, ymin, xmax, ymax
class_pred = y_pred[..., :num_classes]
bbox_pred = y_pred[..., num_classes:num_classes + num_bboxes]
class_loss = BinaryCrossentropy()(y_true[..., :num_classes], class_pred)
bbox_loss = MeanSquaredError()(y_true[..., num_classes:], bbox_pred)
return class_loss + bbox_loss
# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss=ssd_loss)
# Training Callbacks
callbacks = [
tf.keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss'),
tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss')
]
# Train the Model
history = model.fit(
train_dataset,
validation_data=val_dataset,
epochs=30,
callbacks=callbacks
)
从 tfrecords 解析数据:
import tensorflow as tf
def parse_tf_example(example_proto):
# Define the feature description dictionary
feature_description = {
'image/height': tf.io.FixedLenFeature([], tf.int64),
'image/width': tf.io.FixedLenFeature([], tf.int64),
'image/encoded': tf.io.FixedLenFeature([], tf.string),
'image/format': tf.io.FixedLenFeature([], tf.string),
'image/object/bbox/xmin': tf.io.FixedLenFeature([], tf.float32),
'image/object/bbox/ymin': tf.io.FixedLenFeature([], tf.float32),
'image/object/bbox/xmax': tf.io.FixedLenFeature([], tf.float32),
'image/object/bbox/ymax': tf.io.FixedLenFeature([], tf.float32),
'image/object/class/label': tf.io.FixedLenFeature([], tf.int64)
}
parsed_features = tf.io.parse_single_example(example_proto, feature_description)
image = tf.image.decode_jpeg(parsed_features['image/encoded'], channels=3)
image = tf.image.resize(image, [224, 224])
image = tf.cast(image, tf.float32) / 255.0
label = tf.one_hot(parsed_features['image/object/class/label'], depth=2) # One-hot encode the class label
bbox = [parsed_features['image/object/bbox/xmin'],
parsed_features['image/object/bbox/ymin'],
parsed_features['image/object/bbox/xmax'],
parsed_features['image/object/bbox/ymax']]
# Combine label and bbox into a single tensor
combined_label = tf.concat([label, bbox], axis=-1)
# Reshape the combined label to match the model's output shape
combined_label = tf.reshape(combined_label, [-1, 6])
return image, combined_label
# Function to load TFRecords and create a dataset
def load_tfrecords(tfrecord_path):
raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
parsed_dataset = raw_dataset.map(parse_tf_example, num_parallel_calls=tf.data.experimental.AUTOTUNE)
return parsed_dataset
# Load the train and validation datasets
train_dataset = load_tfrecords('train.tfrecord')
val_dataset = load_tfrecords('val.tfrecord')
# Batch and prefetch the datasets
train_dataset = train_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
val_dataset = val_dataset.batch(32).prefetch(tf.data.experimental.AUTOTUNE)
我已经尝试编译和训练代码,但为什么我在图像的形状中得到“1”
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[17], line 27
21 callbacks = [
22 tf.keras.callbacks.ModelCheckpoint('best_model.keras', save_best_only=True, monitor='val_loss'),
23 tf.keras.callbacks.EarlyStopping(patience=5, monitor='val_loss')
24 ]
26 # Train the Model
---> 27 history = model.fit(
28 train_dataset,
29 validation_data=val_dataset,
30 epochs=30,
31 callbacks=callbacks
32 )
File c:\Users\achma\AppData\Local\Programs\Python\Python312\Lib\site-packages\keras\src\utils\traceback_utils.py:122, in filter_traceback.<locals>.error_handler(*args, **kwargs)
119 filtered_tb = _process_traceback_frames(e.__traceback__)
120 # To get the full stack trace, call:
121 # `keras.config.disable_traceback_filtering()`
--> 122 raise e.with_traceback(filtered_tb) from None
123 finally:
124 del filtered_tb
Cell In[17], line 10
7 class_pred = y_pred[..., :num_classes]
...
---> 10 class_loss = BinaryCrossentropy()(y_true[..., :num_classes], class_pred)
11 bbox_loss = MeanSquaredError()(y_true[..., num_classes:], bbox_pred)
13 return class_loss + bbox_loss
ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 1, 2), output.shape=(None, 49, 2)
您应该在代码中检查您为 fit() 提供的目标尺寸以及模型输出的尺寸(为什么是 49)。你的train_dataset是如何定义的?为什么不使用一个密集层作为模型的最后一层?