I am trying to build a classification and detection model using Faster R-CNN with a ResNet50 backbone, but I keep getting this error:
ValueError Traceback (most recent call last)
Cell In[38], line 2
1 # Train the model
----> 2 history = model.fit(
3 train_dataset,
4 validation_data=val_dataset,
5 epochs=10, # Adjust the number of epochs based on the performance
6 )
File c:\Users\muham\OneDrive\Desktop\Tensorflow_O_B_D_Folder\myenv_tf\lib\site-packages\keras\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
67 filtered_tb = _process_traceback_frames(e.__traceback__)
68 # To get the full stack trace, call:
69 # `tf.debugging.disable_traceback_filtering()`
---> 70 raise e.with_traceback(filtered_tb) from None
71 finally:
72 del filtered_tb
File ~\AppData\Local\Temp\__autograph_generated_filejpp6xfsb.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
13 try:
14 do_return = True
---> 15 retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
16 except:
17 do_return = False
...
File "c:\Users\muham\OneDrive\Desktop\Tensorflow_O_B_D_Folder\myenv_tf\lib\site-packages\keras\losses.py", line 1486, in mean_squared_error
return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
ValueError: Dimensions must be equal, but are 4 and 5 for '{{node bbox_loss/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model_2/reshape_7/Reshape, Cast)' with input shapes: [?,115,4], [?,115,5].
I am not very familiar with this model, so I did not know what to do and have not taken any real steps to fix it. I searched online but could not understand how to solve it.
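From the message it looks like the bbox head predicts 4 numbers per box while the target it is compared against has 5 (the four coordinates plus the class label), but I do not know where to fix this. A minimal illustration of what I think is being compared (illustration only, not from my notebook):

# Illustration of the suspected mismatch: MSE between a (batch, 115, 4)
# prediction and a (batch, 115, 5) target cannot line up the last dimension.
import tensorflow as tf

pred = tf.zeros((8, 115, 4))  # shape of the model's bbox output
true = tf.zeros((8, 115, 5))  # shape of one batch of padded annotations
# tf.keras.losses.mean_squared_error(true, pred)  # fails: dimensions 4 and 5

My full code is below.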
import tensorflow as tf
from tensorflow import keras
import os
import cv2
import numpy as np
from tensorflow.keras import layers

# Path to images and annotations
image_folder = 'archive (20)/images'
annotation_folder = 'annotations'

# Fixed image size (e.g., 512x512)
fixed_height = 512
fixed_width = 512

# Function to load images and annotations from text file format
def load_data(image_folder, annotation_folder, fixed_height, fixed_width):
    images = []
    annotations = []
    for image_file in os.listdir(image_folder):
        if image_file.endswith(".png") or image_file.endswith(".jpg"):
            # Load image
            image_path = os.path.join(image_folder, image_file)
            image = cv2.imread(image_path)
            # Resize image to fixed size
            image_resized = cv2.resize(image, (fixed_width, fixed_height))
            images.append(image_resized)
            # Load corresponding annotation
            annotation_file = image_file.replace('.png', '.txt').replace('.jpg', '.txt')
            annotation_path = os.path.join(annotation_folder, annotation_file)
            with open(annotation_path, 'r') as f:
                bboxes = []
                for line in f:
                    xmin, ymin, xmax, ymax, label = line.strip().split(' ')
                    bbox = [int(xmin), int(ymin), int(xmax), int(ymax), label]
                    bboxes.append(bbox)
            annotations.append(bboxes)

    # Convert to numpy arrays for TensorFlow
    images = np.array(images)
    annotations = np.array(annotations, dtype=object)  # Keep as object array due to varying numbers of boxes

    # Display shapes
    print("Images shape:", images.shape)
    print("Annotations shape:", len(annotations), "annotations, each containing varying bounding boxes.")

    return images, annotations

# Load the data
images, annotations = load_data(image_folder, annotation_folder, fixed_height, fixed_width)
Output:
Images shape: (853, 512, 512, 3)
Annotations shape: 853 annotations, each containing varying bounding boxes.
def preprocess_annotations(annotations, fixed_height, fixed_width, class_mapping):
    processed_annotations = []
    for bboxes in annotations:
        processed_bboxes = []
        for bbox in bboxes:
            xmin, ymin, xmax, ymax, label = bbox
            # Normalize coordinates
            xmin = xmin / fixed_width
            xmax = xmax / fixed_width
            ymin = ymin / fixed_height
            ymax = ymax / fixed_height
            # Convert class label to numerical form
            label = class_mapping[label]
            processed_bboxes.append([xmin, ymin, xmax, ymax, label])
        processed_annotations.append(processed_bboxes)
    return np.array(processed_annotations, dtype=object)

class_mapping = {
    'with_mask': 1,
    'without_mask': 0,
    'mask_weared_incorrect': 2
}

# Preprocess annotations
processed_annotations = preprocess_annotations(annotations, fixed_height, fixed_width, class_mapping)

# Check shape of processed annotations
print("Processed Annotations shape:", len(processed_annotations), "images with bounding boxes.")
Output:
Processed Annotations shape: 853 images with bounding boxes.
from sklearn.model_selection import train_test_split

# Split into training and validation sets
train_images, val_images, train_annotations, val_annotations = train_test_split(
    images, processed_annotations, test_size=0.2, random_state=42
)
print(f"Training images: {len(train_images)}, Validation images: {len(val_images)}")
Output:
Training images: 682, Validation images: 171
# Find the maximum number of bounding boxes in any image
max_bboxes = max([len(ann) for ann in processed_annotations])
print(f"Maximum number of bounding boxes in an image: {max_bboxes}")
Output:
Maximum number of bounding boxes in an image: 115
import numpy as np

def pad_annotations(annotations, max_bboxes):
    padded_annotations = []
    for ann in annotations:
        # If the number of bounding boxes is less than the max, pad with zeros
        while len(ann) < max_bboxes:
            ann.append([0, 0, 0, 0, 0])  # Pad with [xmin, ymin, xmax, ymax, label] = [0, 0, 0, 0, 0]
        padded_annotations.append(ann)
    return np.array(padded_annotations)

# Pad the annotations
padded_train_annotations = pad_annotations(train_annotations, max_bboxes)
padded_val_annotations = pad_annotations(val_annotations, max_bboxes)

# Check the shapes
print("Padded training annotations shape:", padded_train_annotations.shape)
print("Padded validation annotations shape:", padded_val_annotations.shape)
Output:
Padded training annotations shape: (682, 115, 5)
Padded validation annotations shape: (171, 115, 5)
# Set batch size
batch_size = 8

# Function to convert data into TensorFlow Dataset
def create_tf_dataset(images, annotations, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((images, annotations))
    dataset = dataset.shuffle(buffer_size=len(images))
    dataset = dataset.batch(batch_size)
    return dataset

# Create training and validation datasets
train_dataset = create_tf_dataset(train_images, padded_train_annotations, batch_size)
val_dataset = create_tf_dataset(val_images, padded_val_annotations, batch_size)

# Check dataset shapes
for img_batch, ann_batch in train_dataset.take(1):
    print("Image batch shape:", img_batch.shape)
    print("Annotation batch shape:", ann_batch.shape)
Output:
Image batch shape: (8, 512, 512, 3)
Annotation batch shape: (8, 115, 5)
# Define the backbone model (pre-trained)
backbone = tf.keras.applications.ResNet50(
    include_top=False, input_shape=(512, 512, 3), weights='imagenet'
)

# Freeze the backbone layers (optional, you can unfreeze them later for fine-tuning)
backbone.trainable = False

# Add custom layers on top of the backbone for object detection
def create_model(backbone):
    inputs = keras.Input(shape=(512, 512, 3))
    # Pass input through the backbone
    x = backbone(inputs, training=False)
    # Add some layers for detection (you can adjust this based on Faster R-CNN needs)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    # Output layers for bounding boxes and classification
    bbox_output = layers.Dense(115 * 4, activation='sigmoid')(x)  # For bounding box coordinates
    class_output = layers.Dense(115, activation='softmax')(x)     # For class labels
    # Reshape the output to match the desired number of boxes
    bbox_output = layers.Reshape((115, 4))(bbox_output)
    class_output = layers.Reshape((115, 1))(class_output)  # 3 classes: with_mask, without_mask, mask_weared_incorrect
    # Combine the outputs into one model
    model = keras.Model(inputs=inputs, outputs=[bbox_output, class_output])
    return model

# Create the model
model = create_model(backbone)
model.summary()
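For reference, printing the head shapes right here makes the mismatch with the annotation batches visible (a small diagnostic sketch, not part of the original training code):

# Diagnostic sketch: with the layers above this prints
#   [(None, 115, 4), (None, 115, 1)]
# neither of which matches the (None, 115, 5) annotation batches built earlier.
print(model.output_shape)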
# Define custom loss functions for bounding box regression and classification
def bbox_loss(y_true, y_pred):
    return tf.reduce_mean(tf.losses.mean_squared_error(y_true, y_pred))  # Loss for bounding box coordinates

def class_loss(y_true, y_pred):
    return tf.reduce_mean(tf.losses.categorical_crossentropy(y_true, y_pred))  # Loss for class predictions

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=[bbox_loss, class_loss],
    metrics=['accuracy']
)

# Train the model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,  # Adjust the number of epochs based on the performance
)
Apart from changing the loss function, I have not tried anything else:
def custom_loss(y_true, y_pred):
    # Split the true annotations into bounding boxes and class labels
    true_bboxes = y_true[:, :, :4]  # First 4 values are bounding box coordinates
    true_labels = y_true[:, :, 4]   # Last value is the class label
    # Split the predicted outputs into bounding boxes and class predictions
    pred_bboxes, pred_labels = y_pred  # The model will output a tuple of (bbox_output, class_output)
    # Calculate the bounding box loss (mean squared error) between true and predicted bboxes
    bbox_loss = tf.reduce_mean(tf.losses.mean_squared_error(true_bboxes, pred_bboxes))
    # Calculate the classification loss (sparse categorical cross-entropy) for class labels
    class_loss = tf.reduce_mean(tf.losses.sparse_categorical_crossentropy(true_labels, pred_labels))
    # Combine the losses
    return bbox_loss + class_loss
It returns this error:
OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.
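My guess is that the line `pred_bboxes, pred_labels = y_pred` is the problem: Keras seems to call the loss separately for each output, so inside the function y_pred is a single symbolic tensor, and unpacking it means iterating over it. A tiny sketch of the pattern (illustration only):

# Illustration of the suspected cause: unpacking a symbolic tensor inside a
# tf.function iterates over it, which graph mode does not allow.
import tensorflow as tf

@tf.function
def unpack_demo(t):
    a, b = t  # same pattern as `pred_bboxes, pred_labels = y_pred`
    return a, b

# unpack_demo(tf.zeros((8, 115, 4)))  # raises an OperatorNotAllowedInGraphError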
Can someone help me fix this error?
I found the solution. The cause was that the model's class output had a different shape from the class labels in the ground truth:
class_output = layers.Dense(115, activation='softmax')(x)
class_output = layers.Reshape((115, 1))(class_output)
In the model function, change the code above to this:
class_output = layers.Dense(115 * 3, activation='softmax')(x)
class_output = layers.Reshape((115, 3))(class_output)
And the class loss changed from:
def class_loss(y_true, y_pred):
    return tf.reduce_mean(tf.losses.categorical_crossentropy(y_true, y_pred))  # Loss for class predictions
to this:
def class_loss(y_true, y_pred):
    true_labels = y_true[:, :, 4]  # Extract the class labels from y_true
    return tf.reduce_mean(tf.losses.sparse_categorical_crossentropy(true_labels, y_pred))  # Loss for class predictions