Faster R-CNN ResNet50 model output dimension mismatch


I'm trying to build a classification and detection model using Faster R-CNN with a ResNet50 backbone, but I keep getting this error:

ValueError                                Traceback (most recent call last)
Cell In[38], line 2
      1 # Train the model
----> 2 history = model.fit(
      3     train_dataset,
      4     validation_data=val_dataset,
      5     epochs=10,  # Adjust the number of epochs based on the performance
      6 )

File c:\Users\muham\OneDrive\Desktop\Tensorflow_O_B_D_Folder\myenv_tf\lib\site-packages\keras\utils\traceback_utils.py:70, in filter_traceback.<locals>.error_handler(*args, **kwargs)
     67     filtered_tb = _process_traceback_frames(e.__traceback__)
     68     # To get the full stack trace, call:
     69     # `tf.debugging.disable_traceback_filtering()`
---> 70     raise e.with_traceback(filtered_tb) from None
     71 finally:
     72     del filtered_tb

File ~\AppData\Local\Temp\__autograph_generated_filejpp6xfsb.py:15, in outer_factory.<locals>.inner_factory.<locals>.tf__train_function(iterator)
     13 try:
     14     do_return = True
---> 15     retval_ = ag__.converted_call(ag__.ld(step_function), (ag__.ld(self), ag__.ld(iterator)), None, fscope)
     16 except:
     17     do_return = False
...
    File "c:\Users\muham\OneDrive\Desktop\Tensorflow_O_B_D_Folder\myenv_tf\lib\site-packages\keras\losses.py", line 1486, in mean_squared_error
        return backend.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)

    ValueError: Dimensions must be equal, but are 4 and 5 for '{{node bbox_loss/SquaredDifference}} = SquaredDifference[T=DT_FLOAT](model_2/reshape_7/Reshape, Cast)' with input shapes: [?,115,4], [?,115,5].

I'm not very familiar with this model, so I didn't know what steps to take to try to fix it.

I searched online, but I couldn't work out how to solve it. Here is my code:

import tensorflow as tf
from tensorflow import keras
import os
import cv2
import numpy as np
from tensorflow.keras import layers

# Path to images and annotations
image_folder = 'archive (20)/images'
annotation_folder = 'annotations'

# Fixed image size (e.g., 512x512)
fixed_height = 512
fixed_width = 512

# Function to load images and annotations from text file format
def load_data(image_folder, annotation_folder, fixed_height, fixed_width):
    images = []
    annotations = []
    
    for image_file in os.listdir(image_folder):
        if image_file.endswith(".png") or image_file.endswith(".jpg"):
            # Load image
            image_path = os.path.join(image_folder, image_file)
            image = cv2.imread(image_path)
            
            # Resize image to fixed size
            image_resized = cv2.resize(image, (fixed_width, fixed_height))
            images.append(image_resized)

            # Load corresponding annotation
            annotation_file = image_file.replace('.png', '.txt').replace('.jpg', '.txt')
            annotation_path = os.path.join(annotation_folder, annotation_file)
            with open(annotation_path, 'r') as f:
                bboxes = []
                for line in f:
                    xmin, ymin, xmax, ymax, label = line.strip().split(' ')
                    bbox = [int(xmin), int(ymin), int(xmax), int(ymax), label]
                    bboxes.append(bbox)
                annotations.append(bboxes)
    
    # Convert to numpy arrays for TensorFlow
    images = np.array(images)
    annotations = np.array(annotations, dtype=object)  # Keep as object array due to varying numbers of boxes
    
    # Display shapes
    print("Images shape:", images.shape)
    print("Annotations shape:", len(annotations), "annotations, each containing varying bounding boxes.")
    
    return images, annotations

# Load the data
images, annotations = load_data(image_folder, annotation_folder, fixed_height, fixed_width)


Output: Images shape: (853, 512, 512, 3); Annotations shape: 853 annotations, each containing a varying number of bounding boxes.

def preprocess_annotations(annotations, fixed_height, fixed_width, class_mapping):
    processed_annotations = []
    for bboxes in annotations:
        processed_bboxes = []
        for bbox in bboxes:
            xmin, ymin, xmax, ymax, label = bbox
            # Normalize coordinates
            xmin = xmin / fixed_width
            xmax = xmax / fixed_width
            ymin = ymin / fixed_height
            ymax = ymax / fixed_height
            # Convert class label to numerical form
            label = class_mapping[label]
            processed_bboxes.append([xmin, ymin, xmax, ymax, label])
        processed_annotations.append(processed_bboxes)
    
    return np.array(processed_annotations, dtype=object)

class_mapping = {
    'with_mask': 1,
    'without_mask': 0,
    'mask_weared_incorrect': 2
}


# Preprocess annotations
processed_annotations = preprocess_annotations(annotations, fixed_height, fixed_width, class_mapping)

# Check shape of processed annotations
print("Processed Annotations shape:", len(processed_annotations), "images with bounding boxes.")

Output: Processed Annotations shape: 853 images with bounding boxes.

from sklearn.model_selection import train_test_split

# Split into training and validation sets
train_images, val_images, train_annotations, val_annotations = train_test_split(
    images, processed_annotations, test_size=0.2, random_state=42
)

print(f"Training images: {len(train_images)}, Validation images: {len(val_images)}")
Output: Training images: 682, Validation images: 171
# Find the maximum number of bounding boxes in any image
max_bboxes = max([len(ann) for ann in processed_annotations])
print(f"Maximum number of bounding boxes in an image: {max_bboxes}")

Output: Maximum number of bounding boxes in an image: 115

import numpy as np

def pad_annotations(annotations, max_bboxes):
    padded_annotations = []
    for ann in annotations:
        # If the number of bounding boxes is less than the max, pad with zeros
        while len(ann) < max_bboxes:
            ann.append([0, 0, 0, 0, 0])  # Pad with [xmin, ymin, xmax, ymax, label] = [0, 0, 0, 0, 0]
        padded_annotations.append(ann)
    return np.array(padded_annotations)

# Pad the annotations
padded_train_annotations = pad_annotations(train_annotations, max_bboxes)
padded_val_annotations = pad_annotations(val_annotations, max_bboxes)

# Check the shapes
print("Padded training annotations shape:", padded_train_annotations.shape)
print("Padded validation annotations shape:", padded_val_annotations.shape)

Output: Padded training annotations shape: (682, 115, 5); Padded validation annotations shape: (171, 115, 5)

# Set batch size
batch_size = 8

# Function to convert data into TensorFlow Dataset
def create_tf_dataset(images, annotations, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((images, annotations))
    dataset = dataset.shuffle(buffer_size=len(images))
    dataset = dataset.batch(batch_size)
    return dataset

# Create training and validation datasets
train_dataset = create_tf_dataset(train_images, padded_train_annotations, batch_size)
val_dataset = create_tf_dataset(val_images, padded_val_annotations, batch_size)

# Check dataset shapes
for img_batch, ann_batch in train_dataset.take(1):
    print("Image batch shape:", img_batch.shape)
    print("Annotation batch shape:", ann_batch.shape)

Output: Image batch shape: (8, 512, 512, 3); Annotation batch shape: (8, 115, 5)
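One thing worth noting at this point: the model built in the next step has two outputs (bbox_output and class_output), while each dataset element carries a single (115, 5) target tensor, so Keras ends up feeding that same 5-wide tensor to both losses. That is exactly the mismatch the traceback above reports. A minimal sketch of a restructuring that splits the targets to match the outputs (hypothetical, not part of the original code; split_targets is an illustrative name):

# Hypothetical restructuring: split each (115, 5) annotation into a
# (bboxes, labels) tuple so the targets mirror the model's two outputs.
def split_targets(image, annotation):
    bboxes = annotation[:, :4]                    # (115, 4) box coordinates
    labels = tf.cast(annotation[:, 4], tf.int32)  # (115,) integer class ids
    return image, (bboxes, labels)

# Applied per element, i.e. inside create_tf_dataset before .batch():
# dataset = dataset.map(split_targets)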

# Define the backbone model (pre-trained)
backbone = tf.keras.applications.ResNet50(
    include_top=False, input_shape=(512, 512, 3), weights='imagenet'
)

# Freeze the backbone layers (optional, you can unfreeze them later for fine-tuning)
backbone.trainable = False

# Add custom layers on top of the backbone for object detection
def create_model(backbone):
    inputs = keras.Input(shape=(512, 512, 3))
    
    # Pass input through the backbone
    x = backbone(inputs, training=False)
    
    # Add some layers for detection (you can adjust this based on Faster R-CNN needs)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(512, activation='relu')(x)
    
    # Output layers for bounding boxes and classification
    bbox_output = layers.Dense(115 * 4, activation='sigmoid')(x)  # For bounding box coordinates
    class_output = layers.Dense(115, activation='softmax')(x)     # For class labels
    
    # Reshape the output to match the desired number of boxes
    bbox_output = layers.Reshape((115, 4))(bbox_output)
    class_output = layers.Reshape((115, 1))(class_output)  # 3 classes: with_mask, without_mask, mask_weared_incorrect
    
    # Combine the outputs into one model
    model = keras.Model(inputs=inputs, outputs=[bbox_output, class_output])
    
    return model

# Create the model
model = create_model(backbone)
model.summary()
# Define custom loss functions for bounding box regression and classification
def bbox_loss(y_true, y_pred):
    return tf.reduce_mean(tf.losses.mean_squared_error(y_true, y_pred))  # Loss for bounding box coordinates

def class_loss(y_true, y_pred):
    return tf.reduce_mean(tf.losses.categorical_crossentropy(y_true, y_pred))  # Loss for class predictions

# Compile the model
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=[bbox_loss, class_loss],
    metrics=['accuracy']
)
# Train the model
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=10,  # Adjust the number of epochs based on the performance
)

Apart from changing the loss function, I haven't tried anything else:

def custom_loss(y_true, y_pred):
    # Split the true annotations into bounding boxes and class labels
    true_bboxes = y_true[:, :, :4]  # First 4 values are bounding box coordinates
    true_labels = y_true[:, :, 4]   # Last value is the class label
    
    # Split the predicted outputs into bounding boxes and class predictions
    pred_bboxes, pred_labels = y_pred  # The model will output a tuple of (bbox_output, class_output)
    
    # Calculate the bounding box loss (mean squared error) between true and predicted bboxes
    bbox_loss = tf.reduce_mean(tf.losses.mean_squared_error(true_bboxes, pred_bboxes))
    
    # Calculate the classification loss (sparse categorical cross-entropy) for class labels
    class_loss = tf.reduce_mean(tf.losses.sparse_categorical_crossentropy(true_labels, pred_labels))
    
    # Combine the losses
    return bbox_loss + class_loss

It returned this error:

OperatorNotAllowedInGraphError: Iterating over a symbolic `tf.Tensor` is not allowed: AutoGraph did convert this function. This might indicate you are trying to use an unsupported feature.

Can someone help me solve this error?

python tensorflow faster-rcnn
1 Answer

I found the solution: the problem was that the model's class output had a different shape from the class labels in the ground truth.

class_output = layers.Dense(115, activation='softmax')(x)  
class_output = layers.Reshape((115, 1))(class_output)

Change the code above, inside the model function, to this:

class_output = layers.Dense(115 * 3, activation='softmax')(x)
class_output = layers.Reshape((115, 3))(class_output)
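With that change the class head predicts 3 values per box, which matches what sparse_categorical_crossentropy expects when the targets are integer class ids. A quick illustrative shape check (made-up tensors, just to show the contract):

# Illustrative only: y_pred carries one score per class, y_true one id per box.
y_pred = tf.nn.softmax(tf.random.uniform((8, 115, 3)), axis=-1)  # (batch, boxes, num_classes)
true_labels = tf.zeros((8, 115))                                 # (batch, boxes) integer class ids
loss = tf.losses.sparse_categorical_crossentropy(true_labels, y_pred)
print(loss.shape)                                                # (8, 115): one loss value per box

One caveat with the fix as written: Dense(115 * 3, activation='softmax') normalizes across all 345 units at once, not per box. Applying the softmax after the Reshape (for example with layers.Softmax(axis=-1)) would give each box its own 3-way distribution.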

And the class loss changed from:
def class_loss(y_true, y_pred):
    return tf.reduce_mean(tf.losses.categorical_crossentropy(y_true, y_pred))  # Loss for class predictions

to this:

def class_loss(y_true, y_pred):
    true_labels = y_true[:, :, 4]  # Extract the class labels from y_true
    return tf.reduce_mean(tf.losses.sparse_categorical_crossentropy(true_labels, y_pred))  # Loss for class predictions
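The bbox head presumably needs the same treatment, since the original ValueError came from bbox_loss comparing the (115, 4) predictions against the full (115, 5) targets. A sketch of the matching slice (assuming the same y_true layout of [xmin, ymin, xmax, ymax, label]):

def bbox_loss(y_true, y_pred):
    true_bboxes = y_true[:, :, :4]  # keep the 4 coordinates, drop the class id
    return tf.reduce_mean(tf.losses.mean_squared_error(true_bboxes, y_pred))

As for the OperatorNotAllowedInGraphError from the earlier custom_loss attempt: Keras calls each loss function separately per output, so y_pred there is a single tensor, and pred_bboxes, pred_labels = y_pred tries to iterate over a symbolic tensor, which graph mode does not allow.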