I need help adding a YOLOv8 model to the code below, instead of using InceptionV3, to extract image features for my project. I need to pass the detected objects and extract features from the YOLOv8 model in order to generate captions with a transformer.
def CNN_Encoder_Incep():
    inception_v3 = tf.keras.applications.InceptionV3(
        include_top=False,
        weights='imagenet'
    )
    inception_v3.trainable = False

    output = inception_v3.output
    output = tf.keras.layers.Reshape(
        (-1, output.shape[-1]))(output)
    cnn_model = tf.keras.models.Model(inception_v3.input, output)
    return cnn_model
class ImageCaptioningModel(tf.keras.Model):

    def __init__(self, cnn_model, encoder, decoder, image_aug=None):
        super().__init__()
        self.cnn_model = cnn_model
        self.encoder = encoder
        self.decoder = decoder
        self.image_aug = image_aug
        self.loss_tracker = tf.keras.metrics.Mean(name="loss")
        self.acc_tracker = tf.keras.metrics.Mean(name="accuracy")

    def calculate_loss(self, y_true, y_pred, mask):
        loss = self.loss(y_true, y_pred)
        mask = tf.cast(mask, dtype=loss.dtype)
        loss *= mask
        return tf.reduce_sum(loss) / tf.reduce_sum(mask)

    def calculate_accuracy(self, y_true, y_pred, mask):
        accuracy = tf.equal(y_true, tf.argmax(y_pred, axis=2))
        accuracy = tf.math.logical_and(mask, accuracy)
        accuracy = tf.cast(accuracy, dtype=tf.float32)
        mask = tf.cast(mask, dtype=tf.float32)
        return tf.reduce_sum(accuracy) / tf.reduce_sum(mask)
    def compute_loss_and_acc(self, img_embed, captions, training=True):
        # Pass the training flag through instead of hard-coding True,
        # so test_step actually runs the encoder/decoder in inference mode.
        encoder_output = self.encoder(img_embed, training=training)
        y_input = captions[:, :-1]
        y_true = captions[:, 1:]
        mask = (y_true != 0)
        y_pred = self.decoder(
            y_input, encoder_output, training=training, mask=mask
        )
        loss = self.calculate_loss(y_true, y_pred, mask)
        acc = self.calculate_accuracy(y_true, y_pred, mask)
        return loss, acc
    def train_step(self, batch):
        imgs, captions = batch
        if self.image_aug:
            imgs = self.image_aug(imgs)
        img_embed = self.cnn_model(imgs)
        with tf.GradientTape() as tape:
            loss, acc = self.compute_loss_and_acc(
                img_embed, captions
            )
        train_vars = (
            self.encoder.trainable_variables + self.decoder.trainable_variables
        )
        grads = tape.gradient(loss, train_vars)
        self.optimizer.apply_gradients(zip(grads, train_vars))
        self.loss_tracker.update_state(loss)
        self.acc_tracker.update_state(acc)
        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}

    def test_step(self, batch):
        imgs, captions = batch
        img_embed = self.cnn_model(imgs)
        loss, acc = self.compute_loss_and_acc(
            img_embed, captions, training=False
        )
        self.loss_tracker.update_state(loss)
        self.acc_tracker.update_state(acc)
        return {"loss": self.loss_tracker.result(), "acc": self.acc_tracker.result()}

    @property
    def metrics(self):
        return [self.loss_tracker, self.acc_tracker]
cnn_model = CNN_Encoder_Incep()
caption_model = ImageCaptioningModel(
    cnn_model=cnn_model, encoder=encoder, decoder=decoder, image_aug=image_augmentation,
)
This is what I tried, but I keep getting multiple errors when I try to pass the result into the cnn_model variable.
def CNN_Encoder():
    yolov8_model = tf.keras.models.load_model('./content/yolov8n_objdet_oidv7_640x640.pt')
    yolov8_model.trainable = False
    output = yolov8_model.output
    output = tf.keras.layers.Reshape((-1, output.shape[-1]))(output)
    cnn_model = tf.keras.models.Model(yolov8_model.input, output)
    cnn_model_onnx = cnn_model.export(format='onnx')
    return cnn_model
The .pt file extension means that ./content/yolov8n_objdet_oidv7_640x640.pt is most likely a PyTorch state dict; those are usually saved with a .pt or .pth extension, so tf.keras.models.load_model cannot read it.

If you want to load a pretrained YOLOv8 model into a Keras model object, you can do that with KerasCV: use keras_cv.models.YOLOV8Backbone.from_preset() if you only want the backbone, or keras_cv.models.YOLOV8Detector() for the whole object-detection model, roughly as sketched below.
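For the detector route, a minimal sketch of the constructor call (num_classes and bounding_box_format here are illustrative assumptions, not values from your project):

import keras_cv

# Sketch only: build the full YOLOv8 detection model around a COCO-pretrained backbone.
detector = keras_cv.models.YOLOV8Detector(
    backbone=keras_cv.models.YOLOV8Backbone.from_preset("yolo_v8_xs_backbone_coco"),
    num_classes=80,                  # assumption: COCO-style class count
    bounding_box_format="xywh",      # assumption: pick the format your labels use
)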
In your case, you could try

yolov8_model = keras_cv.models.YOLOV8Backbone.from_preset("yolo_v8_xs_backbone_coco")

which loads YOLOv8-nano weights pretrained on the COCO dataset.
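Building on that preset, here is a minimal sketch of a backbone-based replacement for CNN_Encoder_Incep. The function name CNN_Encoder_YOLO, the 640x640 input size, and the assumption that calling the backbone returns a single final feature map are mine, not part of your code, so verify them against your keras_cv version:

import tensorflow as tf
import keras_cv

def CNN_Encoder_YOLO():
    # COCO-pretrained extra-small ("nano"-sized) YOLOv8 backbone from KerasCV.
    backbone = keras_cv.models.YOLOV8Backbone.from_preset("yolo_v8_xs_backbone_coco")
    backbone.trainable = False

    # Assumption: 640x640 RGB inputs; adjust to whatever your data pipeline produces.
    inputs = tf.keras.Input(shape=(640, 640, 3))
    features = backbone(inputs)  # deepest feature map, e.g. (batch, 20, 20, channels)

    # Flatten the spatial grid into a sequence of feature vectors, the same
    # (batch, num_patches, channels) layout the transformer encoder already expects.
    outputs = tf.keras.layers.Reshape((-1, features.shape[-1]))(features)
    return tf.keras.models.Model(inputs, outputs)

cnn_model = CNN_Encoder_YOLO()

The images you feed this cnn_model need to be resized and preprocessed to the resolution the backbone expects; the rest of the captioning pipeline (encoder, decoder, ImageCaptioningModel) can stay as it is, since it only consumes the flattened (batch, sequence, channels) embedding.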