尽管进行了超参数调整,模型始终以高置信度将图像分类为猫

问题描述 投票:0回答:1

我正在尝试使用下面的代码在图像上训练模型。我的目录结构如下:

  • PetImages
    文件夹大小约为1GB,位于我的
    main.py
    脚本旁边。
  • PetImages
    文件夹内,有两个子文件夹:
    Cats
    Dogs
  • 每个子文件夹分别包含 10,000 张猫和狗的图像。
import os
import tensorflow as tf
from keras import layers
from tensorflow import keras

# === Constants for easy hyperparameter tuning ===

BATCH_SIZE = 256  # Batch size for training.
EPOCHS = 80  # Number of training epochs.
LEARNING_RATE = 5e-4  # Learning rate for Adam optimizer.

# === End of constants ===

# Filter out corrupted images
def filter_corrupted_images():
    num_skipped = 0
    for folder_name in ("Cat", "Dog"):
        folder_path = os.path.join("PetImages", folder_name)
        for fname in os.listdir(folder_path):
            fpath = os.path.join(folder_path, fname)
            try:
                with open(fpath, "rb") as fobj:
                    is_jfif = b"JFIF" in fobj.read(10)
            except Exception:
                is_jfif = False
            if not is_jfif:
                num_skipped += 1
                os.remove(fpath)
    print(f"Deleted {num_skipped} corrupted images.")

# Generate Dataset
def generate_datasets(image_size=(180, 180), batch_size=BATCH_SIZE):
    train_ds, val_ds = keras.utils.image_dataset_from_directory(
        "PetImages",
        validation_split=0.2,
        subset="both",
        seed=1337,
        image_size=image_size,
        batch_size=batch_size,
    )
    return train_ds, val_ds

# Configure the Dataset for Performance
def configure_for_performance(ds):
    AUGMENTATION = keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.3),
        layers.RandomZoom(0.2),
        layers.RandomBrightness(0.2)
    ])
    ds = ds.map(lambda x, y: (AUGMENTATION(x, training=True), y),
                num_parallel_calls=tf.data.AUTOTUNE)
    return ds.prefetch(buffer_size=tf.data.AUTOTUNE)

# Define Model Architecture with adjusted strides and fewer pooling layers
def make_model(input_shape, num_classes=2):
    inputs = keras.Input(shape=input_shape)
    x = layers.Rescaling(1.0 / 255)(inputs)

    # Convolutional Layers with reduced stride for some layers
    FILTER_SIZES = [32, 64, 128, 256, 512]
    KERNEL_SIZE = (3, 3)
    DROPOUT_RATE = 0.5
    ACTIVATION_FUNCTION = "swish"

    for i, size in enumerate(FILTER_SIZES):
        x = layers.Conv2D(size, KERNEL_SIZE, strides=1 if i < 2 else 2, padding="same")(x)  # Use stride 1 for first two layers
        x = layers.BatchNormalization()(x)
        x = layers.Activation(ACTIVATION_FUNCTION)(x)
        if i < 3:  # Apply MaxPooling only in the first three layers
            x = layers.MaxPooling2D(pool_size=(2, 2))(x)

    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(DROPOUT_RATE)(x)
    outputs = layers.Dense(1 if num_classes == 2 else num_classes,
                           activation="sigmoid" if num_classes == 2 else "softmax")(x)

    model = keras.Model(inputs, outputs)
    return model

# Train the Model
def train_model(model, train_ds, val_ds, epochs=EPOCHS):
    model.compile(
        optimizer=keras.optimizers.Adam(LEARNING_RATE),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    checkpoint_callback = keras.callbacks.ModelCheckpoint(
        "model_best.keras", monitor="val_accuracy", save_best_only=True
    )
    early_stopping_callback = keras.callbacks.EarlyStopping(
        monitor="val_loss", patience=5, restore_best_weights=True
    )

    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[checkpoint_callback, early_stopping_callback],
    )

# Check if model exists and load it for continuing training
def load_model_if_exists(model_filepath="model_best.keras"):
    if os.path.exists(model_filepath):
        print(f"Loading existing model from {model_filepath}...")
        model = keras.models.load_model(model_filepath)
    else:
        print("No existing model found, starting a new model...")
        model = make_model(input_shape=(180, 180, 3))
    return model

if __name__ == "__main__":
    filter_corrupted_images()

    # Check if a saved model exists, if yes, load it, if not, create a new one
    model = load_model_if_exists("model_best.keras")

    # Prepare the datasets for training
    train_ds, val_ds = generate_datasets(image_size=(180, 180), batch_size=BATCH_SIZE)
    train_ds = configure_for_performance(train_ds)
    val_ds = configure_for_performance(val_ds)

    # Continue training or start fresh
    train_model(model, train_ds, val_ds, epochs=EPOCHS)

    # Save the trained model in .keras format
    model.save("model_best.keras")

经过一番训练后,我使用下面的代码来测试我的模型。

import numpy as np
from tensorflow import keras
from tensorflow.keras.preprocessing import image

# Load the trained model
model = keras.models.load_model("model_best.keras")

# Preprocess image for prediction
def preprocess_image(img_path, image_size=(180, 180)):
    img = image.load_img(img_path, target_size=image_size)
    img_array = image.img_to_array(img)
    img_array = np.expand_dims(img_array, axis=0)
    img_array = img_array / 255.0  # Normalize to match training
    return img_array

# Predict single image
def predict_image(img_path):
    img_array = preprocess_image(img_path)
    predictions = model.predict(img_array)
    print(f"The image at {img_path} is likely a Cat with confidence {1 - predictions[0][0]:.2f}")
    print(f"The image at {img_path} is likely a Dog with confidence {predictions[0][0]:.2f}")

if __name__ == "__main__":
    predict_image("Cat01.jpeg")
    predict_image("Cat02.jpg")
    predict_image("Cat03.jpg")
    predict_image("Dog01.jpeg")
    predict_image("Dog02.jpg")
    predict_image("Dog03.jpg")

这是输出:

1/1 ━━━━━━━━━━━━━━━━━━━━ 1s 642ms/step
The image at Cat01.jpeg is likely a Cat with confidence 0.91
The image at Cat01.jpeg is likely a Dog with confidence 0.09
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 31ms/step
The image at Cat02.jpg is likely a Cat with confidence 0.92
The image at Cat02.jpg is likely a Dog with confidence 0.08
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 37ms/step
The image at Cat03.jpg is likely a Cat with confidence 0.92
The image at Cat03.jpg is likely a Dog with confidence 0.08
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 44ms/step
The image at Dog01.jpeg is likely a Cat with confidence 0.92
The image at Dog01.jpeg is likely a Dog with confidence 0.08
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 30ms/step
The image at Dog02.jpg is likely a Cat with confidence 0.92
The image at Dog02.jpg is likely a Dog with confidence 0.08
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 31ms/step
The image at Dog03.jpg is likely a Cat with confidence 0.92
The image at Dog03.jpg is likely a Dog with confidence 0.08

它每次都会以几乎相同的置信度将图像分类为猫。 我尝试过更改批量大小、层数和学习率,但到目前为止没有任何效果。可能是什么问题?

python tensorflow machine-learning keras deep-learning
1个回答
0
投票

你会打乱你的训练数据吗?如果猫和狗作为单独的组输入,您的模型将预测案例中的最后一组“猫”。这是分类模型的本质。您需要先混合数据,然后再将其提供给网络。希望有帮助。

© www.soinside.com 2019 - 2024. All rights reserved.