I have a CNN model that classifies images of yes/no answers: some contain checkboxes (ticked or not), and some are circled (or not circled).
My goal is for the model to predict which class each image belongs to.
So I created four folders: ticked, unticked, circled_yes, and circled_no.
To increase the number of images per class, I reprocessed the images at different resolutions (80x80, 120x80).
The number of images is roughly the same across classes.
The model summary reports about 2M parameters, and I get 98.2% accuracy.
Is there anything I can do to improve it?
I tried reducing the parameter count, but performance got worse.
This is the code I am currently using:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import Adam
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import classification_report
import logging
import random
# Set the random seeds for reproducibility
os.environ["PYTHONHASHSEED"] = "0"
random.seed(42)
np.random.seed(42)
tf.random.set_seed(42)
os.environ["TF_DETERMINISTIC_OPS"] = "1"
# Initialize logger
logging.basicConfig(level=logging.INFO)
LOGGER = logging.getLogger(__name__)
# Load and preprocess images
def load_images_from_folder(folder):
    current_dir = os.getcwd()
    folder_path = os.path.join(current_dir, folder)
    LOGGER.info(f"Folder path is {folder_path}")
    images = []
    labels = []
    if not os.path.isdir(folder_path):
        raise ValueError(f"Folder {folder_path} does not exist.")
    for label in os.listdir(folder_path):
        LOGGER.info(f"Processing label: {label}")
        if label == "images_test":
            continue
        label_folder = os.path.join(folder_path, label)
        LOGGER.info(f"Label '{label}' folder is {label_folder}")
        if not os.path.isdir(label_folder):
            continue
        # Walk through the label folder and its subfolders
        for root, dirs, files in os.walk(label_folder):
            for file in files:
                img_path = os.path.join(root, file)
                # LOGGER.info(f"Processing image {img_path}")
                img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)  # Load image in grayscale
                if img is not None:
                    img = cv2.resize(img, (80, 140))  # Resize to 80x140 (width x height) for the model
                    images.append(img)
                    labels.append(label)
                else:
                    LOGGER.warning(f"Failed to load image {img_path}")
    # Removed the 'break' statement to allow processing all labels
    return np.array(images), np.array(labels)
# Load data
LOGGER.info("Loading images from dataset")
image_data, labels = load_images_from_folder("dataset/")
image_data = image_data.reshape(-1, 140, 80, 1) # Add channel dimension for grayscale
image_data = image_data / 255.0 # Normalize the pixel values
# Convert labels to integers (e.g., ticked=0, unticked=1, circled_yes=2, circled_no=3)
LOGGER.info("Labeling images")
label_mapping = {"ticked": 0, "unticked": 1, "circled_yes": 2, "circled_no": 3}
labels = np.array([label_mapping[label] for label in labels])
# Count the number of images per label
unique_labels, counts = np.unique(labels, return_counts=True)
label_names = {v: k for k, v in label_mapping.items()} # Reverse mapping
print("\nNumber of images per label in the entire dataset:")
for label_int, count in zip(unique_labels, counts):
    label_name = label_names[label_int]
    print(f"{label_name} ({label_int}): {count} images")
# Split data into training, validation, and testing
LOGGER.info("Splitting data into training, validation, and testing")
X_train_full, X_test, y_train_full, y_test = train_test_split(image_data, labels, test_size=0.2, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_full, y_train_full, test_size=0.1, random_state=42
)  # 10% of the remaining training data held out for validation
# Function to count labels
def count_labels(y, dataset_name):
    unique_labels, counts = np.unique(y, return_counts=True)
    print(f"\nNumber of images per label in the {dataset_name} set:")
    for label_int, count in zip(unique_labels, counts):
        label_name = label_names[label_int]
        print(f"{label_name} ({label_int}): {count} images")
# Count labels in each dataset
count_labels(y_train, "training")
count_labels(y_val, "validation")
count_labels(y_test, "testing")
# Compute class weights
class_weights_array = class_weight.compute_class_weight(class_weight="balanced", classes=np.unique(y_train), y=y_train)
class_weights = dict(enumerate(class_weights_array))
# Create the CNN model
LOGGER.info("Creating CNN model with dropout")
model = models.Sequential()
# First convolutional block
model.add(layers.Conv2D(32, (3, 3), activation="relu", input_shape=(140, 80, 1)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25)) # Dropout layer added
# Second convolutional block
model.add(layers.Conv2D(64, (3, 3), activation="relu"))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25)) # Dropout layer added
# Third convolutional block
model.add(layers.Conv2D(64, (3, 3), activation="relu"))
# Optional pooling layer if needed
# model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25)) # Dropout layer added
# Flatten and dense layers
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="relu"))
model.add(layers.Dropout(0.5)) # Dropout layer added
model.add(layers.Dense(4, activation="softmax"))
optimizer = Adam(learning_rate=0.001)
lr_scheduler = ReduceLROnPlateau(monitor="val_loss", factor=0.1, patience=3)
# Compile the model
LOGGER.info("Compiling the model")
model.compile(optimizer=optimizer, loss="sparse_categorical_crossentropy", metrics=["accuracy"])
model.summary()
# Train the model
LOGGER.info("Training the model")
early_stopping = EarlyStopping(monitor="val_loss", patience=5, restore_best_weights=True)
history = model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_val, y_val),
    callbacks=[early_stopping, lr_scheduler],
    class_weight=class_weights,
)
print("")
print("")
print("")
print("")
print("")
# Plot training and validation accuracy and loss
plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(history.history["accuracy"], label="Training Accuracy", color="blue")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy", color="orange")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.ylim([0, 1])
plt.legend(loc="lower right")
plt.title("Model Accuracy")
plt.subplot(1, 2, 2)
plt.plot(history.history["loss"], label="Training Loss", color="blue")
plt.plot(history.history["val_loss"], label="Validation Loss", color="orange")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(loc="upper right")
plt.title("Model Loss")
plt.tight_layout()
plt.show()
# Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)
LOGGER.info(f"Test accuracy: {test_acc}")
# Evaluate the model
LOGGER.info("\nEvaluating the model")
LOGGER.info("Classification Report")
y_pred_probs = model.predict(X_test)
y_pred = np.argmax(y_pred_probs, axis=1)
print(classification_report(y_test, y_pred, target_names=list(label_mapping.keys())))
This gives me 98% accuracy and a validation loss of 0.2698 (monotonically decreasing during training). The final report is:
              precision    recall  f1-score   support

      ticked       0.96      0.97      0.96        69
    unticked       0.97      0.95      0.96        59
 circled_yes       0.99      1.00      0.99        79
  circled_no       1.00      0.99      0.99        70

    accuracy                           0.98       277
   macro avg       0.98      0.98      0.98       277
weighted avg       0.98      0.98      0.98       277
I tried adding more images to the dataset, but that seems to degrade performance, so I am stuck.
Augmentation seems to confuse the model and lowers accuracy.
Any tips for improving the model?
I have some suggestions to improve your training.
Avoid augmentations that change an image's original label. For example, if an image has a circle in one corner and you apply a random center crop, you lose the circle, but the image still carries the circled label.
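As a minimal sketch of label-preserving augmentation, assuming the Keras preprocessing layers from TF 2.x (the factors here are illustrative, not tuned for your data):

augment = tf.keras.Sequential([
    tf.keras.layers.RandomRotation(0.02),           # roughly +/- 7 degrees; small enough to keep marks intact
    tf.keras.layers.RandomTranslation(0.05, 0.05),  # shift up to 5% of height/width
])
# Apply only during training, e.g. mapped over a tf.data pipeline:
# train_ds = train_ds.map(lambda x, y: (augment(x, training=True), y))

Small rotations and translations keep a tick or circle inside the frame, whereas crops can cut the mark out entirely.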
I see you are loading all the images into a numpy array. That is not memory-efficient (unless the dataset is small). It would be better to use a data loader instead.
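A minimal sketch, assuming each class has its own folder directly under dataset/ (with images_test moved out of it) and TF >= 2.3; image_dataset_from_directory streams batches from disk instead of holding everything in one array:

train_ds = tf.keras.utils.image_dataset_from_directory(
    "dataset/",
    labels="inferred",          # folder names become integer labels
    label_mode="int",
    color_mode="grayscale",
    image_size=(140, 80),       # (height, width), matching the model input
    batch_size=32,
    validation_split=0.2,
    subset="training",
    seed=42,
)
# Normalize lazily, batch by batch, and prefetch for throughput
train_ds = train_ds.map(lambda x, y: (x / 255.0, y)).prefetch(tf.data.AUTOTUNE)

You would then pass train_ds directly to model.fit instead of the X/y arrays.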
These are general tips, but it would help to have more information about your use case: