Poor accuracy and long training time for an MNIST model when loading data with tf.data


I'm new to machine learning and Python, so apologies for the newbie question.

I'm experimenting with a simple CNN model on the MNIST dataset. First, I saved the MNIST data as PNG files.

import cv2
import os

from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

img_root = './images'
train = os.path.join(img_root, 'train')
test = os.path.join(img_root, 'test')

if not os.path.exists(img_root):
    os.mkdir(img_root)

if not os.path.exists(train):
    os.mkdir(train)

if not os.path.exists(test):
    os.mkdir(test)

# Save Train images
for i in range(x_train.shape[0]):
    img_dir = os.path.join(train, str(y_train[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_train[i])

# Save Test images
for i in range(x_test.shape[0]):
    img_dir = os.path.join(test, str(y_test[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_test[i])

Then I loaded these images with three different methods and trained and tested the model with each.

Method 1, "cv_only": load all images into a numpy array with cv2.imread(); don't use tf.data. The result is

Epoch 48/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0025 - accuracy: 0.9992 - val_loss: 0.0490 - val_accuracy: 0.9937

Epoch 49/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0477 - val_accuracy: 0.9924

Epoch 50/50 469/469 [==============================] - 2s 5ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0485 - val_accuracy: 0.9914

Learning time: 199[s]
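
In case it helps, here is a minimal sketch of what I mean by method 1, using the same ./images/train/<label>/*.png layout and normalization as the full code further down:

import glob
import cv2
import numpy as np
from keras.utils import to_categorical

# Read every PNG into memory as a plain numpy array (no tf.data involved).
images, labels = [], []
for label in range(10):
    for fname in glob.glob(f"./images/train/{label}/*.png"):
        images.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))  # 28x28 uint8
        labels.append(label)

x = np.array(images).reshape(-1, 28, 28, 1).astype('float32') / 255  # scale to [0, 1]
y = to_categorical(np.array(labels))                                 # one-hot labels
# model.fit(x, y, batch_size=128, epochs=50, ...) then trains on these in-memory arrays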

Method 2, "tf_cv": build the dataset with tf.data.Dataset.from_tensor_slices(), map() and batch(); inside map(), read the images with cv2.imread(). The result is

Epoch 48/50 469/469 [==============================] - 32s 68ms/step - loss: 5.8638e-07 - accuracy: 1.0000 - val_loss: 4.7863 - val_accuracy: 0.4507

Epoch 49/50 469/469 [==============================] - 32s 68ms/step - loss: 4.6416e-07 - accuracy: 1.0000 - val_loss: 4.8436 - val_accuracy: 0.4514

Epoch 50/50 469/469 [==============================] - 32s 69ms/step - loss: 3.6748e-07 - accuracy: 1.0000 - val_loss: 4.8742 - val_accuracy: 0.4517

Learning time: 1638[s]
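
A minimal sketch of method 2: the images stay on disk and the map() step reads each one with cv2.imread() via tf.numpy_function() (x_train and y_train here are the path and one-hot label arrays built in the full code below):

import tensorflow as tf
import cv2

def process_path_cv2(image_path, label):
    def load_image(path):
        # Runs as ordinary Python/numpy code inside the tf.data pipeline.
        img = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        img = img.astype('float32') / 255
        return img[..., None]  # add the channel dimension -> (28, 28, 1)
    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label

train_datasets = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
train_datasets = train_datasets.batch(128).prefetch(tf.data.AUTOTUNE)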

Method 3, "tf_only": build the dataset with tf.data.Dataset.from_tensor_slices(), map() and batch() (same as method 2), but inside map(), read the images with tf.io.read_file(). The result is

Epoch 48/50 469/469 [==============================] - 16s 33ms/step - loss: 8.5148e-07 - accuracy: 1.0000 - val_loss: 5.4608 - val_accuracy: 0.4065

Epoch 49/50 469/469 [==============================] - 16s 34ms/step - loss: 6.7230e-07 - accuracy: 1.0000 - val_loss: 5.4721 - val_accuracy: 0.4085

Epoch 50/50 469/469 [==============================] - 15s 33ms/step - loss: 5.3065e-07 - accuracy: 1.0000 - val_loss: 5.4845 - val_accuracy: 0.4087

Learning time: 700[s]
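
And a minimal sketch of method 3, which keeps the whole map() step inside TensorFlow ops (again using the path/label arrays from the full code below):

import tensorflow as tf

def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)                       # raw PNG bytes
    image = tf.image.decode_image(image, channels=1)          # decode to a uint8 tensor
    image = tf.image.convert_image_dtype(image, tf.float32)   # uint8 -> float32 in [0, 1]
    return image, label

train_datasets = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
train_datasets = train_datasets.batch(128).prefetch(tf.data.AUTOTUNE)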

What am I doing wrong in methods 2 and 3? Please help.

Here is the full code of the model I used.


import numpy as np
import cv2
import glob
import datetime

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical

# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "tf_cv"

train_images = "./images/train/"
test_images = "./images/test/"

# network parameters
batch_size = 128

categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)


def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)


def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)


def process_path_cv2(image_path, label):

    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32')/255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray

    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label


def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_image(image, channels=1)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, label


start_time = datetime.datetime.now()

train = []
test = []

for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")

for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")


if data_load_method == "cv_only":
    x_train, y_train = make_sample_imgs(train)
    x_test, y_test = make_sample_imgs(test)

    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    x_train, y_train = make_sample(train)
    x_test, y_test = make_sample(test)

    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    train_datasets = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    test_datasets = tf.data.Dataset.from_tensor_slices((x_test, y_test))

    if data_load_method == "tf_cv":
        train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)

    else:
        train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)

    # train_datasets = train_datasets.cache()
    # test_datasets = test_datasets.cache()

    train_datasets = train_datasets.batch(batch_size)
    test_datasets = test_datasets.batch(batch_size)
    train_datasets = train_datasets.prefetch(tf.data.AUTOTUNE)
    test_datasets = test_datasets.prefetch(tf.data.AUTOTUNE)

else:
    print("Method not defined!")
    exit()


model = Sequential()

model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())

model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())

model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Flatten())
model.add(Dense(512, activation="relu"))

model.add(Dense(10, activation="softmax"))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))

elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, batch_size=batch_size, validation_data=test_datasets)

end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')

If I use

train_datasets = train_datasets.cache()

test_datasets = test_datasets.cache()

then the training time drops to about 150 seconds, but the accuracy drops as well. I'm using Python 3.10, Keras 2.10.0 and tensorflow-gpu 2.10.0.

Sorry for the messy code. I'm still learning to program.

1 Answer

I solved it myself. In summary:

  1. Change tf.image.decode_image() to tf.image.decode_png(), because the images are PNG files.
  2. Shuffle the data: train_datasets = train_datasets.shuffle(15000) and test_datasets = test_datasets.shuffle(4000). The previous code did not shuffle at all.

Now all three methods give almost the same val_loss and val_accuracy.
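
Condensed, the two changes look like this (the full updated script follows):

# 1. Decode with decode_png() instead of decode_image(), since the files are PNGs.
def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=1)   # was tf.image.decode_image()
    image = tf.image.resize(image, [28, 28])
    image = tf.image.convert_image_dtype(image, tf.float32)
    image /= 255.0
    return image, label

# 2. Shuffle the datasets (this was missing before).
train_datasets = train_datasets.cache().shuffle(15000).batch(batch_size)
test_datasets = test_datasets.cache().shuffle(4000).batch(batch_size)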

The full updated code is below.

import numpy as np
import cv2
import glob
import datetime

import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical

# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "cv_only"

train_images = "./images/train/"
test_images = "./images/test/"

# network parameters
batch_size = 128

categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)


def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)


def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)


def process_path_cv2(image_path, label):

    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32')/255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray

    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label


def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=1)  # decode_png() instead of decode_image(), since the files are PNGs
    image = tf.image.resize(image, [28, 28])    # Must use this; it increases the val_accuracy
    image = tf.image.convert_image_dtype(image, tf.float32)
    image /= 255.0  # resize() returns float32 in [0, 255], so scale to [0, 1] here
    return image, label


start_time = datetime.datetime.now()

train = []
test = []

for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")

for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")


if data_load_method == "cv_only":
    x_train, y_train = make_sample_imgs(train)
    x_test, y_test = make_sample_imgs(test)

    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255

elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    x_train, y_train = make_sample(train)
    x_test, y_test = make_sample(test)

    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    train_datasets = tf.data.Dataset.from_tensor_slices((x_train, tf.cast(y_train, tf.int64)))
    test_datasets = tf.data.Dataset.from_tensor_slices((x_test, tf.cast(y_test, tf.int64)))

    if data_load_method == "tf_cv":
        train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)

    else:
        train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)

    train_datasets = train_datasets.cache()
    test_datasets = test_datasets.cache()
    train_datasets = train_datasets.shuffle(15000)
    test_datasets = test_datasets.shuffle(4000)
    train_datasets = train_datasets.batch(batch_size)
    test_datasets = test_datasets.batch(batch_size)
    test_datasets = test_datasets.prefetch(buffer_size=tf.data.AUTOTUNE)

else:
    print("Method not defined!")
    exit()


model = Sequential()

model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())

model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())

model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())

model.add(Flatten())
model.add(Dense(512, activation="relu"))

model.add(Dense(10, activation="softmax"))

model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))

elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, validation_data=test_datasets)

end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')