I'm new to machine learning and Python, so apologies for a beginner question.
I'm working through a simple CNN model on the MNIST dataset. First, I saved the MNIST data as PNG files.
import cv2
import os
from keras.datasets import mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()

img_root = './images'
train = os.path.join(img_root, 'train')
test = os.path.join(img_root, 'test')

if not os.path.exists(img_root):
    os.mkdir(img_root)
if not os.path.exists(train):
    os.mkdir(train)
if not os.path.exists(test):
    os.mkdir(test)

# Save Train images
for i in range(x_train.shape[0]):
    img_dir = os.path.join(train, str(y_train[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_train[i])

# Save Test images
for i in range(x_test.shape[0]):
    img_dir = os.path.join(test, str(y_test[i]))
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    image_out = os.path.join(img_dir, str(i) + ".png")
    cv2.imwrite(image_out, x_test[i])
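As a quick sanity check (a small sketch, assuming the ./images layout created above), counting the saved PNG files per digit folder should give 60,000 train and 10,000 test images in total:

import os

# Count the exported PNGs per digit folder; assumes the ./images layout created above.
for split in ('train', 'test'):
    split_dir = os.path.join('./images', split)
    counts = {d: len(os.listdir(os.path.join(split_dir, d))) for d in sorted(os.listdir(split_dir))}
    print(split, counts, 'total:', sum(counts.values()))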
Then I load these images and train/test the model using three different methods.
Method 1, "cv_only": load all images into NumPy arrays with cv2.imread() and do not use tf.data. The result is:
Epoch 48/50
469/469 [==============================] - 2s 5ms/step - loss: 0.0025 - accuracy: 0.9992 - val_loss: 0.0490 - val_accuracy: 0.9937
Epoch 49/50
469/469 [==============================] - 2s 5ms/step - loss: 0.0042 - accuracy: 0.9990 - val_loss: 0.0477 - val_accuracy: 0.9924
Epoch 50/50
469/469 [==============================] - 2s 5ms/step - loss: 0.0033 - accuracy: 0.9991 - val_loss: 0.0485 - val_accuracy: 0.9914
Learning time: 199[s]
Method 2, "tf_cv": build the dataset with tf.data.Dataset.from_tensor_slices(), map() and batch(); inside map(), read the images with cv2.imread(). The result is:
Epoch 48/50
469/469 [==============================] - 32s 68ms/step - loss: 5.8638e-07 - accuracy: 1.0000 - val_loss: 4.7863 - val_accuracy: 0.4507
Epoch 49/50
469/469 [==============================] - 32s 68ms/step - loss: 4.6416e-07 - accuracy: 1.0000 - val_loss: 4.8436 - val_accuracy: 0.4514
Epoch 50/50
469/469 [==============================] - 32s 69ms/step - loss: 3.6748e-07 - accuracy: 1.0000 - val_loss: 4.8742 - val_accuracy: 0.4517
Learning time: 1638[s]
Method 3, "tf_only": build the dataset with tf.data.Dataset.from_tensor_slices(), map() and batch() (same as method 2), but inside map() read the images with tf.io.read_file(). The result is:
Epoch 48/50
469/469 [==============================] - 16s 33ms/step - loss: 8.5148e-07 - accuracy: 1.0000 - val_loss: 5.4608 - val_accuracy: 0.4065
Epoch 49/50
469/469 [==============================] - 16s 34ms/step - loss: 6.7230e-07 - accuracy: 1.0000 - val_loss: 5.4721 - val_accuracy: 0.4085
Epoch 50/50
469/469 [==============================] - 15s 33ms/step - loss: 5.3065e-07 - accuracy: 1.0000 - val_loss: 5.4845 - val_accuracy: 0.4087
Learning time: 700[s]
What am I doing wrong in methods 2 and 3? Please help.
Here is the complete code of the model I used.
import numpy as np
import cv2
import glob
import datetime
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "tf_cv"
train_images = "./images/train/"
test_images = "./images/test/"
# network parameters
batch_size = 128
categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)
def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)

def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)

def process_path_cv2(image_path, label):
    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32')/255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray
    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label

def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_image(image, channels=1)
    image = tf.image.convert_image_dtype(image, tf.float32)
    return image, label
start_time = datetime.datetime.now()
train = []
test = []
for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")

for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")
if data_load_method == "cv_only":
    x_train, y_train = make_sample_imgs(train)
    x_test, y_test = make_sample_imgs(test)
    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    x_train, y_train = make_sample(train)
    x_test, y_test = make_sample(test)
    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    train_datasets = tf.data.Dataset.from_tensor_slices((x_train, y_train))
    test_datasets = tf.data.Dataset.from_tensor_slices((x_test, y_test))
    if data_load_method == "tf_cv":
        train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
    else:
        train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
    # train_datasets = train_datasets.cache()
    # test_datasets = test_datasets.cache()
    train_datasets = train_datasets.batch(batch_size)
    test_datasets = test_datasets.batch(batch_size)
    train_datasets = train_datasets.prefetch(tf.data.AUTOTUNE)
    test_datasets = test_datasets.prefetch(tf.data.AUTOTUNE)
else:
    print("Method not defined!")
    exit()
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, batch_size=batch_size, validation_data=test_datasets)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')
If I use
train_datasets = train_datasets.cache()
test_datasets = test_datasets.cache()
then the training time drops to around 150 seconds, but the validation accuracy drops as well. I am using Python 3.10, Keras 2.10.0 and tensorflow-gpu 2.10.0.
Sorry for the messy code. I'm still learning to code.
I solved it myself. In summary: I changed tf.image.decode_image() to tf.image.decode_png(), since I am using PNG files, and I added train_datasets = train_datasets.shuffle(15000) and test_datasets = test_datasets.shuffle(4000). In the previous code there was no shuffling at all. Now all three methods give almost the same val_loss and val_accuracy.
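In other words, the tf.data part of the training pipeline now reads roughly like this (a condensed sketch of just the changed chain, matching the full code below):

# Condensed sketch of the changed tf.data chain (decode_png happens inside process_path_tf).
train_datasets = (train_datasets
                  .map(process_path_tf, num_parallel_calls=tf.data.AUTOTUNE)
                  .cache()          # cache decoded images after the first pass
                  .shuffle(15000)   # the missing piece: the file list is ordered class by class
                  .batch(batch_size))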
The new code is below.
import numpy as np
import cv2
import glob
import datetime
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.layers import Dense, Flatten
from keras.utils import to_categorical
# Valid Options are "cv_only", "tf_cv" and "tf_only"
data_load_method = "cv_only"
train_images = "./images/train/"
test_images = "./images/test/"
# network parameters
batch_size = 128
categories = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]
num_classes = len(categories)
def make_sample_imgs(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(cv2.imread(fname, cv2.IMREAD_GRAYSCALE))
        Y.append(cat)
    return np.array(X), np.array(Y)

def make_sample(files):
    global X, Y
    X = []
    Y = []
    for cat, fname in files:
        X.append(fname)
        Y.append(cat)
    return np.array(X), np.array(Y)

def process_path_cv2(image_path, label):
    def load_image(path):
        image_gray = cv2.imread(path.decode("utf-8"), cv2.IMREAD_GRAYSCALE)
        image_gray = image_gray.astype('float32')/255
        image_gray = tf.expand_dims(image_gray, axis=-1)
        return image_gray
    image = tf.numpy_function(load_image, [image_path], tf.float32)
    return image, label

def process_path_tf(image_path, label):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_png(image, channels=1)  # decode_png instead of decode_image
    image = tf.image.resize(image, [28, 28])        # Must use this; it increases val_accuracy
    image = tf.image.convert_image_dtype(image, tf.float32)
    image /= 255.0
    return image, label
start_time = datetime.datetime.now()
train = []
test = []
for idx, cat in enumerate(categories):
    image_dir = train_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        train.append((idx, f))
    print("Train ", image_dir, "append ", len(files), "files!")

for idx, cat in enumerate(categories):
    image_dir = test_images + cat
    files = glob.glob(image_dir + "/*.png")
    for f in files:
        test.append((idx, f))
    print("Test ", image_dir, "append ", len(files), "files!")
if data_load_method == "cv_only":
    x_train, y_train = make_sample_imgs(train)
    x_test, y_test = make_sample_imgs(test)
    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)
    x_train = x_train.astype('float32') / 255
    x_test = x_test.astype('float32') / 255
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    x_train, y_train = make_sample(train)
    x_test, y_test = make_sample(test)
    # convert to one-hot vector
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)
    train_datasets = tf.data.Dataset.from_tensor_slices((x_train, tf.cast(y_train, tf.int64)))
    test_datasets = tf.data.Dataset.from_tensor_slices((x_test, tf.cast(y_test, tf.int64)))
    if data_load_method == "tf_cv":
        train_datasets = train_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_cv2, num_parallel_calls=tf.data.AUTOTUNE)
    else:
        train_datasets = train_datasets.map(process_path_tf, num_parallel_calls=tf.data.experimental.AUTOTUNE)
        test_datasets = test_datasets.map(process_path_tf, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    train_datasets = train_datasets.cache()
    test_datasets = test_datasets.cache()
    train_datasets = train_datasets.shuffle(15000)
    test_datasets = test_datasets.shuffle(4000)
    train_datasets = train_datasets.batch(batch_size)
    test_datasets = test_datasets.batch(batch_size)
    test_datasets = test_datasets.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
else:
    print("Method not defined!")
    exit()
model = Sequential()
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", activation="relu", input_shape=(28, 28, 1)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(Conv2D(filters=128, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(BatchNormalization())
model.add(Conv2D(filters=256, kernel_size=(3, 3), padding="same", strides=2, activation="relu"))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(BatchNormalization())
model.add(Flatten())
model.add(Dense(512, activation="relu"))
model.add(Dense(10, activation="softmax"))
model.summary()
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

if data_load_method == "cv_only":
    model.fit(x_train, y_train, epochs=50, batch_size=batch_size, validation_data=(x_test, y_test))
elif data_load_method == "tf_cv" or data_load_method == "tf_only":
    model.fit(train_datasets, epochs=50, validation_data=test_datasets)
end_time = datetime.datetime.now()
time_diff = (end_time - start_time)
learning_time = time_diff.total_seconds()
print(f'Learning time: {int(learning_time)}[s]')
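To double-check that the shuffling works, peeking at the first batch (a small sketch, run after building train_datasets with one of the tf.data methods) should now show a mix of digits rather than a single class:

# Peek at one batch to confirm the classes are mixed after shuffling (sketch).
for images, labels in train_datasets.take(1):
    print(images.shape)                       # e.g. (128, 28, 28, 1)
    print(np.argmax(labels.numpy(), axis=1))  # a mix of digits, not a single class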