我正在尝试学习如何用子类化(Subclassing)API 处理 Fashion MNIST 的一批数据,但总是反复遇到同一个形状(shape)错误,却找不出它的来源。这是我的代码:
import tensorflow as tf
from tensorflow import keras
import os
import sys
assert sys.version_info >= (3, 5)
import sklearn
assert sklearn.__version__ >= "0.20"
import numpy as np
import matplotlib
# Load Fashion MNIST through Keras; load_data() yields a (train, test) pair
# of (images, labels) tuples.
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# The first 5000 training samples become the validation set, the rest stay
# as the training set. Image arrays are divided by 255; labels are untouched.
X_valid = X_train_full[:5000] / 255.
X_train = X_train_full[5000:] / 255.
y_valid = y_train_full[:5000]
y_train = y_train_full[5000:]
X_test = X_test / 255.
#Note that I am not using this part of the code for now because I
#need to debug it first:
#class Block(keras.layers.Layer):
# def __init__(self, n_layers, n_neurons, **kwargs):
# super().__init__(**kwargs)
# self.hidden = [keras.layers.Dense(n_neurons, #activation="relu")
# for _ in range(n_layers)]
# def call(self, inputs):
# Z = inputs
# for layer in self.hidden:
# Z = layer(Z)
# return inputs + Z
class Modeling(keras.Model):
    """Subclassed Keras model: Flatten -> Dense(30, relu) -> Dense(10, softmax)."""

    def __init__(self, **kwargs):
        """Create the three layers; keyword arguments go to keras.Model."""
        super().__init__(**kwargs)
        self.Flatten = keras.layers.Flatten(input_shape=[28, 28])
        self.dense1 = keras.layers.Dense(units=30, activation="relu")
        self.out = keras.layers.Dense(units=10, activation="softmax")

    def call(self, inputs):
        """Run the forward pass and return the output of the softmax layer."""
        flattened = self.Flatten(inputs)
        hidden = self.dense1(flattened)
        # Disabled until the custom blocks are debugged:
        # for _ in range(1 + 3):
        #     hidden = self.block1(hidden)
        # hidden = self.block2(hidden)
        return self.out(hidden)
# Instantiate the subclassed model and build it so that summary() can report
# the layers and their parameter counts.
model = Modeling()
# The shape given to build() must include the batch dimension. The previous
# call, build([28, 28]), made Keras treat the first 28 as the batch size, so
# Flatten produced 28 features and dense1 was created for 28 inputs. During
# fit() the real batches flatten to (None, 784), which raised the ValueError.
model.build(input_shape=(None, 28, 28))
model.summary()
model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy", metrics=['accuracy'])
model.fit(X_train, y_train, epochs=1, validation_data=(X_valid, y_valid))
我不断收到的信息是,简而言之:
ValueError
Traceback
1 model.compile(optimizer="sgd", loss="sparse_categorical_crossentropy", metrics=['accuracy'])
2
----> 3 model.fit(X_train, y_train, epochs=1, validation_data=(X_valid, y_valid))
ValueError: Input 0 of layer "dense" is incompatible with the layer: expected axis -1 of input shape to have value 28, but received input with shape (None, 784)
Call arguments received:
• inputs=tf.Tensor(shape=(None, 28, 28), dtype=float32)
如何解决?
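错误消息本身已经说明:输入的形状与模型期望的形状不一致。Dense 层在构建时期望输入的最后一个轴有 28 个特征,但训练时实际收到的是展平后的 784 个特征。原因在于 model.build([28,28]) 把第一个 28 当成了批次维度;传入包含批次维度的形状(例如 model.build((None, 28, 28)))即可避免该错误。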
我不确定您是否必须使用下面这种子类化结构:
class Modeling(keras.Model):
    """Subclassed Keras model: Flatten -> Dense(30, relu) -> Dense(10, softmax)."""

    def __init__(self, **kwargs):
        """Create the three layers; keyword arguments go to keras.Model."""
        super().__init__(**kwargs)
        self.Flatten = keras.layers.Flatten(input_shape=[28, 28])
        self.dense1 = keras.layers.Dense(units=30, activation="relu")
        self.out = keras.layers.Dense(units=10, activation="softmax")

    def call(self, inputs):
        """Run the forward pass and return the output of the softmax layer."""
        flattened = self.Flatten(inputs)
        hidden = self.dense1(flattened)
        # Disabled until the custom blocks are debugged:
        # for _ in range(1 + 3):
        #     hidden = self.block1(hidden)
        # hidden = self.block2(hidden)
        return self.out(hidden)
当我将其替换为以下内容时:
# The same three-layer network expressed with the Sequential API, adding the
# layers one at a time.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(30, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))
它有效。
完整的工作代码:
import tensorflow as tf
from tensorflow import keras
import os
import sys
import numpy as np
import matplotlib
# Load Fashion MNIST through Keras; load_data() yields a (train, test) pair
# of (images, labels) tuples.
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

# The first 5000 training samples become the validation set, the rest stay
# as the training set. Image arrays are divided by 255; labels are untouched.
X_valid = X_train_full[:5000] / 255.
X_train = X_train_full[5000:] / 255.
y_valid = y_train_full[:5000]
y_train = y_train_full[5000:]
X_test = X_test / 255.
# Build the three-layer classifier with the Sequential API, one layer at a
# time.
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[28, 28]))
model.add(keras.layers.Dense(30, activation="relu"))
model.add(keras.layers.Dense(10, activation="softmax"))

# Train for a single epoch with SGD, evaluating on the validation split.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=['accuracy'],
)
model.fit(
    X_train,
    y_train,
    epochs=1,
    validation_data=(X_valid, y_valid),
)
输出结果为:
loss: 0.8309 - acc: 0.7282 - val_loss: 0.5908 - val_acc: 0.7994