代码如下:
import math
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras import Model
class CustomData(tf.keras.utils.Sequence):
def __init__(self, image_data, label_data, batch_size):
self._image_data = np.float32((image_data / 127.5) - 1)
self._label_data = tf.squeeze(label_data)
self._batch_size = batch_size
self._num_batches = math.ceil(image_data.shape[0] / batch_size)
def __len__(self):
return self._num_batches
def __getitem__(self, idx):
idx1 = idx * self.batch_size
idx2 = (idx + 1) * self.batch_size
batch_x = self._image_data[idx1:idx2]
batch_y = self._label_data[idx1:idx2]
return batch_x, batch_y
@property
def batch_size(self):
return self._batch_size
class SparseSoftmaxCrossEntropy(keras.losses.Loss):
def __init__(self):
super().__init__()
def call(self, y_true, y_pred):
# the rank of y_true becomes 2 and I don't know why
#y_true = tf.cast(tf.squeeze(y_true), dtype=tf.int32) # this line can solve the problem, but I'm just wondering why the dimension of y_true is expanded
return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred))
class CustomModel(Model):
def __init__(self, num_outputs):
super().__init__()
self.global_avg_pool = GlobalAveragePooling2D()
self.classifier = Dense(units=num_outputs)
def call(self, inputs):
x = self.global_avg_pool(inputs)
x = self.classifier(x)
return x
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
train_data = CustomData(image_data=x_train, label_data=y_train, batch_size=2)
print(train_data[0][1].shape) # the rank of y_true is 1
model = CustomModel(num_outputs=10)
loss = SparseSoftmaxCrossEntropy()
model.compile(optimizer="adam", loss=loss)
model.fit(train_data)
报错如下:
ValueError: `labels.shape.rank` must equal `logits.shape.rank - 1`. Received: labels.shape=(None, 1) of rank 2 and logits.shape=(None, 10) of rank 2
我知道错误是在损失函数中引发的,可以通过 tf.squeeze 解决。 但是我不明白的是为什么y_true的维度被model.fit扩大了。 (从 (batch_size,) 到 (batch_size, 1)) 有人能告诉我为什么吗?谢谢。
它由 keras/keras/engine/compile_utils.py 中定义的函数 match_dtype_and_rank 执行
def match_dtype_and_rank(y_t, y_p, sw):
"""Match dtype and rank of predictions."""
if y_t.shape.rank == 1 and y_p.shape.rank == 2:
y_t = tf.expand_dims(y_t, axis=-1)