In the code below, I am comparing the Predicted Output of a TF Keras Model with the corresponding values Calculated Manually (the Softmax Activation is implemented using Numpy). Surprisingly, they are not the same. Am I missing something?

There is also a warning:

UserWarning: "`sparse_categorical_crossentropy` received `from_logits=True`, but the `output` argument was produced by a sigmoid or softmax activation and thus does not represent logits. Was this intended?"

What does this warning mean, and is it the reason for the mismatch?
import tensorflow as tf
import numpy as np
inputs = tf.keras.Input(shape=(784,), name="digits")
x1 = tf.keras.layers.Dense(64, activation="relu")(inputs)
x2 = tf.keras.layers.Dense(64, activation="relu")(x1)
outputs = tf.keras.layers.Dense(10, name="predictions", activation='softmax')(x2)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
# Instantiate an optimizer.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-3)
# Instantiate a loss function.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# Prepare the training dataset.
batch_size = 64
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
x_train = np.reshape(x_train, (-1, 784))
x_test = np.reshape(x_test, (-1, 784))
# Normalize the pixel values; otherwise the softmax calculation overflows to NaN
x_train = x_train/255.0
x_test = x_test/255.0
# Reserve 10,000 samples for validation.
x_val = x_train[-10000:]
y_val = y_train[-10000:]
x_train = x_train[:-10000]
y_train = y_train[:-10000]
# Prepare the training dataset.
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(batch_size)
# Prepare the validation dataset.
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val))
val_dataset = val_dataset.batch(batch_size)
epochs = 2
for epoch in range(epochs):
    print("\nStart of epoch %d" % (epoch,))

    # Iterate over the batches of the dataset.
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        x_batch_train = tf.cast(x_batch_train, tf.float32)

        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)  # Logits for this minibatch
            # Compute the loss value for this minibatch.
            loss_value = loss_fn(y_batch_train, logits)

        grads = tape.gradient(loss_value, model.trainable_weights)

        Initial_Weights_1st_Hidden_Layer = model.trainable_weights[0]
        Initial_Weights_2nd_Hidden_Layer = model.trainable_weights[2]
        Initial_Weights_Output_Layer = model.trainable_weights[4]

        Initial_Bias_1st_Hidden_Layer = model.trainable_weights[1]
        Initial_Bias_2nd_Hidden_Layer = model.trainable_weights[3]
        Initial_Bias_Output_Layer = model.trainable_weights[5]

        # Implementing the Relu Activation Function using Numpy
        def Relu_Activation(Input):
            return np.maximum(Input, 0)

        # Compute the Softmax Activation Function using Numpy
        def Softmax_Activation(Input):
            return np.exp(Input) / np.sum(np.exp(Input), axis=0)

        # Calculations
        Input_to_1st_Hidden_Layer = x_batch_train @ Initial_Weights_1st_Hidden_Layer + \
                                    Initial_Bias_1st_Hidden_Layer
        Output_Of_1st_Hidden_Layer = Relu_Activation(Input_to_1st_Hidden_Layer)

        Input_to_2nd_Hidden_Layer = Output_Of_1st_Hidden_Layer @ Initial_Weights_2nd_Hidden_Layer + \
                                    Initial_Bias_2nd_Hidden_Layer
        Output_Of_2nd_Hidden_Layer = Relu_Activation(Input_to_2nd_Hidden_Layer)

        Input_to_Final_Layer = Output_Of_2nd_Hidden_Layer @ Initial_Weights_Output_Layer + \
                               Initial_Bias_Output_Layer
        # Softmax Activation Function has been used in the Output/Final Layer
        Calculated_Y_Pred = Softmax_Activation(Input_to_Final_Layer)

        # Log every 200 batches.
        if step == 200:
            print('\n Y_Pred = ', logits[0:2])
            print('\n Calculated_Y_Pred = ', Calculated_Y_Pred[0:2])
The output of the above code is shown below:
Start of epoch 0
/home/mothukuru/anaconda3/lib/python3.7/site-packages/tensorflow/python/keras/backend.py:4930: UserWarning: "`sparse_categorical_crossentropy` received `from_logits=True`, but the `output` argument was produced by a sigmoid or softmax activation and thus does not represent logits. Was this intended?"
'"`sparse_categorical_crossentropy` received `from_logits=True`, but '
Y_Pred = tf.Tensor(
[[0.07784345 0.13746074 0.09005958 0.08652461 0.07746054 0.12440132
0.10698392 0.07508533 0.07116801 0.15301245]
[0.0656803 0.08119027 0.09362638 0.10353054 0.12599334 0.10456354
0.1271341 0.08623642 0.08971243 0.12233265]], shape=(2, 10), dtype=float32)
Calculated_Y_Pred = [[0.01511016 0.02304603 0.01961761 0.01425961 0.01025286 0.02124614
0.01223315 0.01411171 0.01178642 0.01445299]
[0.01271159 0.01357185 0.02033444 0.01701196 0.01662761 0.01780546
0.01449438 0.01615969 0.01481383 0.01152103]]
Start of epoch 1
Y_Pred = tf.Tensor(
[[0.12411885 0.08815324 0.05189805 0.07208851 0.11877609 0.06383732
0.13067529 0.08087374 0.09073243 0.17884655]
[0.07584718 0.079349 0.06285123 0.1089478 0.09581042 0.09398626
0.12189291 0.10832074 0.08284932 0.17014521]], shape=(2, 10), dtype=float32)
Calculated_Y_Pred = [[0.02525741 0.01648222 0.01210153 0.012623 0.01642019 0.01224833
0.01583157 0.01587343 0.01606088 0.01728726]
[0.01414648 0.01359805 0.01343262 0.01748529 0.01214003 0.01652816
0.01353526 0.01948644 0.01344168 0.01507382]]
Firstly, if you keep the integer targets or labels, you should use sparse_categorical_accuracy for accuracy and sparse_categorical_crossentropy for the loss function. But if you transform your integer labels into one-hot encoded vectors, then you should use categorical_accuracy for accuracy and categorical_crossentropy for the loss function. Since these datasets have integer labels, you can either choose the sparse_categorical variants or transform the labels to one-hot in order to use the categorical ones.
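For instance, a minimal sketch (the labels and probabilities below are made-up toy values, not taken from the model above) showing that the two pairings yield the same loss:

import tensorflow as tf

y_int = tf.constant([0, 2])                    # integer labels, shape (2,)
y_onehot = tf.one_hot(y_int, depth=3)          # same labels, one-hot, shape (2, 3)
probs = tf.constant([[0.7, 0.2, 0.1],
                     [0.1, 0.3, 0.6]])         # toy model probabilities

sparse_loss = tf.keras.losses.SparseCategoricalCrossentropy()(y_int, probs)
cat_loss = tf.keras.losses.CategoricalCrossentropy()(y_onehot, probs)
print(sparse_loss.numpy(), cat_loss.numpy())   # identical values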
Secondly, if you set outputs = keras.layers.Dense(102, activation='softmax')(x) in the last layer, you will get probability scores. But if you set outputs = keras.layers.Dense(102)(x), then you will get logits. So, if you set activation='softmax', you should not use from_logits=True. For example, in your code above you should do one of the following (a quick numerical check of their equivalence follows the two snippets):
...
(a)
# Use softmax activation (no logits output)
outputs = keras.layers.Dense(102, activation='softmax')(x)
...
model.compile(
optimizer=keras.optimizers.Adam(),
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
metrics=[keras.metrics.Accuracy()],
)
Or,
(b)
# no activation, output will be logits
outputs = keras.layers.Dense(102)(x)
...
model.compile(
optimizer=keras.optimizers.Adam(),
loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
metrics=[keras.metrics.Accuracy()],
)
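As a sanity check, (a) and (b) compute the same loss value; a minimal sketch with toy numbers (not the model above):

import tensorflow as tf

logits = tf.constant([[2.0, 1.0, 0.1]])  # raw scores from a Dense layer with no activation
probs = tf.nn.softmax(logits)            # what a softmax-activated layer would output
y = tf.constant([0])

loss_a = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)(y, probs)
loss_b = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)(y, logits)
print(loss_a.numpy(), loss_b.numpy())    # same value, up to floating-point error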
Thirdly, Keras uses string identifiers such as metrics=['acc'] and optimizer='adam'. But in your case you need to be a bit more specific, since you specify the loss function explicitly. So, if your targets are integers, you should choose keras.metrics.SparseCategoricalAccuracy() rather than keras.metrics.Accuracy(); and if your targets are one-hot encoded vectors, you should choose keras.metrics.CategoricalAccuracy().
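To illustrate the difference (toy values, assuming integer targets): SparseCategoricalAccuracy takes probability rows directly, while Accuracy only does an element-wise equality check.

import tensorflow as tf

y_true = tf.constant([1, 2])                   # integer targets
probs = tf.constant([[0.1, 0.8, 0.1],
                     [0.2, 0.2, 0.6]])         # probability rows from a softmax

acc = tf.keras.metrics.SparseCategoricalAccuracy()
acc.update_state(y_true, probs)                # argmax is taken internally
print(acc.result().numpy())                    # 1.0 -- both rows predicted correctly

# keras.metrics.Accuracy() would instead compare y_true and y_pred element-wise,
# so it only makes sense after you reduce probs to class indices yourself.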
Here is an end-to-end example. Note that I will transform the integer labels into one-hot encoded vectors (right now, that is a matter of preference for me). Also, I want probabilities (not logits) from the last layer, which means from_logits=False. For all of this, I need to choose the following parameters in training:
# use softmax to get probabilities
outputs = keras.layers.Dense(102, activation='softmax')(x)

# no logits, so set it to False (FYI, it is already False by default)
loss = keras.losses.CategoricalCrossentropy(from_logits=False),

# specify the metrics properly
metrics = keras.metrics.CategoricalAccuracy(),
Let's complete the whole code.
import tensorflow_datasets as tfds
tfds.disable_progress_bar()
data, ds_info = tfds.load('oxford_flowers102',
with_info=True, as_supervised=True)
train_ds, valid_ds, test_ds = data['train'], data['validation'], data['test']
NUM_CLASSES = ds_info.features["label"].num_classes
train_size = len(data['train'])
batch_size = 64
img_size = 120
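As a quick sanity check of what was just loaded (oxford_flowers102 has 102 classes and, in TFDS, 1,020 training images, 10 per class):

print(NUM_CLASSES)  # 102
print(train_size)   # 1020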
Preprocessing and augmentation
import tensorflow as tf

# pre-processing function
def normalize_resize(image, label):
    image = tf.cast(image, tf.float32)
    image = tf.divide(image, 255)
    image = tf.image.resize(image, (img_size, img_size))
    label = tf.one_hot(label, depth=NUM_CLASSES)  # int to one-hot
    return image, label

# augmentation
def augment(image, label):
    image = tf.image.random_flip_left_right(image)
    return image, label

train = train_ds.map(normalize_resize).cache().map(augment).shuffle(100).\
    batch(batch_size).repeat()
valid = valid_ds.map(normalize_resize).cache().batch(batch_size)
test = test_ds.map(normalize_resize).cache().batch(batch_size)
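If you want to verify the pipeline, pull one batch and check the shapes (they assume the batch_size and img_size set above):

images, labels = next(iter(train))
print(images.shape)  # (64, 120, 120, 3) -- normalized, resized images
print(labels.shape)  # (64, 102) -- integer labels are now one-hot vectors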
Model
from tensorflow import keras
base_model = keras.applications.Xception(
weights='imagenet',
input_shape=(img_size, img_size, 3),
include_top=False)
base_model.trainable = False
inputs = keras.Input(shape=(img_size, img_size, 3))
x = base_model(inputs, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
outputs = keras.layers.Dense(NUM_CLASSES, activation='softmax')(x)
model = keras.Model(inputs, outputs)
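Since base_model.trainable = False, only the new Dense head is updated during training; a quick check:

print(len(model.trainable_weights))  # 2 -- just the kernel and bias of the Dense head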
OK, additionally, here I like to use two metrics to compute the top-1 and top-3 accuracy.
model.compile(optimizer=keras.optimizers.Adam(),
              loss=keras.losses.CategoricalCrossentropy(),
              metrics=[
                  keras.metrics.TopKCategoricalAccuracy(k=3, name='acc_top3'),
                  keras.metrics.TopKCategoricalAccuracy(k=1, name='acc_top1')
              ])

model.fit(train, steps_per_epoch=train_size // batch_size,
          epochs=20, validation_data=valid, verbose=2)
...
Epoch 19/20
15/15 - 2s - loss: 0.2808 - acc_top3: 0.9979 - acc_top1: 0.9917 -
val_loss: 1.5025 - val_acc_top3: 0.8147 - val_acc_top1: 0.6186
Epoch 20/20
15/15 - 2s - loss: 0.2743 - acc_top3: 0.9990 - acc_top1: 0.9885 -
val_loss: 1.4948 - val_acc_top3: 0.8147 - val_acc_top1: 0.6255
Evaluate
# evaluate on test set
model.evaluate(test, verbose=2)
97/97 - 18s - loss: 1.6482 - acc_top3: 0.7733 - acc_top1: 0.5994
[1.648208498954773, 0.7732964754104614, 0.5994470715522766]
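Finally, because the last layer uses softmax, model.predict returns per-class probabilities; a small usage sketch (assuming the model and test set above):

import numpy as np

probs = model.predict(test)              # shape: (num_test_samples, 102)
print(probs.sum(axis=-1)[:3])            # each row sums to ~1.0 (softmax output)
pred_labels = np.argmax(probs, axis=-1)  # integer class predictions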