I am working on an AI NLP model that classifies text into 6 categories. I cannot reveal what the model is for, sorry. Here it is:
import numpy as np
import tensorflow as tf
import keras

Input = keras.layers.Input((1368, ))
embedding = keras.layers.Embedding(1386, 50)(Input)
# The model has two branches: one uses 1D convnets, the other uses RNNs.
# The convnet part
convent_1 = keras.layers.Conv1D(128, 10, activation = "relu")(embedding)
convent_2 = keras.layers.Conv1D(64, 7, activation = "relu")(convent_1)
convent_3 = keras.layers.Conv1D(32, 5, activation = "relu")(convent_2)
maxpool_1 = keras.layers.MaxPool1D(2)(convent_3)
maxpool_2 = keras.layers.MaxPool1D(2)(maxpool_1)
flatten_1 = keras.layers.Flatten()(maxpool_2)
reducing_size_dense = keras.layers.Dense(128, activation = "relu")(flatten_1) # To match the size of the flattened output of the RNN branch.
# The RNN part
rnn_1 = keras.layers.GRU(128, activation = "relu", return_sequences = True)(embedding)
rnn_2 = keras.layers.GRU(128, activation = "relu", return_sequences = False)(rnn_1)
flatten_2 = keras.layers.Flatten()(rnn_2)
# The classifier
sum_flatten = keras.layers.Add()([reducing_size_dense, flatten_2])
dense_1 = keras.layers.Dense(128, activation = "relu")(sum_flatten)
dense_2 = keras.layers.Dense(128, activation = "relu")(dense_1)
dense_3 = keras.layers.Dense(128, activation = "relu")(dense_2)
dense_4 = keras.layers.Dense(128, activation = "relu")(dense_3)
output_layer = keras.layers.Dense(6, activation = "softmax")(dense_4)
# We are using the quadratic weighted kappa loss, so we have to take the argmax of the output layer.
# Defining the argmax layer
class Argmax_layer(keras.layers.Layer):
    def call(self, input): # Keras layers are invoked through their call method.
        return tf.argmax(input, axis = -1) + 1 # The indices are in the range 0-5; add 1 to bring them into the range 1-6.
argmax_layer = Argmax_layer()(output_layer)
model = keras.Model(Input, argmax_layer)
keras.utils.plot_model(model, show_shapes=True)
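For reference, a quick sanity check of the wiring might look like this (just a sketch, feeding random token ids below the vocabulary size of 1386 to the built model):

dummy_batch = np.random.randint(0, 1386, size = (4, 1368)) # 4 random sequences of length 1368.
print(model(dummy_batch)) # Expected: a tensor of shape (4,) with integer labels between 1 and 6.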
This is the keras model described in the question. The problem shows up when I run training:
callback_list = [keras.callbacks.EarlyStopping(monitor = "val_accuracy", patience = 10),
keras.callbacks.ModelCheckpoint(monitor = "val_accuracy", save_best_only = True, filepath = "scoring_model.keras"),
keras.callbacks.ReduceLROnPlateau(monitor = "val_accuracy", patience = 5, factor = 0.25)]
model.compile(optimizer = "rmsprop", loss = kappa_weighted_quad_loss, metrics = ["accuracy"])
history_model = model.fit(training_data, training_targets, validation_data = (validation_data, validation_targets), epochs = 100, callbacks = callback_list, batch_size = 20)
plot = dm.plotter(history = history_model.history)
plot.plot_loss()
plot.plot_acc()
Note that dm.plot_acc() and dm.plot_loss() are my own helper methods that I use to preprocess data and manage my AI models. I usually start with these simple helpers and then gradually move on to TensorBoard.
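A minimal sketch of what such a plotting helper could look like (the plotter interface here is hypothetical, reconstructed only from the calls above):

import matplotlib.pyplot as plt

class plotter:
    # Hypothetical helper that plots the history dictionary returned by model.fit().
    def __init__(self, history):
        self.history = history
    def plot_loss(self):
        plt.plot(self.history["loss"], label = "loss")
        plt.plot(self.history["val_loss"], label = "val_loss")
        plt.legend()
        plt.show()
    def plot_acc(self):
        plt.plot(self.history["accuracy"], label = "accuracy")
        plt.plot(self.history["val_accuracy"], label = "val_accuracy")
        plt.legend()
        plt.show()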
You can see that I have implemented a custom loss function. Here it is:
# The evaluation of the results uses the weighted kappa, so we define that loss function.
def kappa_weighted_quad_loss(targets, preds):
    targets = targets - 1 # The targets and preds are in the range 1-6, so subtract 1 to bring them into the range 0-5.
    preds = preds - 1
    targets = tf.cast(targets, tf.int16)
    preds = tf.cast(preds, tf.int16)
    confusion_matrix = np.zeros(shape = (6, 6))
    for i, j in zip(targets, preds):
        confusion_matrix[i, j] += 1
    w_i_j = lambda i, j: (i - j)**2 / 25 # There are 6 categories, so N = 6 and (N - 1)**2 = 25.
    O_i_dot = lambda i: np.sum(confusion_matrix[i, :])
    O_dot_j = lambda j: np.sum(confusion_matrix[:, j])
    E_i_j = lambda i, j: np.outer(O_i_dot(i), O_dot_j(j)) / np.sum(confusion_matrix)
    numerator = np.sum([w_i_j(i, j) * confusion_matrix[i, j] for i in range(6) for j in range(6)])
    denominator = np.sum([w_i_j(i, j) * E_i_j(i, j) for i in range(6) for j in range(6)])
    return 1 - numerator / denominator
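For reference, the quantity this computes is the standard quadratic weighted kappa (with N = 6 categories):

\kappa = 1 - \frac{\sum_{i,j} w_{ij}\, O_{ij}}{\sum_{i,j} w_{ij}\, E_{ij}}, \qquad w_{ij} = \frac{(i - j)^2}{(N - 1)^2}, \qquad E_{ij} = \frac{O_{i\cdot}\, O_{\cdot j}}{\sum_{i,j} O_{ij}}

where O is the confusion matrix and O_{i·}, O_{·j} are its row and column sums, matching the numerator and denominator built above with (N - 1)^2 = 25.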
It is Cohen's weighted kappa for multi-class classification. The error I run into while working on the GPU is this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[41], line 6
1 callback_list = [keras.callbacks.EarlyStopping(monitor = "val_accuracy", patience = 10),
2 keras.callbacks.ModelCheckpoint(monitor = "val_accuracy", save_best_only = True, filepath = "scoring_model.keras"),
3 keras.callbacks.ReduceLROnPlateau(monitor = "val_accuracy", patience = 5, factor = 0.25)]
5 model.compile(optimizer = "rmsprop", loss = kappa_weighted_quad_loss, metrics = ["accuracy"])
----> 6 history_model = model.fit(training_data, training_targets, validation_data = (validation_data, validation_targets), epochs = 100, callbacks = callback_list, batch_size = 20)
7 plot = dm.plotter(history = history_model.history)
8 plot.plot_loss()
File /opt/conda/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py:122, in filter_traceback.<locals>.error_handler(*args, **kwargs)
119 filtered_tb = _process_traceback_frames(e.__traceback__)
120 # To get the full stack trace, call:
121 # `keras.config.disable_traceback_filtering()`
--> 122 raise e.with_traceback(filtered_tb) from None
123 finally:
124 del filtered_tb
File /opt/conda/lib/python3.10/site-packages/keras/src/optimizers/base_optimizer.py:662, in BaseOptimizer._filter_empty_gradients(self, grads, vars)
659 missing_grad_vars.append(v.name)
661 if not filtered_grads:
--> 662 raise ValueError("No gradients provided for any variable.")
663 if missing_grad_vars:
664 warnings.warn(
665 "Gradients do not exist for variables "
666 f"{list(reversed(missing_grad_vars))} when minimizing the loss."
667 " If using `model.compile()`, did you forget to provide a "
668 "`loss` argument?"
669 )
ValueError: No gradients provided for any variable.
I cannot really pin down the cause of this error, but TensorFlow seems unable to compute gradients with tf.GradientTape and its other gradient machinery. I think this is because of the discontinuity of the Argmax_layer, but I am not confident in that explanation. Is there a way to compute the gradients manually, or some other approach?
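For what it is worth, the argmax suspicion can be checked in isolation; a minimal sketch like this shows that tf.argmax produces no gradient at all:

x = tf.Variable([[0.1, 0.2, 0.7]])
with tf.GradientTape() as tape:
    y = tf.argmax(x, axis = -1) # Integer-valued output: not differentiable with respect to x.
print(tape.gradient(y, x)) # Prints None - no gradient flows through argmax.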
Try porting your loss to TensorFlow. I could not test it by reproducing your case, but at worst it will point you in the right direction:
def kappa_weighted_quad_loss(targets, preds):
    targets = targets - 1 # The targets and preds are in the range 1-6, so subtract 1 to bring them into the range 0-5.
    preds = preds - 1
    targets = tf.cast(targets, tf.int32)
    preds = tf.cast(preds, tf.int32)
    confusion_matrix = tf.math.confusion_matrix(targets, preds, num_classes = 6)
    confusion_matrix = tf.cast(confusion_matrix, tf.float32) # Cast so it can be multiplied by the float weights.
    w_i_j = lambda i, j: (i - j)**2 / 25 # There are 6 categories, so N = 6 and (N - 1)**2 = 25.
    O_i_dot = tf.math.reduce_sum(confusion_matrix, axis = 1) # Row sums.
    O_dot_j = tf.math.reduce_sum(confusion_matrix, axis = 0) # Column sums.
    E_i_j = lambda i, j: O_i_dot[i] * O_dot_j[j] / tf.math.reduce_sum(confusion_matrix)
    numerator = tf.math.reduce_sum([w_i_j(i, j) * confusion_matrix[i, j] for i in range(6) for j in range(6)])
    denominator = tf.math.reduce_sum([w_i_j(i, j) * E_i_j(i, j) for i in range(6) for j in range(6)])
    return 1 - numerator / denominator
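A quick way to smoke-test the ported function on dummy tensors (just a sketch, not tied to your data) is:

dummy_targets = tf.constant([1, 3, 5, 2, 6, 4])
dummy_preds = tf.constant([1, 3, 4, 2, 6, 5])
print(kappa_weighted_quad_loss(dummy_targets, dummy_preds)) # Should print a scalar tensor, close to 1 here since most predictions match the targets.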