I am working on an AI NLP model that classifies text into 6 categories. I cannot reveal what the model is for, sorry. Here it is:
import numpy as np
import tensorflow as tf
import keras

Input = keras.layers.Input((1368, ))
embedding = keras.layers.Embedding(1386, 50)(Input)
# The model has two branches: one uses 1D convnets, the other uses RNNs.
# The convnet part
convent_1 = keras.layers.Conv1D(128, 10, activation = "relu")(embedding)
convent_2 = keras.layers.Conv1D(64, 7, activation = "relu")(convent_1)
convent_3 = keras.layers.Conv1D(32, 5, activation = "relu")(convent_2)
maxpool_1 = keras.layers.MaxPool1D(2)(convent_3)
maxpool_2 = keras.layers.MaxPool1D(2)(maxpool_1)
flatten_1 = keras.layers.Flatten()(maxpool_2)
reducing_size_dense = keras.layers.Dense(128, activation = "relu")(flatten_1) # To match the size of the flattened output of the RNN branch.
# The RNN part
rnn_1 = keras.layers.GRU(128, activation = "relu", return_sequences = True)(embedding)
rnn_2 = keras.layers.GRU(128, activation = "relu", return_sequences = False)(rnn_1)
flatten_2 = keras.layers.Flatten()(rnn_2)
# The classifier
sum_flatten = keras.layers.Add()([reducing_size_dense, flatten_2])
dense_1 = keras.layers.Dense(128, activation = "relu")(sum_flatten)
dense_2 = keras.layers.Dense(128, activation = "relu")(dense_1)
dense_3 = keras.layers.Dense(128, activation = "relu")(dense_2)
dense_4 = keras.layers.Dense(128, activation = "relu")(dense_3)
output_layer = keras.layers.Dense(6, activation = "softmax")(dense_4)
# We are using the quadratic weighted kappa loss, so we have to take the argmax of the output layer.
# Defining the argmax layer
class Argmax_layer(keras.layers.Layer):
    def call(self, input): # Keras layers are invoked through their call method.
        return tf.argmax(input, axis = -1) + 1 # The indices are in the range 0-5; add 1 to bring them into the range 1-6.
argmax_layer = Argmax_layer()(output_layer)
model = keras.Model(Input, argmax_layer)
keras.utils.plot_model(model, show_shapes=True)
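For reference, a quick sanity check of the wiring might look like this (just a sketch, feeding random token ids below the vocabulary size of 1386 to the built model):

dummy_batch = np.random.randint(0, 1386, size = (4, 1368)) # 4 random sequences of length 1368.
print(model(dummy_batch)) # Expected: a tensor of shape (4,) with integer labels between 1 and 6.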
This is the keras model described in the question. The problem shows up when I run training:
callback_list = [keras.callbacks.EarlyStopping(monitor = "val_accuracy", patience = 10),
keras.callbacks.ModelCheckpoint(monitor = "val_accuracy", save_best_only = True, filepath = "scoring_model.keras"),
keras.callbacks.ReduceLROnPlateau(monitor = "val_accuracy", patience = 5, factor = 0.25)]
model.compile(optimizer = "rmsprop", loss = kappa_weighted_quad_loss, metrics = ["accuracy"])
history_model = model.fit(training_data, training_targets, validation_data = (validation_data, validation_targets), epochs = 100, callbacks = callback_list, batch_size = 20)
plot = dm.plotter(history = history_model.history)
plot.plot_loss()
plot.plot_acc()
Note that dm.plot_acc() and dm.plot_loss() are my own helper methods that I use to preprocess data and manage my AI models. I usually start with these simple helpers and then gradually move on to TensorBoard.
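A minimal sketch of what such a plotting helper could look like (the plotter interface here is hypothetical, reconstructed only from the calls above):

import matplotlib.pyplot as plt

class plotter:
    # Hypothetical helper that plots the history dictionary returned by model.fit().
    def __init__(self, history):
        self.history = history
    def plot_loss(self):
        plt.plot(self.history["loss"], label = "loss")
        plt.plot(self.history["val_loss"], label = "val_loss")
        plt.legend()
        plt.show()
    def plot_acc(self):
        plt.plot(self.history["accuracy"], label = "accuracy")
        plt.plot(self.history["val_accuracy"], label = "val_accuracy")
        plt.legend()
        plt.show()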
You can see that I have implemented a custom loss function. Here it is:
# The evaluation of the results uses the weighted kappa, so we define that loss function.
def kappa_weighted_quad_loss(targets, preds):
    targets = targets - 1 # The targets and preds are in the range 1-6, so subtract 1 to bring them into the range 0-5.
    preds = preds - 1
    targets = tf.cast(targets, tf.int16)
    preds = tf.cast(preds, tf.int16)
    confusion_matrix = np.zeros(shape = (6, 6))
    for i, j in zip(targets, preds):
        confusion_matrix[i, j] += 1
    w_i_j = lambda i, j: (i - j)**2 / 25 # There are 6 categories, so N = 6 and (N - 1)**2 = 25.
    O_i_dot = lambda i: np.sum(confusion_matrix[i, :])
    O_dot_j = lambda j: np.sum(confusion_matrix[:, j])
    E_i_j = lambda i, j: np.outer(O_i_dot(i), O_dot_j(j)) / np.sum(confusion_matrix)
    numerator = np.sum([w_i_j(i, j) * confusion_matrix[i, j] for i in range(6) for j in range(6)])
    denominator = np.sum([w_i_j(i, j) * E_i_j(i, j) for i in range(6) for j in range(6)])
    return 1 - numerator / denominator
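For reference, the quantity this computes is the standard quadratic weighted kappa (with N = 6 categories):

\kappa = 1 - \frac{\sum_{i,j} w_{ij}\, O_{ij}}{\sum_{i,j} w_{ij}\, E_{ij}}, \qquad w_{ij} = \frac{(i - j)^2}{(N - 1)^2}, \qquad E_{ij} = \frac{O_{i\cdot}\, O_{\cdot j}}{\sum_{i,j} O_{ij}}

where O is the confusion matrix and O_{i·}, O_{·j} are its row and column sums, matching the numerator and denominator built above with (N - 1)^2 = 25.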
It is Cohen's weighted kappa for multi-class classification. The error I run into while working on the GPU is this:
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[41], line 6
1 callback_list = [keras.callbacks.EarlyStopping(monitor = "val_accuracy", patience = 10),
2 keras.callbacks.ModelCheckpoint(monitor = "val_accuracy", save_best_only = True, filepath = "scoring_model.keras"),
3 keras.callbacks.ReduceLROnPlateau(monitor = "val_accuracy", patience = 5, factor = 0.25)]
5 model.compile(optimizer = "rmsprop", loss = kappa_weighted_quad_loss, metrics = ["accuracy"])
----> 6 history_model = model.fit(training_data, training_targets, validation_data = (validation_data, validation_targets), epochs = 100, callbacks = callback_list, batch_size = 20)
7 plot = dm.plotter(history = history_model.history)
8 plot.plot_loss()
File /opt/conda/lib/python3.10/site-packages/keras/src/utils/traceback_utils.py:122, in filter_traceback.<locals>.error_handler(*args, **kwargs)
119 filtered_tb = _process_traceback_frames(e.__traceback__)
120 # To get the full stack trace, call:
121 # `keras.config.disable_traceback_filtering()`
--> 122 raise e.with_traceback(filtered_tb) from None
123 finally:
124 del filtered_tb
File /opt/conda/lib/python3.10/site-packages/keras/src/optimizers/base_optimizer.py:662, in BaseOptimizer._filter_empty_gradients(self, grads, vars)
659 missing_grad_vars.append(v.name)
661 if not filtered_grads:
--> 662 raise ValueError("No gradients provided for any variable.")
663 if missing_grad_vars:
664 warnings.warn(
665 "Gradients do not exist for variables "
666 f"{list(reversed(missing_grad_vars))} when minimizing the loss."
667 " If using `model.compile()`, did you forget to provide a "
668 "`loss` argument?"
669 )
ValueError: No gradients provided for any variable.
I cannot really pin down the cause of this error, but TensorFlow seems unable to compute gradients with tf.GradientTape and its other gradient machinery. I think this is because of the discontinuity of the Argmax_layer, but I am not confident in that explanation. Is there a way to compute the gradients manually, or some other approach?
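For what it is worth, the argmax suspicion can be checked in isolation; a minimal sketch like this shows that tf.argmax produces no gradient at all:

x = tf.Variable([[0.1, 0.2, 0.7]])
with tf.GradientTape() as tape:
    y = tf.argmax(x, axis = -1) # Integer-valued output: not differentiable with respect to x.
print(tape.gradient(y, x)) # Prints None - no gradient flows through argmax.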
Try porting your loss to TensorFlow. I could not test it by reproducing your case, but at worst it will point you in the right direction:
def kappa_weighted_quad_loss(targets, preds):
    targets = targets - 1 # The targets and preds are in the range 1-6, so subtract 1 to bring them into the range 0-5.
    preds = preds - 1
    targets = tf.cast(targets, tf.int32)
    preds = tf.cast(preds, tf.int32)
    confusion_matrix = tf.math.confusion_matrix(targets, preds, num_classes = 6)
    confusion_matrix = tf.cast(confusion_matrix, tf.float32) # Cast so it can be multiplied by the float weights.
    w_i_j = lambda i, j: (i - j)**2 / 25 # There are 6 categories, so N = 6 and (N - 1)**2 = 25.
    O_i_dot = tf.math.reduce_sum(confusion_matrix, axis = 1) # Row sums.
    O_dot_j = tf.math.reduce_sum(confusion_matrix, axis = 0) # Column sums.
    E_i_j = lambda i, j: O_i_dot[i] * O_dot_j[j] / tf.math.reduce_sum(confusion_matrix)
    numerator = tf.math.reduce_sum([w_i_j(i, j) * confusion_matrix[i, j] for i in range(6) for j in range(6)])
    denominator = tf.math.reduce_sum([w_i_j(i, j) * E_i_j(i, j) for i in range(6) for j in range(6)])
    return 1 - numerator / denominator
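A quick way to smoke-test the ported function on dummy tensors (just a sketch, not tied to your data) is:

dummy_targets = tf.constant([1, 3, 5, 2, 6, 4])
dummy_preds = tf.constant([1, 3, 4, 2, 6, 5])
print(kappa_weighted_quad_loss(dummy_targets, dummy_preds)) # Should print a scalar tensor, close to 1 here since most predictions match the targets.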