我正在尝试使用 Keras 的 Sequential(顺序)模型训练一个模型,代码如下:
from tensorflow.keras import Input
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
# Example input phrases and the nickname each one should map to.
input_sequence = ["Madhup is a good boy.", "I am a large language model."]
output_sequence = ["Madgboy", "Imllm"]

# Character-level tokenizers, one per side, because the input and output
# vocabularies differ.
input_tokenizer = Tokenizer(char_level=True, lower=True)
output_tokenizer = Tokenizer(char_level=True, lower=True)

# Build the vocabularies before encoding: an unfitted tokenizer has an empty
# word_index, so every text would otherwise encode to an empty sequence.
input_tokenizer.fit_on_texts(input_sequence)
output_tokenizer.fit_on_texts(output_sequence)

# Convert text to sequences of integers
input_sequence_int = input_tokenizer.texts_to_sequences(input_sequence)
output_sequence_int = output_tokenizer.texts_to_sequences(output_sequence)
print(f"Input Sequence : {input_sequence_int}\n Output Sequence : {output_sequence_int} \n")

# Pad both sides to one common length (the longest sequence on either side)
# so that inputs and targets have the same number of time steps.
max_input_length = max(len(seq) for seq in input_sequence_int)
max_output_length = max(len(seq) for seq in output_sequence_int)
max_sequence_length = max(max_input_length, max_output_length)
print("Max Input Length ", max_input_length)
print("Max Output Length ", max_output_length)
print("Max Seq Length ", max_sequence_length)

# padding='post' appends zeros after the real tokens.
input_sequence_padded = pad_sequences(input_sequence_int, maxlen=max_sequence_length, padding='post')
output_sequence_padded = pad_sequences(output_sequence_int, maxlen=max_sequence_length, padding='post')
print("Padded Input Sequence:", input_sequence_padded)
print("Padded Output Sequence:", output_sequence_padded)
import tensorflow as tf
from tensorflow.keras.layers import Embedding, LSTM, Dense
embedding_dim = 256
units = max_input_length

# Vocabulary sizes: +1 because the tokenizer never assigns index 0, which is
# the value used for padding.
input_vocab_size = len(input_tokenizer.word_index) + 1
output_vocab_size = len(output_tokenizer.word_index) + 1

# Define the model: embed each character id, run an LSTM that returns its
# output at every time step, then apply a softmax over the output vocabulary
# at each step.
model = tf.keras.Sequential([
    Embedding(input_vocab_size, embedding_dim, input_length=max_input_length),
    LSTM(embedding_dim, return_sequences=True),
    Dense(output_vocab_size, activation='softmax'),
])

# Compile the model
# NOTE: the model output has shape (batch, seq_len, output_vocab_size), while
# the targets passed to fit() below are integer ids of shape (batch, seq_len).
# 'categorical_crossentropy' expects one-hot targets, so this combination
# raises the ValueError quoted further down
# ('sparse_categorical_crossentropy' is the loss that accepts integer ids).
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(input_sequence_padded, output_sequence_padded, epochs=10)
import numpy as np
def generate_nickname(input_text):
    """Predict a nickname for ``input_text`` with the trained model.

    The text is encoded with ``input_tokenizer``, post-padded to
    ``max_sequence_length``, and passed to ``model.predict``. The index with
    the highest score along the last axis of the prediction is then decoded
    with ``output_tokenizer``.

    Args:
        input_text: The phrase to convert.

    Returns:
        The decoded text of the first (and only) predicted sequence.
    """
    # Wrap the text in a list: the tokenizer works on batches of texts.
    encoded = input_tokenizer.texts_to_sequences([input_text])
    print(f"Input Sequence: {encoded}")

    # Pad exactly as the training data was padded.
    padded = pad_sequences(encoded, maxlen=max_sequence_length, padding='post')
    print(f"Padded Input Sequence: {padded}")

    predicted_sequence = model.predict(padded)
    # Pick the highest-scoring index along the last axis of the prediction.
    max_arg = np.argmax(predicted_sequence, axis=-1)
    predicted_nickname = output_tokenizer.sequences_to_texts(max_arg)[0]
    return predicted_nickname
# Example usage: run one of the training phrases through the model and print
# the phrase next to the nickname that was generated for it.
input_text = "Madhup is a good boy."
predicted_nickname = generate_nickname(input_text)
print(f"Input Phrase: {input_text}")
print(f"Generated Nickname: {predicted_nickname}")
程序打印出的填充结果如下:
Padded Input Sequence: [[ 5 2 6 12 9 13 1 10 14 1 2 1 3 4 4 6 1 15 4 16 11 0 0 0 0 0 0 0] [10 1 2 5 1 2 1 7 2 17 3 8 1 7 2 18 3 9 2 3 8 1 5 4 6 8 7 11]]
Padded Output Sequence: [[1 3 4 5 6 7 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0] [9 1 2 2 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]
model.fit(input_sequence_padded, output_sequence_padded, epochs=10)
ValueError: Shapes (None, 28) and (None, 28, 10) are incompatible
# Suggested model: the LSTM returns only its final output
# (return_sequences=False), and the last Dense layer has max_input_length
# units, so the model output has shape (batch, max_input_length).
model = tf.keras.Sequential([
    Embedding(input_vocab_size, embedding_dim, input_length=max_input_length),
    LSTM(embedding_dim, return_sequences=False),
    Dense(max_input_length, activation='softmax'),
])
因此,首先您需要关闭 LSTM 的 return_sequences(设为 False),因为这里不需要 LSTM 在每个时间步的输出;其次,最后一个 Dense 层的节点数必须等于填充后输出序列的长度,而该长度在这里与填充后的输入序列长度相同。