I have this code:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
conversations = [
    ("Hello", "Hi there!"),
    ("How are you?", "I'm doing well, thanks."),
    ("What's your name?", "I'm a chatbot."),
]
tokenizer = Tokenizer()
tokenizer.fit_on_texts(conversations)
vocab_size = len(tokenizer.word_index) + 1
sequences = tokenizer.texts_to_sequences(conversations)
max_sequence_len = max([len(seq) for seq in sequences])
X, y = zip(*sequences)
X = pad_sequences(X, maxlen=max_sequence_len, padding='post')
y = pad_sequences(y, maxlen=max_sequence_len, padding='post')
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=True),
    LSTM(100, return_sequences=True),
    Dense(vocab_size, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(X, y, epochs=50, verbose=1)
def generate_response(input_text):
    input_seq = tokenizer.texts_to_sequences([input_text])
    padded_input = pad_sequences(input_seq, maxlen=max_sequence_len, padding='post')
    predicted_output = model.predict(padded_input)
    predicted_word_index = tf.argmax(predicted_output, axis=-1).numpy()
    response = tokenizer.sequences_to_texts(predicted_word_index)
    return response[0]
while True:
    user_input = input(">>> ")
    response = generate_response(user_input)
    print(f"Chatbot: {response}")
But I get this error:
Traceback (most recent call last):
File "C:\Users\mceca\Desktop\chatbot.py", line 14, in <module>
tokenizer.fit_on_texts(conversations)
File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 293, in fit_on_texts
seq = text_to_word_sequence(
File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 74, in text_to_word_sequence
input_text = input_text.lower()
AttributeError: 'tuple' object has no attribute 'lower'
I have tensorflow==2.12.0 installed.
I don't want to change my TensorFlow version. My operating system is Windows 10, and I don't have a GPU, only a CPU.
I'm using Keras, but if you have suggestions for other approaches in TensorFlow, feel free to suggest them.
How can I fix this error? If you have more than one answer, please post them all.
The error you're seeing occurs because the Tokenizer class in keras.preprocessing.text expects a list of strings (i.e., text samples), but it received a list of tuples.

Understanding the error

The error message

AttributeError: 'tuple' object has no attribute 'lower'

appears because Tokenizer.fit_on_texts tries to call .lower() on each text sample, and here it encounters a tuple instead of a string.
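You can reproduce the failing step in isolation: fit_on_texts iterates over whatever you pass it, so with a list of (input, response) pairs each element is a tuple, not a string. A minimal check (the pair variable is just for illustration):

pair = ("Hello", "Hi there!")
print(type(pair))              # <class 'tuple'>
print(hasattr(pair, "lower"))  # False -- exactly what triggers the AttributeError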
Solution

You need to adjust the code so that you pass lists of text strings rather than tuples. For your chatbot task, split the inputs and responses into two separate lists, then combine them where needed:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
# Prepare the conversation data
inputs = ["Hello", "How are you?", "What's your name?"]
responses = ["Hi there!", "I'm doing well, thanks.", "I'm a chatbot."]
# Initialize and fit the tokenizer on the input texts
tokenizer = Tokenizer()
tokenizer.fit_on_texts(inputs + responses) # Fit on both inputs and responses
# Define vocabulary size
vocab_size = len(tokenizer.word_index) + 1
# Convert texts to sequences
input_sequences = tokenizer.texts_to_sequences(inputs)
response_sequences = tokenizer.texts_to_sequences(responses)
# Determine the maximum sequence length
max_sequence_len = max(max(len(seq) for seq in input_sequences),
                       max(len(seq) for seq in response_sequences))
# Pad sequences
X = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='post')
y = pad_sequences(response_sequences, maxlen=max_sequence_len, padding='post')
# Define the model
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=True),
    LSTM(100, return_sequences=True),
    Dense(vocab_size, activation='softmax')
])
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Fit the model
model.fit(X, y, epochs=50, verbose=1)
def generate_response(input_text):
    # Tokenize and pad the user input the same way as the training data
    input_seq = tokenizer.texts_to_sequences([input_text])
    padded_input = pad_sequences(input_seq, maxlen=max_sequence_len, padding='post')
    # Predict a word distribution for every timestep, then take the most likely word at each position
    predicted_output = model.predict(padded_input)
    predicted_word_index = tf.argmax(predicted_output, axis=-1).numpy()[0]
    # Map the predicted word indices back to text
    response = tokenizer.sequences_to_texts([predicted_word_index])
    return response[0]
# Interaction loop
while True:
    user_input = input(">>> ")
    response = generate_response(user_input)
    print(f"Chatbot: {response}")