Tensorflow keras 错误 AttributeError: 'tuple' 对象没有属性 'lower'

问题描述 投票:0回答:1

我有代码:

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Training pairs as (prompt, reply) tuples -- this tuple structure is the
# root cause of the error discussed below.
conversations = [
    ("Hello", "Hi there!"),
    ("How are you?", "I'm doing well, thanks."),
    ("What's your name?", "I'm a chatbot."),
]

tokenizer = Tokenizer()
# BUG: fit_on_texts expects an iterable of strings; each element here is a
# tuple, so Keras ends up calling .lower() on a tuple and raises
# AttributeError: 'tuple' object has no attribute 'lower' (the traceback
# shown later in this post).
tokenizer.fit_on_texts(conversations)

# +1 because Keras word indices start at 1; index 0 is reserved for padding.
vocab_size = len(tokenizer.word_index) + 1

# NOTE(review): texts_to_sequences goes through the same string-processing
# path, so it would presumably fail on tuples in the same way.
sequences = tokenizer.texts_to_sequences(conversations)
max_sequence_len = max([len(seq) for seq in sequences])

# NOTE(review): even if tokenization succeeded, zip(*sequences) would not
# split prompts from replies -- it transposes across the three sequences,
# truncated to the shortest one. This line looks wrong; verify intent.
X, y = zip(*sequences)
X = pad_sequences(X, maxlen=max_sequence_len, padding='post')
y = pad_sequences(y, maxlen=max_sequence_len, padding='post')

# Per-timestep classifier: one softmax over the vocabulary at each position.
# mask_zero=True makes downstream layers ignore the 0 padding ids.
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=True),
    LSTM(100, return_sequences=True),
    Dense(vocab_size, activation='softmax')
])

# sparse_categorical_crossentropy: integer token-id targets, not one-hot.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.fit(X, y, epochs=50, verbose=1)

def generate_response(input_text):
    """Tokenize *input_text*, run the model, and decode the predicted
    token ids back into a text reply (first and only batch element)."""
    input_seq = tokenizer.texts_to_sequences([input_text])
    padded_input = pad_sequences(input_seq, maxlen=max_sequence_len, padding='post')
    predicted_output = model.predict(padded_input)
    # Greedy decoding: most probable token id at each timestep.
    predicted_word_index = tf.argmax(predicted_output, axis=-1).numpy()
    response = tokenizer.sequences_to_texts(predicted_word_index)
    return response[0]

# Simple console REPL; note it offers no exit command -- it runs until the
# process is killed.
while True:
    user_input = input(">>> ")
    response = generate_response(user_input)
    print(f"Chatbot: {response}")

但是我有错误:

Traceback (most recent call last):
  File "C:\Users\mceca\Desktop\chatbot.py", line 14, in <module>
    tokenizer.fit_on_texts(conversations)
  File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 293, in fit_on_texts
    seq = text_to_word_sequence(
  File "C:\Users\mceca\AppData\Roaming\Python\Python310\site-packages\keras\preprocessing\text.py", line 74, in text_to_word_sequence
    input_text = input_text.lower()
AttributeError: 'tuple' object has no attribute 'lower'

我已经安装了tensorflow==2.12.0

我不想更改tensorflow版本,我的操作系统是windows 10,我没有GPU,我只有CPU。

我使用 keras,但如果您对 TensorFlow 中的其他方式有建议,可以建议我。

如何修复此错误?如果您有更多答案,请全部输入。

python python-3.x tensorflow keras deep-learning
1个回答
0
投票

您遇到的错误发生是因为 keras.preprocessing.text 中的 Tokenizer 类需要一个字符串列表(即文本样本),但收到的是一个元组列表。

理解错误 出现错误消息

AttributeError: 'tuple' object has no attribute 'lower'
是因为 Tokenizer.fit_on_texts 方法尝试在每个文本样本上调用 .lower() 方法,但遇到的是元组而不是字符串。

解决方案 您需要调整代码,以便传递文本字符串列表而不是元组。对于您的聊天机器人任务,您应该将输入和输出文本分成两个不同的列表,然后在需要时将它们组合起来。

import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense

# Prepare the conversation data: prompts and replies kept as two parallel
# lists of plain strings, which is the input format Tokenizer expects.
inputs = ["Hello", "How are you?", "What's your name?"]
responses = ["Hi there!", "I'm doing well, thanks.", "I'm a chatbot."]

# Initialize and fit the tokenizer on the combined corpus so that both
# prompt and reply words receive an index.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(inputs + responses)  # Fit on both inputs and responses

# Define vocabulary size (+1: Keras word indices start at 1; 0 is padding)
vocab_size = len(tokenizer.word_index) + 1

# Convert texts to sequences of integer token ids
input_sequences = tokenizer.texts_to_sequences(inputs)
response_sequences = tokenizer.texts_to_sequences(responses)

# Determine the maximum sequence length across BOTH sides so X and y can be
# padded to a common length
max_sequence_len = max(max(len(seq) for seq in input_sequences),
                    max(len(seq) for seq in response_sequences))

# Pad sequences with trailing zeros up to the common length
X = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='post')
y = pad_sequences(response_sequences, maxlen=max_sequence_len, padding='post')

# Define the model: one softmax over the vocabulary per timestep;
# mask_zero=True makes downstream layers skip the 0 padding ids.
model = Sequential([
    Embedding(vocab_size, 64, input_length=max_sequence_len, mask_zero=True),
    LSTM(100, return_sequences=True),
    Dense(vocab_size, activation='softmax')
])

# sparse_categorical_crossentropy takes integer token-id targets directly.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit the model
model.fit(X, y, epochs=50, verbose=1)

def generate_response(input_text):
    """Produce the chatbot's reply for a single user utterance.

    The text is tokenized, padded to the training sequence length, and fed
    through the model; the most probable token at every timestep is then
    decoded back into a string.
    """
    encoded = tokenizer.texts_to_sequences([input_text])
    batch = pad_sequences(encoded, maxlen=max_sequence_len, padding='post')
    probabilities = model.predict(batch)
    # Greedy decoding: argmax token id per timestep, first batch element.
    token_ids = tf.argmax(probabilities, axis=-1).numpy()[0]
    return tokenizer.sequences_to_texts([token_ids])[0]

# Interaction loop
while True:
    user_input = input(">>> ")
    response = generate_response(user_input)
    print(f"Chatbot: {response}")
© www.soinside.com 2019 - 2024. All rights reserved.