from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from datasets import load_dataset
from torch.utils.data import DataLoader
import torch
from tqdm import tqdm
# Load the model and tokenizer
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
# Load and prepare the data
def load_and_prepare_data(dataset_name="squad"):
    dataset = load_dataset(dataset_name, split='train')
    dataset = dataset.filter(lambda example: len(example['context']) <= 512)
    dataset = dataset.map(
        lambda example: {
            'input_ids': tokenizer(example['question'], example['context'], truncation=True, padding="max_length", return_tensors="pt")['input_ids'].squeeze(0),
            'attention_mask': tokenizer(example['question'], example['context'], truncation=True, padding="max_length", return_tensors="pt")['attention_mask'].squeeze(0),
            'labels': tokenizer(example['answers']['text'][0], truncation=True, padding="max_length", return_tensors="pt")['input_ids'].squeeze(0)
        }
    )
    # Split the data into training and test sets
    train_size = int(0.8 * len(dataset))
    train_data, test_data = torch.utils.data.random_split(dataset, [train_size, len(dataset) - train_size])
    # Create DataLoaders for the training and test data
    train_dataloader = DataLoader(train_data, batch_size=8, shuffle=True)
    test_dataloader = DataLoader(test_data, batch_size=8)
    return train_dataloader, test_dataloader
# Train the model
def train_model(model, train_dataloader, test_dataloader, epochs=3):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
    for epoch in range(epochs):
        model.train()
        for batch in tqdm(train_dataloader, desc=f"Epoch {epoch+1}"):
            optimizer.zero_grad()
            input_ids = batch['input_ids']
            attention_mask = batch['attention_mask']
            labels = batch['labels']
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
        model.eval()
        total_loss = 0
        for batch in tqdm(test_dataloader, desc=f"Epoch {epoch+1} - Validation"):
            with torch.no_grad():
                input_ids = batch['input_ids']
                attention_mask = batch['attention_mask']
                labels = batch['labels']
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                total_loss += outputs.loss.item()
        avg_loss = total_loss / len(test_dataloader)
        print(f"Epoch {epoch+1} - Validation Loss: {avg_loss:.4f}")
# Load and prepare the data
train_dataloader, test_dataloader = load_and_prepare_data()
# Train the model
train_model(model, train_dataloader, test_dataloader)
# Save the model weights
torch.save(model.state_dict(), "chatbot_model.pth")
# Main chat loop
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        break
    else:
        # Generate a response
        inputs = tokenizer(user_input, return_tensors="pt")
        outputs = model.generate(**inputs, max_length=50)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        print(f"Bot: {response}")
Error:
AttributeError Traceback (most recent call last)
<ipython-input-6-fadd0eb00ebb> in <cell line: 66>()
64
65 # Обучение модели
---> 66 train_model(model, train_dataloader, test_dataloader)
67
68 # Сохранение модели
/usr/local/lib/python3.10/dist-packages/transformers/models/t5/modeling_t5.py in forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, cross_attn_head_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
999 )
1000 elif input_ids is not None:
-> 1001 input_shape = input_ids.size()
1002 input_ids = input_ids.view(-1, input_shape[-1])
1003 elif inputs_embeds is not None:
AttributeError: 'list' object has no attribute 'size'
I still don't understand what this error means.

This error occurs because input_ids (along with the other inputs, such as attention_mask and labels) is passed to the model inside train_model as a list rather than a tensor: by default, the datasets library stores the mapped columns as plain Python lists, and the DataLoader's default collate function then yields each column as a list of tensors instead of one batched tensor. PyTorch models expect tensors, not lists, so you need to make sure all data is in the correct format before passing it to the model.
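You can confirm this by inspecting a single batch: with the default collate behavior, each column comes back as a list of per-position tensors rather than one tensor. A quick diagnostic sketch, assuming the train_dataloader built above:

batch = next(iter(train_dataloader))
print(type(batch['input_ids']))      # <class 'list'>, not torch.Tensor
print(len(batch['input_ids']))       # one entry per token position (the padded length, 512 here)
print(batch['input_ids'][0].shape)   # each entry has shape (batch_size,), e.g. torch.Size([8])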
Here is the corrected code:
def train_model(model, train_dataloader, test_dataloader, epochs=3):
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)
    for epoch in range(epochs):
        model.train()
        for batch in tqdm(train_dataloader, desc=f"Epoch {epoch+1}"):
            optimizer.zero_grad()
            # Stack the per-position tensors along dim=1 so the result has shape (batch_size, seq_len)
            input_ids = torch.stack(batch['input_ids'], dim=1).to(model.device)
            attention_mask = torch.stack(batch['attention_mask'], dim=1).to(model.device)
            labels = torch.stack(batch['labels'], dim=1).to(model.device)
            outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
            loss = outputs.loss
            loss.backward()
            optimizer.step()
        model.eval()
        total_loss = 0
        for batch in tqdm(test_dataloader, desc=f"Epoch {epoch+1} - Validation"):
            with torch.no_grad():
                input_ids = torch.stack(batch['input_ids'], dim=1).to(model.device)
                attention_mask = torch.stack(batch['attention_mask'], dim=1).to(model.device)
                labels = torch.stack(batch['labels'], dim=1).to(model.device)
                outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)
                total_loss += outputs.loss.item()
        avg_loss = total_loss / len(test_dataloader)
        print(f"Epoch {epoch+1} - Validation Loss: {avg_loss:.4f}")
Hope this helps a bit.