tensorflow.python.framework.errors_impl.FailedPreconditionError: C:\Users\Ata Onur Özdemir\PycharmProjects\Emotion_Detection\output 不是目录(is not a directory)

问题描述 投票:0回答:1

这是一段对酒店评论进行情感分析(判断评论是正面还是负面)的示例代码,使用了 pandas、transformers、datasets 和 turkish_lm_tuner 库。首先,我怀疑问题出在路径名上(C:\Users\Ata Onur Özdemir 中含有字母“Ö”),于是修改了路径,但问题没有解决,输出仍然是同样的错误。其次,我在 output 文件夹中创建了 `__init__.py`,结果还是同样的错误。我还在环境变量中定义了该路径,但同样的错误再次出现。我在 Google 和 Stack Overflow 上搜索过,也尝试了很多方法,都没有成功。请帮帮我 :)

import pandas as pd
from transformers import AutoTokenizer
from datasets import Dataset
from turkish_lm_tuner import TrainerForClassification, EvaluatorForClassification
import os

# Read the CSV file into a DataFrame
data = pd.read_csv('../Emotion_Detection/Hotel_readablee.csv')

output_dir = 'C:\\Users\\Ata Onur Özdemir\\PycharmProjects\\Emotion_Detection\\output'

# Create the output directory when nothing exists at that path; when something
# already exists there, it has to be a directory
if os.path.exists(output_dir):
    if not os.path.isdir(output_dir):
        raise NotADirectoryError(f"{output_dir} is not a directory")
else:
    os.makedirs(output_dir)

# Load the tokenizer for the pretrained checkpoint named by model_name
model_name = "boun-tabi-LMG/TURNA"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Helper that converts a pandas DataFrame into a tokenized Dataset
class CustomDatasetProcessor:
    """Tokenize the review columns of a DataFrame with a given tokenizer."""

    def __init__(self, tokenizer, max_input_length):
        """Keep the tokenizer and the length used for padding and truncation.

        Args:
            tokenizer: Callable tokenizer applied to each batch of reviews.
            max_input_length: Value passed as ``max_length`` to the tokenizer.
        """
        self.max_input_length = max_input_length
        self.tokenizer = tokenizer

    def load_and_preprocess_data(self, data):
        """Build a Dataset from ``data`` and tokenize it in batches.

        Args:
            data: DataFrame holding the ``Positive_Review_Tr`` and
                ``Negative_Review_Tr`` columns.

        Returns:
            The result of ``Dataset.map`` applied with the batch tokenizer.
        """

        def _tokenize_batch(batch):
            # Cast every cell to str before handing it to the tokenizer
            positives = list(map(str, batch['Positive_Review_Tr']))
            negatives = list(map(str, batch['Negative_Review_Tr']))

            # The two lists are passed as the first and second positional
            # arguments; every sequence is padded/truncated to one length
            return self.tokenizer(
                positives,
                negatives,
                truncation=True,
                padding='max_length',
                max_length=self.max_input_length,
                return_tensors='pt',  # Return PyTorch tensors
            )

        return Dataset.from_pandas(data).map(_tokenize_batch, batched=True)

# Processor shared by all three splits
dataset_processor = CustomDatasetProcessor(tokenizer, max_input_length=2048)

# Sample 80% of the rows for training, then halve the remaining rows into
# validation and test (fixed seed for repeatable sampling)
train_data = data.sample(frac=0.8, random_state=42)
remaining_data = data.drop(train_data.index)
validation_data = remaining_data.sample(frac=0.5, random_state=42)
test_data = remaining_data.drop(validation_data.index)

# Tokenize the splits in train / validation / test order
train_dataset, eval_dataset, test_dataset = [
    dataset_processor.load_and_preprocess_data(split)
    for split in (train_data, validation_data, test_data)
]

# Training parameters
# NOTE(review): presumably turkish_lm_tuner forwards this dict to
# transformers.TrainingArguments — confirm against the installed version.
training_params = {
    'num_train_epochs': 10,
    'per_device_train_batch_size': 4,
    'per_device_eval_batch_size': 4,
    # Reuse the directory checked at the top of the script instead of
    # repeating the hard-coded path
    'output_dir': output_dir,
    'evaluation_strategy': 'epoch',
    'save_strategy': 'epoch',
    # The reported traceback fails while the TensorBoard callback creates its
    # log directory via tf.io.gfile.makedirs; disabling reporting skips that
    # callback so training does not go through that code path
    'report_to': 'none',
}

# Optimizer parameters
optimizer_params = {
    'optimizer_type': 'adafactor',
    'scheduler': False,
}

# Test parameters
test_params = {
    'per_device_eval_batch_size': 4,
    # Same directory as the one checked at the top of the script
    'output_dir': output_dir,
}

# NOTE(review): 4 labels is not a binary setup; confirm the number of distinct
# classes in the data and adjust this value if it differs.
num_labels = 4

# Directory name shared by the trainer, the save calls and the evaluator
model_save_path = "hotel_reviews_classification_model"

# Build the classification trainer from the parameter dicts defined above
model_trainer = TrainerForClassification(
    model_name=model_name,
    num_labels=num_labels,
    task='classification',
    optimizer_params=optimizer_params,
    training_params=training_params,
    model_save_path=model_save_path,
    test_params=test_params,
)

# Run training; the call receives the train, validation and test datasets
trainer, model = model_trainer.train_and_evaluate(train_dataset, eval_dataset, test_dataset)

# Write the trained model and the tokenizer into the same directory
model.save_pretrained(model_save_path)
tokenizer.save_pretrained(model_save_path)

# Build an evaluator that points at the saved model directory
evaluator = EvaluatorForClassification(
    model_save_path=model_save_path,
    model_name=model_name,
    task='classification',
    test_params=test_params,
    num_labels=num_labels,
)

# Score the test dataset
results = evaluator.evaluate_model(test_dataset)

# Put the results in a DataFrame and write them to CSV without the index
results_df = pd.DataFrame(results)
results_df.to_csv('evaluation_results.csv', index=False)
print("Evaluation results saved to evaluation_results.csv.")

运行后出现以下错误:

Traceback (most recent call last):
  File "C:\Users\Ata Onur Özdemir\PycharmProjects\Emotion_Detection\main.py", line 101, in <module>
    trainer, model = model_trainer.train_and_evaluate(train_dataset, eval_dataset, test_dataset)
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\turkish_lm_tuner\trainer.py", line 195, in train_and_evaluate
    trainer.train()
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer.py", line 1885, in train
    return inner_training_loop(
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer.py", line 2147, in _inner_training_loop
    self.control = self.callback_handler.on_train_begin(args, self.state, self.control)
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer_callback.py", line 454, in on_train_begin
    return self.call_event("on_train_begin", args, state, control)
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\trainer_callback.py", line 498, in call_event
    result = getattr(callback, event)(
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\integrations\integration_utils.py", line 629, in on_train_begin
    self._init_summary_writer(args, log_dir)
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\transformers\integrations\integration_utils.py", line 615, in _init_summary_writer
    self.tb_writer = self._SummaryWriter(log_dir=log_dir)
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\torch\utils\tensorboard\writer.py", line 249, in __init__
    self._get_file_writer()
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\torch\utils\tensorboard\writer.py", line 281, in _get_file_writer
    self.file_writer = FileWriter(
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\torch\utils\tensorboard\writer.py", line 75, in __init__
    self.event_writer = EventFileWriter(
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\tensorboard\summary\writer\event_file_writer.py", line 72, in __init__
    tf.io.gfile.makedirs(logdir)
  File "C:\Users\Ata Onur Özdemir\venv\lib\site-packages\tensorflow\python\lib\io\file_io.py", line 513, in recursive_create_dir_v2
    _pywrap_file_io.RecursivelyCreateDir(compat.path_to_bytes(path))
tensorflow.python.framework.errors_impl.FailedPreconditionError: C:\Users\Ata Onur Özdemir\PycharmProjects\Emotion_Detection\output is not a directory
python-3.x dictionary tensorflow huggingface-transformers huggingface-trainer
1个回答
0
投票

我创建了这个问题的最小可重现示例

import os

output_dir = 'D:\\TEMP\\PhycharmProjects\\Emotion_Detection\\output'

# Create the directory when the path is missing; if the path exists but is
# not a directory, report it and raise
if os.path.exists(output_dir):
    if not os.path.isdir(output_dir):
        print(f"{output_dir} NOT created!")
        raise NotADirectoryError(f"{output_dir} is not a directory")
else:
    os.makedirs(output_dir)
    print(f"{output_dir} created!")

运行此代码会给出以下输出:

D:\TEMP>python testdir.py
D:\TEMP\PhycharmProjects\Emotion_Detection\output created!

我甚至又确认了一遍(其实没有必要,但是……):

D:\TEMP>tree PhycharmProjects
Folder PATH listing for volume HDD
Volume serial number is D46B-804B
D:\TEMP\PHYCHARMPROJECTS
└───Emotion_Detection
    └───output
© www.soinside.com 2019 - 2024. All rights reserved.