AttributeError in an imported class in Python

Problem description

My data_tokenization class is in a package:

import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
import json
import numpy as np
# from datasets import load_dataset
# dataset = load_dataset("cfilt/iitb-english-hindi")

class data_tokenization:

 def __init__(self):
     
    current_dir = os.getcwd()
    txt_folder = os.path.join(current_dir,r"data\\txt")
    dict_folder = os.path.join(current_dir,r"data\\dict")
    padded_folder = os.path.join(current_dir,r"data\\padded")

    self.current_dir = current_dir
    self.txt_folder = txt_folder
    self.dict_folder = dict_folder
    self.padded_folder = padded_folder

    #getting max sequence

    #loading dictionaries
    def get_source_dict(self):
        source_dict = {}
        with open(os.path.join(dict_folder,"source_dict.txt"),'r',encoding='utf-8') as f:
            source_dict = json.load(f)
        return source_dict    

    def get_target_dict(self):
        target_dict = {}
        with open(os.path.join(dict_folder,"target_dict.txt"),'r',encoding='utf-8') as f:
            target_dict = json.load(f)
        return target_dict

    def max_length(self):
        max_scent_len = 0
        for filename in ['source_full.txt','target_full.txt']:
            with open(os.path.join(txt_folder,filename),'r',encoding='utf-8') as f:
                for line in f:
                    if(len(line)>max_scent_len):
                        max_scent_len = len(line)
        return  max_scent_len


    #loading sentences from file
    def load_token_sequences(self,filename,dict):
        with open(os.path.join(txt_folder,filename),'r',encoding='utf-8') as f:
            seq = []
            for line in f:
                seq.append(tokenizer(line,dict))
            return seq    
        
    #tokenizing a sentence
    def tokenizer(self,line,dict):
        seq=[]
        for word in line.split():
            seq.append(dict.get(word,1))
        return seq   

    

I call this tokenization class from another file named data_modeling.py; both files are in the same folder.

data_modeling.py


import os
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'
import tensorflow as tf
import numpy as np

import json
from tensorflow.keras.layers import Embedding, Dense, LSTM
from tensorflow.keras.callbacks import EarlyStopping
from data_tokenization import data_tokenization  

current_dir = os.getcwd()
txt_folder = os.path.join(current_dir,r"data\\txt")
artifact_folder = os.path.join(current_dir,"artifacts")

tokenizer = data_tokenization()

source_dictionary = tokenizer.get_source_dict()
max_index = max(source_dictionary.values())
source_count= len(source_dictionary)

target_dictionary = tokenizer.get_target_dict()
target_count=len(target_dictionary)

#source embedding
model = tf.keras.Sequential()
model.add(Embedding(input_dim=max_index+1, output_dim=128))
model.add(LSTM(64,return_sequences=True))
model.add(LSTM(32,return_sequences=True))
#model.add(Dense(64, activation='relu'))
model.add(Dense(target_count,activation='softmax'))

model.compile(optimizer='adam',loss='sparse_categorical_crossentropy', metrics=['accuracy'])

model.build(input_shape=(None, tokenizer.max_length()))

model.summary()


valid_src_seq=[]
with open(os.path.join(txt_folder,"source_valid.txt"), 'r',encoding='utf-8') as f:
    for line in f:
        valid_src_seq.append(tokenizer.tokenizer(line,source_dictionary))


print(valid_src_seq[0:5])


# target_sequence_padded_tr = np.array([[target_dictonary.get(word, 0) for word in sequence] for sequence in target_sequence_padded_tr])
# target_sequence_padded_v = np.array([[target_dictonary.get(word, 0) for word in sequence] for sequence in target_sequence_padded_v])

# source_sequence_padded_tr_subset = source_sequence_padded_tr[:5000]
# target_sequence_padded_tr_subset = target_sequence_padded_tr[:5000]

early_stopping = EarlyStopping(
    monitor='val_loss',  
    patience=1, 
    min_delta=0.001,         
    restore_best_weights=True  
)


#model.fit(source_sequence_padded_tr, target_sequence_padded_tr, batch_size=10,verbose=1 ,callbacks=[early_stopping], epochs=10, validation_data=(source_sequence_padded_v, target_sequence_padded_v))

#target_sequence_padded_te = np.array([[target_dictonary.get(word, 0) for word in sequence] for sequence in target_sequence_padded_te])


# test_loss, test_acc = model.evaluate(source_sequence_padded_te, target_sequence_padded_te)
# print(f"Test Accuracy: {test_acc}")

# model.save(os.path.join(artifact_folder, "translator_model.h5"))

I am getting this error:

Traceback (most recent call last):
  File "d:\WorkSpace\H2ETranslator\models\data_modeling.py", line 20, in <module>
    source_dictionary = tokenizer.get_source_dict()
                        ^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'data_tokenization' object has no attribute 'get_source_dict'

I tried changing tokenizer = data_tokenization() to tokenizer = data_tokenization.data_tokenization(), but that did not change the result. I would like to understand why this happens and how to fix it.
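(A note on that attempt: since the import is from data_tokenization import data_tokenization, the imported name already refers to the class itself, so qualifying it a second time only looks up an attribute that does not exist on the class. A minimal illustration, assuming the module and class names used above:

from data_tokenization import data_tokenization

tokenizer = data_tokenization()  # the imported name is already the class, so this call is correct
# data_tokenization.data_tokenization()  # would itself raise AttributeError:
#                                        # the class has no attribute with its own name
)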

python class attributeerror
1 Answer

From the comments, I realized that the indentation had put all of my methods inside the __init__() method, so they existed only as local functions there and never became attributes of the class; fixing the indentation resolved the AttributeError.
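For reference, the corrected layout looks roughly like this: the def lines of the helper methods sit at the same indentation level as def __init__, and the methods read the folder paths back through self. This is a sketch under a few assumptions (paths joined per component instead of the escaped-backslash strings, and the dict parameter renamed to word_dict so it no longer shadows the builtin); adapt it to the full class:

import os
import json

class data_tokenization:

    def __init__(self):
        current_dir = os.getcwd()
        self.current_dir = current_dir
        # joining per component sidesteps the doubled-backslash literals
        self.txt_folder = os.path.join(current_dir, "data", "txt")
        self.dict_folder = os.path.join(current_dir, "data", "dict")
        self.padded_folder = os.path.join(current_dir, "data", "padded")

    # class-level def: this becomes an attribute reachable as instance.get_source_dict
    def get_source_dict(self):
        with open(os.path.join(self.dict_folder, "source_dict.txt"), 'r', encoding='utf-8') as f:
            return json.load(f)

    # map each word of a line to its index, falling back to 1 for out-of-vocabulary words
    def tokenizer(self, line, word_dict):
        return [word_dict.get(word, 1) for word in line.split()]

    # internal calls now go through self
    def load_token_sequences(self, filename, word_dict):
        with open(os.path.join(self.txt_folder, filename), 'r', encoding='utf-8') as f:
            return [self.tokenizer(line, word_dict) for line in f]

With this layout, tokenizer = data_tokenization() followed by tokenizer.get_source_dict() resolves as expected in data_modeling.py.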
