我试图将numpy数组(x, 1, 768)和标签(1, 768)加载到tf.data中,我的代码如下。
import pandas as pdb
import pdb
import numpy as np
import os, glob
import tensorflow as tf
#from tensorflow import keras
from tensorflow.keras import layers, initializers
from tensorflow.keras.layers import Layer
from tensorflow.keras import backend as K
from tensorflow.keras import layers
#from tensorflow import keras
from keras.preprocessing.sequence import pad_sequences
from natsort import natsorted
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
#################################################################
#File Paths
text_path = 'data/featured/*'
tags_path ='data/encoded_tags/*'
text_files = natsorted(glob.glob(text_path)) # Load the array filenames
tags_files = natsorted(glob.glob(tags_path)) # Load the label filenames
text_train = text_files[:round(0.9*len(text_files))]
tags_train = tags_files[:round(0.9*len(tags_files))]
#Parameters
AUTO = tf.data.experimental.AUTOTUNE
index = 0
PADDING_LENGTH = 768
BATCH_LENGTH = 1
LEARNING_RATE = 0.01
OPTIMISER = 'ADAM'
#Define the training parameters here.
#################################################################
#@tf.function
def load_files(filename1, filename2):
tags = np.load(filename[1], allow_pickle=True)
arr = np.load(filename[0], allow_pickle=True)
# Perform padding and convert back to tensor
return arr, tags
def load_dataset(text_files, tag_files):
dataset = tf.data.Dataset.from_tensor_slices([text_files, tag_files])
print(dataset)
#dataset = dataset.map(load_files)
#dataset = dataset.map(lambda x: tf.py_function(load_files, [x], tf.float64))
dataset = dataset.map(map_func=load_files, num_parallel_calls=AUTO)
return dataset
def get_batch_dataset(filename1, filename2):
dataset = load_dataset(filename1, filename2)
dataset = dataset.batch(BATCH_LENGTH)
dataset = dataset.prefetch(AUTO).repeat()
return dataset
def get_training_dataset():
return get_batch_dataset(text_train, tags_train)
dataset = get_batch_dataset(text_train, tags_train)
当我试图通过数组的文件名和标签来读取numpy数组时, 它抛出了以下错误:
TypeError: expected str, bytes or os.PathLike object, not Tensor
我已经试过了:
filename1.numpy() # doesnt work:
AttributeError: 'Tensor' object has no attribute 'numpy'
filename.as_string() # doesnt work either:
AttributeError: 'Tensor' object has no attribute 'as_string'
我只是需要把数组读成numpy数组,因为我需要填充它们,我试过用tf.io.read_file()来读取它们,但不知怎么的,它把数组搞乱了,返回的形状是(None,). 每个数组(对于一个唯一的文件名)的长度为x,如前所述,我需要执行填充,并输出一个固定大小的数组,以便给它一个神经网络。
先谢谢你的帮助。
传递给我的函数是 dataset.map
将被跟踪并作为Tensorflow图执行。传递给函数的参数将是 Tensor
s. 这就是为什么你会得到
TypeError: expected str, bytes or os.PathLike object, not Tensor
如果你想让你的函数对字符串而不是Tensors进行操作,你可以使用 tf.py_function op:
def load_files_py(filename1, filename2):
tags = np.load(filename1.numpy(), allow_pickle=True)
arr = np.load(filename2.numpy(), allow_pickle=True)
# Perform padding and convert back to tensor
return arr, tags
def load_file(filename1, filename2):
return tf.py_function(load_files_py, inp=[filename1, filename2], Tout=[arr_type, tags_type])
下面的代码展示了传递给函数的参数在有和没有的情况下的不同。py_function
:
import tensorflow as tf
def load_py(a):
# Arguments to py_function are eager tensors, so we can use `.numpy()` to get their string values.
tf.print(type(a)) # <class 'tensorflow.python.framework.ops.EagerTensor'>
tf.print(a.numpy()) # b'a'
return a
def load(a):
# `load` is executed in graph mode, so `a` and `b` are non-eager Tensors.
tf.print(type(a)) # <class 'tensorflow.python.framework.ops.Tensor'>
return tf.py_function(load_py, inp=[a], Tout=[tf.string])
ds = tf.data.Dataset.from_tensor_slices((["a", "b"]))
ds = ds.map(load)
next(iter(ds))
https:/colab.research.google.comdrive1Tr04ykdBGx01uCMUHdyBLXV4VQMi-6dU。