所以这让我不知所措了一段时间,我似乎无法找到如何让生成器遵守数据帧的顺序。
这是我拥有的自定义 DataGenerator,重要的是
flow_from_directory
尊重顺序,因为这是数据集用于创建应该 1:1 映射到生成器输出的列表的顺序。
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
from tensorflow.data import Dataset
import tensorflow as tf
import config
class MultipleInputGenerator(Sequence):
def __init__(self, df):
self.image_generator = ImageDataGenerator(rotation_range=15,
width_shift_range=0.2,
height_shift_range=0.2,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest')
self.gen_1 = self.image_generator.flow_from_dataframe(dataframe=df,
directory = f"{config.BASE_DIR}{config.FILE_PREFIX}/",
x_col="id",
y_col="boneage",
batch_size=config.BATCH_SIZE,
seed=config.RANDOM_STATE,
class_mode="raw",
shuffle=False,
color_mode=config.COLOR_SPACE,
target_size=(config.IMG_SIZE, config.IMG_SIZE))
self.gen_2 = list(Dataset.from_tensor_slices(df.male).batch(config.BATCH_SIZE).as_numpy_iterator())
def __len__(self):
return self.gen_1.__len__()
def __getitem__(self, index):
x1_batch, y_batch = self.gen_1.__getitem__(index)
x2_batch = self.gen_2[index]
return [x1_batch, x2_batch], y_batch, self.gen_1.filenames[0:32]
这是输出:
x2_batch = [1 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 0 0]
y_batch = [-1.7075595 -0.80910918 0.40501286 0.69640215 0.11362357 -0.7605443
0.62355483 0.98779144 0.11362357 1.86195931 -0.51771989 0.55070751
-0.46915501 0.69640215 0.11362357 -0.17776572 0.11362357 -1.78040682
0.69640215 1.27918073 1.13348609 0.8420968 0.98779144 0.98779144
-0.32346037 -0.17776572 0.11362357 -0.46915501 -1.63471217 0.91494412
-0.17776572 0.25931821]
filename_id = ['13908.png', '14699.png', '11430.png', '4962.png', '6461.png', '12220.png', '6777.png', '4300.png', '11906.png', '13922.png', '14743.png', '13594.png', '14608.png', '10275.png', '6976.png', '10880.png', '2700.png', '1429.png', '2465.png', '12939.png', '3136.png', '11616.png', '10869.png', '15515.png', '3144.png', '12136.png', '14449.png', '11610.png', '15158.png', '5792.png', '11810.png', '6672.png']
这是DataFrame的顺序
id boneage male
11079 13908.png -1.707559 1
11790 14699.png -0.809109 0
8869 11430.png 0.405013 0
3058 4962.png 0.696402 0
4415 6461.png 0.113624 1
9569 12220.png -0.760544 1
5360 7517.png 1.861959 1
4695 6777.png 0.623555 1
5992 8217.png 0.259318 1
2631 4300.png 0.987791 1
9290 11906.png 0.113624 1
11092 13922.png 1.861959 1
11826 14743.png -0.517720 0
10800 13594.png 0.550708 0
11711 14608.png -0.469155 1
7110 9454.png -0.177766 0
7841 10275.png 0.696402 0
4871 6976.png 0.113624 0
8384 10880.png -0.177766 1
1193 2700.png 0.113624 0
45 1429.png -1.780407 1
976 2465.png 0.696402 1
10220 12939.png 1.279181 1
5538 7716.png -1.051934 1
1578 3136.png 1.133486 1
9027 11616.png 0.842097 1
8373 10869.png 0.987791 1
12528 15515.png 0.987791 1
1584 3144.png -0.323460 1
6209 8460.png -2.217491 1
9493 12136.png -0.177766 0
11568 14449.png 0.113624 0
x2_batch 与 DataFrame 的顺序匹配,但从 filename_id 中可以清楚地看出,
flow_from_dataframe
不遵守数据帧的顺序。
知道我怎样才能让它尊重数据框的顺序。
谢谢。