我正在使用的数据集是 Kaggle 的猫狗分类数据集。我是这样定义数据的:
# Folder holding the downloaded Kaggle cats-and-dogs images.
path = r'C:\Users\berid\python\cats and dogs\PetImages'

# Build the dataset from the image files found under that folder.
data = tf.keras.utils.image_dataset_from_directory(directory=path)
任何建议将不胜感激。
我在同一个数据集上遇到了完全相同的问题。我也是从 Kaggle 下载的数据集,看来其中有一些有问题的照片:有些文件的扩展名是 JPG,但实际格式是 BMP,或者没有格式(None)。另外,有些照片的通道数不正常。我使用下面的代码删除了这些文件。在 25,000 张照片中只有 150 张有问题,所以我认为影响不大。这是我的代码:
def _remove_invalid_images(filenames):
    """Delete every file in *filenames* that is not a usable JPEG image.

    A file is removed when:
      * it cannot be opened as an image at all,
      * the format reported for the loaded image is not JPEG, or
      * the decoded array has an unexpected channel count (only 1, 3 and 4
        channels are kept).

    Files whose channel check itself fails (for example an array without a
    third axis, or a decode error) are only reported and left in place.
    """
    for image_path in filenames:
        # Skip anything that is not a regular file; os.remove() cannot
        # delete directories and would abort the run.
        if not os.path.isfile(image_path):
            continue

        try:
            pil_img = tf.keras.utils.load_img(image_path)
        except OSError as e:
            # An undecodable file used to raise here and stop the whole
            # clean-up; such a file is unusable for training, so drop it
            # and carry on with the rest.
            print('Unreadable image. removing...', e, image_path)
            os.remove(image_path)
            continue

        if pil_img.format != 'JPEG' and pil_img.format != 'jpg':
            print('Not jpeg. removing...', pil_img.format, image_path)
            os.remove(image_path)
            continue

        # NOTE: the array must stay named ``img`` because the f-string below
        # prints the expression text ("img.shape=...").
        try:
            img = mpimg.imread(image_path)
            if img.shape[2] < 1 or img.shape[2] > 4 or img.shape[2] == 2:
                print(f'Removing... {img.shape=} {image_path}')
                os.remove(image_path)
        except Exception as e:
            # Deliberately best-effort: report the problem and keep going so
            # one odd file does not abort the whole validation pass.
            print(e, image_path)


cats_filenames = [os.path.join(data_dir_cats, filename) for filename in os.listdir(data_dir_cats)]
dogs_filenames = [os.path.join(data_dir_dogs, filename) for filename in os.listdir(data_dir_dogs)]

print('Validating cat files....')
_remove_invalid_images(cats_filenames)

print('Validating dog files....')
_remove_invalid_images(dogs_filenames)

print('Done Validating....')

print(f"There are {len(os.listdir(data_dir_dogs))} images of dogs.")
print(f"There are {len(os.listdir(data_dir_cats))} images of cats.")

# Get the filenames for cats and dogs images again, so the lists no longer
# contain the files that were removed above.
cats_filenames = [os.path.join(data_dir_cats, filename) for filename in os.listdir(data_dir_cats)]
dogs_filenames = [os.path.join(data_dir_dogs, filename) for filename in os.listdir(data_dir_dogs)]