无法通过下载链接将文件导入 Google Colab 以训练人工智能

问题描述 投票:0回答:1

当我使用自己的下载链接时,文件无法下载到 Colab 中,我不明白为什么它不起作用。我也查阅了不同的网站,了解如何在 Colab 中管理文件,但都没有效果。我的编程经验很少,想知道是否有人愿意帮我解决这个问题,因为我认为这可能是一个很容易解决的问题。

完整代码如下:

pip install tensorflow numpy matplotlib

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.applications import MobileNetV2, ResNet50, InceptionV3 # try to use them and see which is better
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
from tensorflow.keras.utils import get_file
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
import pathlib
import numpy as np

# Training hyperparameters shared by the data loader, the model and the
# training loop below.
IMAGE_SHAPE = (1000, 1000, 3)  # size of each image, with 3 as the last entry
num_classes = 7                # one class per artist (7 currently)
batch_size = 5                 # images per batch yielded by the generators
epochs = 10                    # number of training epochs

def load_data():
    """Download the image dataset and build training/validation generators.

    The archive behind ``data_url`` is fetched and unpacked with ``get_file``.
    The extracted directory is expected to hold one sub-directory per class,
    each containing the ``.jpg`` images of that class.

    Returns:
        tuple: ``(train_data_gen, test_data_gen, CLASS_NAMES)`` where the two
        generators yield rescaled image batches for the 80% training split
        and the 20% validation split, and ``CLASS_NAMES`` is a numpy array of
        the class sub-directory names in sorted order.

    Raises:
        FileNotFoundError: if the download did not result in an extracted
            dataset directory.
    """
    data_url = "https://drive.google.com/uc?export=download&id=18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6"
    data_dir = pathlib.Path(get_file(origin=data_url, fname="data_set", untar=True))

    # A share link can answer with an HTML page instead of the archive; in
    # that case nothing is extracted and the directory never appears. Fail
    # here with an actionable message rather than deep inside Keras.
    if not data_dir.is_dir():
        raise FileNotFoundError(
            f"Dataset directory '{data_dir}' does not exist after downloading "
            f"{data_url}. The URL probably did not return a tar archive "
            "(for example an HTML confirmation page was served instead)."
        )

    # count how many images are there
    image_count = len(list(data_dir.glob('*/*.jpg')))
    print("Number of images:", image_count)

    # Only sub-directories are classes; sorting keeps the class -> index
    # mapping identical from one run to the next.
    CLASS_NAMES = np.array(sorted(item.name for item in data_dir.glob('*') if item.is_dir()))

    # 20% validation set 80% training set
    image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.2)

    # Both subsets read the same directory with the same settings; only the
    # `subset` argument differs.
    flow_kwargs = dict(
        directory=str(data_dir),
        batch_size=batch_size,
        classes=list(CLASS_NAMES),
        target_size=(IMAGE_SHAPE[0], IMAGE_SHAPE[1]),
        shuffle=True,
    )
    train_data_gen = image_generator.flow_from_directory(subset="training", **flow_kwargs)
    test_data_gen = image_generator.flow_from_directory(subset="validation", **flow_kwargs)
    return train_data_gen, test_data_gen, CLASS_NAMES


def create_model(input_shape):
    """Build and compile a MobileNetV2-based classifier for ``num_classes``.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        A compiled Keras ``Model`` whose output is a softmax over
        ``num_classes`` classes.
    """
    # Load the backbone without its ImageNet classification layer.
    # `model.layers.pop()` does not detach that layer from the graph, so the
    # new head would otherwise be stacked on top of the 1000-way softmax.
    # Dropping the top also lifts the fixed 224x224 input-size requirement
    # that applies when the pretrained classifier is kept.
    base_model = MobileNetV2(input_shape=input_shape, include_top=False, pooling="avg")
    # freeze all the weights of the backbone except the last 4 layers
    for layer in base_model.layers[:-4]:
        layer.trainable = False
    # our own fully connected layer for classification, fed by the pooled
    # backbone features
    predictions = Dense(num_classes, activation="softmax")(base_model.output)
    model = Model(inputs=base_model.inputs, outputs=predictions)
    # print the summary of the model architecture
    model.summary()
    # training the model using adam optimizer
    model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
    return model


if __name__ == "__main__":
    # load the data generators
    train_generator, validation_generator, class_names = load_data()
    # constructs the model
    model = create_model(input_shape=IMAGE_SHAPE)
    # model name, used for the log directory and the checkpoint file names
    model_name = "MobileNetV2_finetune_last5"
    # make sure the results folder exists before the checkpoint callback
    # needs it; exist_ok avoids the separate check-then-create step
    os.makedirs("results", exist_ok=True)
    # callbacks: TensorBoard logs plus a checkpoint written whenever the
    # monitored validation loss improves
    tensorboard = TensorBoard(log_dir=os.path.join("logs", model_name))
    checkpoint = ModelCheckpoint(os.path.join("results", f"{model_name}" + "-loss-{val_loss:.2f}.h5"),
                                save_best_only=True,
                                verbose=1)
    # number of batches per epoch; np.ceil returns a float, so cast to int
    training_steps_per_epoch = int(np.ceil(train_generator.samples / batch_size))
    validation_steps_per_epoch = int(np.ceil(validation_generator.samples / batch_size))
    # Model.fit accepts generators directly; fit_generator is deprecated
    model.fit(train_generator, steps_per_epoch=training_steps_per_epoch,
              validation_data=validation_generator, validation_steps=validation_steps_per_epoch,
              epochs=epochs, verbose=1, callbacks=[tensorboard, checkpoint])

which then outputs this error when it's run:

Downloading data from https://drive.google.com/uc?export=download&id=18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6
   8192/Unknown - 0s 0us/stepNumber of images: 0
---------------------------------------------------------------------------
FileNotFoundError                         Traceback (most recent call last)
<ipython-input-10-102c652a5bdf> in <cell line: 1>()
      1 if __name__ == "__main__":
      2     # load the data generators
----> 3     train_generator, validation_generator, class_names = load_data()
      4     # constructs the model
      5     model = create_model(input_shape=IMAGE_SHAPE)

/usr/local/lib/python3.10/dist-packages/keras/src/preprocessing/image.py in __init__(self, directory, image_data_generator, target_size, color_mode, classes, class_mode, batch_size, shuffle, seed, data_format, save_to_dir, save_prefix, save_format, follow_links, subset, interpolation, keep_aspect_ratio, dtype)
 561         if not classes:
    562             classes = []
--> 563             for subdir in sorted(os.listdir(directory)):
    564                 if os.path.isdir(os.path.join(directory, subdir)):
    565                     classes.append(subdir)

FileNotFoundError: [Errno 2] No such file or directory: '/root/.keras/datasets/data_set'

我尝试过替换文件夹、创建新的下载链接、更改路径等,但仍然不起作用。

artificial-intelligence google-colaboratory training-data file-management getfiles
1个回答
0
投票

实际的下载网址似乎是 https://drive.google.com/uc?export=download&id=18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6 。我可以通过下面的 wget 命令看到它的工作情况:

`
wget -O 18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip --no-check-certificate -r 'https://drive.google.com/uc?export=download&id=18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6'

WARNING: combining -O with -r or -p will mean that all downloaded content
will be placed in the single file you specified.

--2023-09-24 09:05:56--  https://drive.google.com/uc?export=download&id=18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6
Resolving drive.google.com (drive.google.com)... 172.217.164.110
Connecting to drive.google.com (drive.google.com)|172.217.164.110|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip’

18FLNpct5RZlf4BZBpAaM6n7bzoQmSQ     [ <=>                                                   ]   2.22K  --.-KB/s    in 0s

2023-09-24 09:05:56 (72.4 MB/s) - ‘18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip’ saved [2277]

Loading robots.txt; please ignore errors.
--2023-09-24 09:05:56--  https://drive.google.com/robots.txt
Reusing existing connection to drive.google.com:443.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/plain]
Saving to: ‘18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip’

18FLNpct5RZlf4BZBpAaM6n7bzoQmSQ     [ <=>                                                   ]     570  --.-KB/s    in 0s

2023-09-24 09:05:56 (19.4 MB/s) - ‘18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip’ saved [570]

--2023-09-24 09:05:56--  https://drive.google.com/open?id=18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6
Reusing existing connection to drive.google.com:443.
HTTP request sent, awaiting response... 307 Temporary Redirect
Location: https://drive.google.com/file/d/18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6/view?usp=drive_open [following]
--2023-09-24 09:05:56--  https://drive.google.com/file/d/18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6/view?usp=drive_open
Reusing existing connection to drive.google.com:443.
HTTP request sent, awaiting response... 200 OK
Length: unspecified [text/html]
Saving to: ‘18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip’

18FLNpct5RZlf4BZBpAaM6n7bzoQmSQ     [ <=>                                                   ]  80.25K  --.-KB/s    in 0.09s

2023-09-24 09:05:57 (866 KB/s) - ‘18FLNpct5RZlf4BZBpAaM6n7bzoQmSQR6.zip’ saved [82171]

FINISHED --2023-09-24 09:05:57--
Total wall clock time: 1.1s
Downloaded: 3 files, 83K in 0.09s (895 KB/s)

`

© www.soinside.com 2019 - 2024. All rights reserved.