from torchvision_starter.engine import train_one_epoch, evaluate
from torchvision_starter import utils
import multiprocessing
import time

import torch

n_cpu = multiprocessing.cpu_count()

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
_ = model.to(device)

# Optimize only the parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.Adam(params, lr=0.00001)

lr_scheduler = torch.optim.lr_scheduler.StepLR(
    optimizer,
    step_size=3,
    gamma=0.2,
    verbose=True
)

# Train for num_epochs more epochs, continuing the epoch counter from 10
num_epochs = 1

start = time.time()

for epoch in range(10, 10 + num_epochs):
    # train for one epoch, printing every 10 iterations
    train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)

    # update the learning rate
    lr_scheduler.step()

    # evaluate on the validation dataset
    evaluate(model, data_loaders['valid'], device=device)

stop = time.time()
print(f"\n\n{num_epochs} epochs in {stop - start:.1f} s ({(stop - start) / 3600:.2f} hrs)")
Everything works fine up to this point, but as soon as I run this cell I get the following error:
TypeError                                 Traceback (most recent call last)
Cell In[5], line 39
     35 start = time.time()
     37 for epoch in range(10, 10 + num_epochs):
     38     # train for one epoch, printing every 10 iterations
---> 39     train_one_epoch(model, optimizer, data_loaders['train'], device, epoch, print_freq=10)
     40     # update the learning rate
     41     lr_scheduler.step()

File ~\Desktop\Object Detection and Segmentation\workspace-1655609281-1\home\torchvision_starter\engine.py:26, in train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq)
     22 warmup_iters = min(1000, len(data_loader) - 1)
     24 lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor)
---> 26 for images, targets in metric_logger.log_every(data_loader, print_freq, header):
     27     images = list(image.to(device) for image in images)
     28     targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

File ~\Desktop\Object Detection and Segmentation\workspace-1655609281-1\home\torchvision_starter\utils.py:209, in MetricLogger.log_every(self, iterable, print_freq, header)
    200 log_msg = self.delimiter.join([
    201     header,
    202     '[{0' + space_fmt + '}/{1}]',
    (...)
    206     'data: {data}'
    207 ])
    208 MB = 1024.0 * 1024.0
--> 209 for obj in iterable:
    210     data_time.update(time.time() - end)
    211     yield obj

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\dataloader.py:652, in _BaseDataLoaderIter.__next__(self)
    649 if self._sampler_iter is None:
    650     # TODO(https://github.com/pytorch/pytorch/issues/76750)
    651     self._reset()  # type: ignore[call-arg]
--> 652 data = self._next_data()
    653 self._num_yielded += 1
    654 if self._dataset_kind == _DatasetKind.Iterable and \
    655         self._IterableDataset_len_called is not None and \
    656         self._num_yielded > self._IterableDataset_len_called:

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\dataloader.py:1347, in _MultiProcessingDataLoaderIter._next_data(self)
   1345 else:
   1346     del self._task_info[idx]
-> 1347 return self._process_data(data)

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\dataloader.py:1373, in _MultiProcessingDataLoaderIter._process_data(self, data)
   1371 self._try_put_index()
   1372 if isinstance(data, ExceptionWrapper):
-> 1373     data.reraise()
   1374 return data

File ~\anaconda3\envs\dpl_U\lib\site-packages\torch\_utils.py:461, in ExceptionWrapper.reraise(self)
    457 except TypeError:
    458     # If the exception takes multiple arguments, don't try to
    459     # instantiate since we don't know how to
    460     raise RuntimeError(msg) from None
--> 461 raise exception

TypeError: Caught TypeError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "C:\Users\cheng\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\_utils\worker.py", line 302, in _worker_loop
    data = fetcher.fetch(index)
  File "C:\Users\cheng\anaconda3\envs\dpl_U\lib\site-packages\torch\utils\data\_utils\fetch.py", line 52, in fetch
    return self.collate_fn(data)
  File "C:\Users\cheng\Desktop\Object Detection and Segmentation\workspace-1655609281-1\home\torchvision_starter\utils.py", line 236, in collate_fn
    return tuple(zip(*batch))
TypeError: 'KeyError' object is not iterable
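The telling part is the last frame: collate_fn does tuple(zip(*batch)) and one element of batch is a KeyError exception object rather than an (image, target) pair, so zip cannot iterate it. In other words, the dataset's __getitem__ produced a KeyError for at least one sample (e.g. a missing annotation key or an unmapped class label) and that exception object ended up in the batch. A minimal debugging sketch to locate the offending sample, assuming train_data is the UdacitySelfDrivingDataset instance built inside get_data_loaders:

# Hedged debugging sketch: iterate the dataset directly, with no worker
# processes, to find which index does not yield an (image, target) pair.
# `train_data` is assumed to be the UdacitySelfDrivingDataset from
# get_data_loaders; the loop prints the first bad index and stops.
for i in range(len(train_data)):
    try:
        sample = train_data[i]
    except KeyError as e:
        print(f"__getitem__ raised KeyError at index {i}: {e!r}")
        break
    if not (isinstance(sample, tuple) and len(sample) == 2):
        # covers the case where __getitem__ caught the KeyError and
        # returned the exception object instead of raising it
        print(f"__getitem__ returned {sample!r} at index {i}")
        break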
I tried adding drop_last=True to the DataLoader calls in helper.py, for example:
data_loaders["train"] = torch.utils.data.DataLoader(
train_data,
batch_size=batch_size,
sampler=train_sampler,
num_workers=num_workers,
collate_fn=utils.collate_fn,
drop_last=True
)
but it didn't help. For what it's worth, my torch and torchvision versions are compatible and CUDA is available. How can I fix this?
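Note that drop_last only drops the final incomplete batch of an epoch; it cannot skip a sample whose __getitem__ fails, so it has no effect on this error. A more telling experiment (a sketch, not project-specific advice) is to rebuild the loaders with num_workers=0: data loading then runs in the main process, so the failure surfaces directly instead of being wrapped and re-raised by the worker machinery, which makes it much easier to inspect:

# Hedged sketch: num_workers=0 keeps loading in the main process, so
# the underlying failure is shown undisguised. `folder` is a
# placeholder for the actual dataset path used earlier.
data_loaders = get_data_loaders(folder, batch_size=2, num_workers=0)
images, targets = next(iter(data_loaders['train']))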
The get_data_loaders function (from helper.py):
def get_data_loaders(
    folder, batch_size: int = 2, valid_size: float = 0.2, num_workers: int = -1, limit: int = -1, thinning: int = None
):
    """
    Creates and returns the train, validation and test data loaders.

    :param folder: folder containing the dataset
    :param batch_size: size of the mini-batches
    :param valid_size: fraction of the dataset to use for validation. For example 0.2
                       means that 20% of the dataset will be used for validation
    :param num_workers: number of workers to use in the data loaders. Use -1 to mean
                        "use all my cores"
    :param limit: maximum number of data points to consider
    :param thinning: take every n-th frame, instead of all frames
    :return: a dictionary with 3 keys: 'train', 'valid' and 'test' containing respectively the
             train, validation and test data loaders
    """
    if num_workers == -1:
        # Use all cores
        num_workers = multiprocessing.cpu_count()

    # We will fill this up later
    data_loaders = {"train": None, "valid": None, "test": None}

    # Create 3 sets of data transforms: one for the training dataset
    # (with data augmentation), one for the validation dataset and one
    # for the test set (both without augmentation)
    data_transforms = {
        "train": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=True),
        "valid": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
        "test": get_transform(UdacitySelfDrivingDataset.mean, UdacitySelfDrivingDataset.std, train=False),
    }

    # Create train and validation datasets
    train_data = UdacitySelfDrivingDataset(
        folder,
        transform=data_transforms["train"],
        train=True,
        thinning=thinning
    )

    # The validation dataset is a split from the train dataset, so we read
    # from the same folder, but we apply the transforms for validation
    valid_data = UdacitySelfDrivingDataset(
        folder,
        transform=data_transforms["valid"],
        train=True,
        thinning=thinning
    )

    # Obtain training indices that will be used for validation
    n_tot = len(train_data)
    indices = torch.randperm(n_tot)

    # If requested, limit the number of data points to consider
    if limit > 0:
        indices = indices[:limit]
        n_tot = limit

    split = int(math.ceil(valid_size * n_tot))
    train_idx, valid_idx = indices[split:], indices[:split]

    # Define samplers for obtaining training and validation batches
    train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
    valid_sampler = torch.utils.data.SubsetRandomSampler(valid_idx)

    # Prepare data loaders
    data_loaders["train"] = torch.utils.data.DataLoader(
        train_data,
        batch_size=batch_size,
        sampler=train_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )
    data_loaders["valid"] = torch.utils.data.DataLoader(
        valid_data,
        batch_size=batch_size,
        sampler=valid_sampler,
        num_workers=num_workers,
        collate_fn=utils.collate_fn,
        drop_last=True
    )

    # Now create the test data loader
    test_data = UdacitySelfDrivingDataset(
        folder,
        transform=data_transforms["test"],
        train=False,
        thinning=thinning
    )

    if limit > 0:
        indices = torch.arange(limit)
        test_sampler = torch.utils.data.SubsetRandomSampler(indices)
    else:
        test_sampler = None

    data_loaders["test"] = torch.utils.data.DataLoader(
        test_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        sampler=test_sampler,
        collate_fn=utils.collate_fn,
        drop_last=True
    )

    return data_loaders
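For completeness, a hedged smoke-test sketch for the function above: limit shrinks the index set, so one full pass over each loader, which exercises __getitem__ and collate_fn end to end, finishes quickly ("data" is a placeholder for the real dataset folder):

# Hedged smoke test: "data" is a placeholder path, and each split is
# assumed to have at least `limit` samples. A clean pass over every
# loader means all sampled indices collated into (images, targets).
loaders = get_data_loaders("data", batch_size=2, valid_size=0.2, num_workers=0, limit=32)
for split in ("train", "valid", "test"):
    for images, targets in loaders[split]:
        pass
    print(f"{split}: OK")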