我正在训练神经网络初始化,学习率 = 0.001 和优化器 = Adam,
optimizer = optim.Adam(net.parameters(), lr=opt.lr_init)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
这是训练过程的片段
每 10 次迭代,打印信息。 如果您在这里查看学习率 lr,对于前 20 次迭代 lr = 0.001,在接下来的 10 次迭代中 [030/3434] 它如何变为 0.00025?为什么这里不是 0.001?并且如前所述,步长=30,因此在 30 次迭代后,学习率降低了 0.5 倍,即接下来的 30 次迭代变为 0.0005,依此类推。
我主要关心的是 Iteration[030/3434]、[060/3434]、[090/3434] 的 lr 值...
相关培训代码:
print('train network begin..')
# ------------------------------------ step 3/5 ------------------------------------
criterion = nn.L1Loss() # loss function
# criterion = nn.BCELoss() # loss function to compare
criterion_bet = nn.MSELoss(size_average=False)
ssim_loss = pytorch_ssim.SSIM(window_size = 11)#to maximize this loss
#optimizer = optim.SGD(net.parameters(), lr=opt.lr_init, momentum=0.9, dampening=0.1)
optimizer = optim.Adam(net.parameters(), lr=opt.lr_init)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
# ------------------------------------ step 4/5 --------------------------------------------------
for epoch in range(opt.max_epoch):
loss_sigma = loss_gradients_sum = loss_between_pair_sum = loss_pair_lable_sum = loss_ssim_sum = 0.0 # loss
scheduler.step()
for i, data in enumerate(train_loader):
# images and labels
input1, input2, gt1, gt2, lb1, lb2, gtimg = data
input1, input2, gt1, gt2, gtimg = Variable(input1).cuda(), Variable(input2).cuda(), Variable(gt1).cuda(), Variable(gt2).cuda(),Variable(gtimg).cuda().requires_grad_(False)
inputs = torch.cat((input1, input2), 1) #concat input pair image tensor
labels = torch.cat((gt1, gt2), 1).requires_grad_(False)#concat ground truth image tensor
# forward, backward, update weights
optimizer.zero_grad()
outputs = net(inputs)
loss_pair_lable = criterion(outputs,labels)# loss for each image prediction and gt
g1 = outputs[:,0,:,:].unsqueeze(1) * (input1.data )# predict feature_map1
g2 = outputs[:, 1, :, :].unsqueeze(1) * (input2.data)#predict feature_map2
mergeimg = g1 + g2
loss_ssim = 1.0 - ssim_loss(mergeimg.cuda(),gtimg.cuda())
loss_gradients = GL.gradient_loss_merge(mergeimg,gtimg,opt.cuda,device=0)
#loss of 1-A-B
sumpreds = outputs[:,0,:,:] + outputs[:,1,:,:]
sumpreds = 1.0-sumpreds.unsqueeze(1)
zeroimg = torch.zeros(sumpreds.size())
loss_between_pair = criterion_bet(sumpreds.cuda(),zeroimg.cuda())
loss = 0.8*loss_pair_lable +0.1*loss_ssim +0.1*loss_gradients
loss.backward()
torch.nn.utils.clip_grad_norm(net.parameters(), 0.5)
optimizer.step()
loss_sigma += loss.item()
loss_gradients_sum += loss_gradients.item()
#loss_between_pair_sum += loss_between_pair.item()
loss_pair_lable_sum += loss_pair_lable.item()
loss_ssim_sum += loss_ssim.item()
# print information for each 10 iteration
if i % 10 == 9:
loss_avg = loss_sigma / 10
loss_pair_lable_avg = loss_pair_lable_sum / 10
loss_between_pair_avg = loss_between_pair_sum / 10
loss_gradients_avg = loss_gradients_sum / 10
loss_ssim_avg = loss_ssim_sum / 10
loss_sigma = loss_pair_lable_sum = loss_between_pair_sum = loss_gradients_sum = loss_ssim_sum = 0.0
print(
"Training: Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] lr:{} Loss: {:.4f} Loss_pair: {:.4f} Loss_bt: {:.4f} Loss_grads: {:.4f} Loss_ssim: {:.4f} ".format(
epoch + 1, opt.max_epoch, i + 1, len(train_loader), scheduler.get_lr()[0], loss_avg, loss_pair_lable_avg,
loss_between_pair_avg, loss_gradients_avg, loss_ssim_avg))
# record loss
writer.add_scalars('Loss_group', {'train_loss': loss_avg}, epoch)
# record learning rate
writer.add_scalar('learning rate', scheduler.get_lr()[0], epoch)
if epoch % 10 == 0: # save mask for each 10 epoch
train_mask_result_dir = os.path.join(opt.outtrain, str(time_str), str(epoch))
if not os.path.exists(train_mask_result_dir):
os.makedirs(train_mask_result_dir)
for i in range(0,outputs.size(0)):
outmask1 = outputs[i,0,:,:].squeeze().unsqueeze(-1).repeat(1,1,3)
mask_id1 = lb1[i]
outmask2 = outputs[i, 1, :, :].squeeze().unsqueeze(-1).repeat(1, 1, 3)
mask_id2 = lb2[i]
#--------------------------
output = outputs[i,0,:,:].squeeze().unsqueeze(-1).repeat(1,1,3)
cv2.imwrite(train_mask_result_dir + '/' + str(mask_id1) + '.jpg', output.cpu().data.numpy() * 255)
#---------------------------
cv2.imwrite(train_mask_result_dir +'/' + str(mask_id1) + '.jpg', outmask1.cpu().data.numpy()*255)
cv2.imwrite(train_mask_result_dir + '/' + str(mask_id2) + '.jpg', outmask2.cpu().data.numpy() * 255)
cv2.imwrite(train_mask_result_dir + '/' + str('m_'+ mask_id1) + '.jpg', np.clip((outmask1.cpu().data.numpy()*255 + outmask2.cpu().data.numpy() * 255),0,255))
# for each epoch, record
for name, layer in net.named_parameters():
writer.add_histogram(name + '_grad', layer.grad.cpu().data.numpy(), epoch)
writer.add_histogram(name + '_data', layer.cpu().data.numpy(), epoch)
# ------------------------------------validation ------------------------------------
if epoch == 50:
loss_sigma = loss_gradients_sum =loss_between_pair_sum=loss_pair_lable_sum=loss_ssim_sum=0.0
net.eval()
for i, data in enumerate(valid_loader):
input1, input2, gt1, gt2, lb1, lb2, gtimg = data
input1, input2, gt1, gt2, gtimg = Variable(input1).cuda(), Variable(input2).cuda(), Variable(
gt1).cuda(), Variable(gt2).cuda(), Variable(gtimg).cuda().requires_grad_(False)
inputs = torch.cat((input1, input2), 1) # concat input pair image tensor
labels = torch.cat((gt1, gt2), 1).requires_grad_(False) # concat ground truth image tensor
# print(labels.size())#torch.Size([16, 2, 128, 128]
# forward, backward, update weights
optimizer.zero_grad()
outputs = net(inputs)
loss_pair_lable = criterion(outputs, labels) # loss for each image prediction and gt
g1 = outputs[:, 0, :, :].unsqueeze(1) * (input1.data) # predict feature_map1
g2 = outputs[:, 1, :, :].unsqueeze(1) * (input2.data) # predict feature_map2
mergeimg = g1 + g2
loss_ssim = 1.0 - ssim_loss(mergeimg.cuda(),gtimg.cuda())
loss_gradients = GL.gradient_loss_merge(mergeimg, gtimg, opt.cuda, device=0)
#loss of 1-A-B
sumpreds = outputs[:, 0, :, :] + outputs[:, 1, :, :]
sumpreds = 1.0 - sumpreds.unsqueeze(1)
zeroimg = torch.zeros(sumpreds.size())
loss_between_pair = criterion_bet(sumpreds.cuda(), zeroimg.cuda())
#loss = loss_pair_lable +2.0* loss_ssim+0.1* loss_gradients +0.1*loss_between_pair
loss = 0.8 * loss_pair_lable + 0.1 * loss_ssim + 0.1 * loss_gradients
loss.backward()
optimizer.step()
loss_sigma += loss.item()
loss_gradients_sum += loss_gradients.item()
#loss_between_pair_sum += loss_between_pair.item()
loss_pair_lable_sum += loss_pair_lable.item()
loss_ssim_sum += loss_ssim.item()
if epoch % 10 == 0:
val_mask_result_dir = os.path.join(opt.outval, str(time_str), str(epoch))
if not os.path.exists(val_mask_result_dir):
os.makedirs(val_mask_result_dir)
for i in range(0,outputs.size(0)):
outmask1 = outputs[i, 0, :, :].squeeze().unsqueeze(-1).repeat(1, 1, 3)
mask_id1 = lb1[i]
outmask2 = outputs[i, 1, :, :].squeeze().unsqueeze(-1).repeat(1, 1, 3)
mask_id2 = lb2[i]
# --------------------------
output = outputs[i, 0, :, :].squeeze().unsqueeze(-1).repeat(1, 1, 3)
cv2.imwrite(train_mask_result_dir + '/' + str(mask_id1) + '.jpg', output.cpu().data.numpy() * 255)
# ---------------------------
cv2.imwrite(val_mask_result_dir + '/' + str(mask_id1) + '.jpg', outmask1.cpu().data.numpy() * 255)
cv2.imwrite(val_mask_result_dir + '/' + str(mask_id2) + '.jpg', outmask2.cpu().data.numpy() * 255)
cv2.imwrite(val_mask_result_dir + '/' + str('m_' + mask_id1) + '.jpg',
np.clip((outmask1.cpu().data.numpy() * 255 + outmask2.cpu().data.numpy() * 255),0,255))
print('epoch {},{} set valid_loss:{:.2%},Loss_pair: {:.4f} Loss_bt: {:.4f} Loss_grads: {:.4f} Loss_ssim: {:.4f}'.format(epoch, 'Valid', loss_sigma / len(valid_loader), \
loss_pair_lable_sum/len(valid_loader),\
loss_between_pair_sum/len(valid_loader), \
loss_gradients_sum / len(valid_loader),\
loss_ssim_sum / len(valid_loader)))
# Loss, accuracy
writer.add_scalars('Loss_group', {'valid_loss': loss_sigma / len(valid_loader)}, epoch)
#--------------------------------------save model----------------
if epoch % opt.ckpt_step == 0:
net_save_path = os.path.join(log_dir, str(epoch) +'_net_params.pkl')
torch.save(net.state_dict(), net_save_path)
print('Finished Training')
# ------------------------------------ step5: save model ------------------------------------
net_save_path = os.path.join(log_dir, str(opt.max_epoch) + '_net_params.pkl')
torch.save(net.state_dict(), net_save_path)