我需要使用 Flux.jl 在 Julia 中实现 U-Net。最终目标是针对科学问题训练神经网络。作为第一步,我决定尝试使用 KITTI 基准数据集。
由于我对Python有更多的经验,我最初在PyTorch中实现了U-Net,效果非常好。然后,我在学习 Flux.jl 的同时尝试将我的代码翻译成 Julia。不幸的是,我的实现没有按预期工作。
为了简化问题,我缩减规模并尝试使用合成数据在 Julia 中实现最小的 U-Net 模型。但是,我在训练过程中不断遇到以下错误:
ERROR: MethodError: no method matching (::var"#17#19"{var"#loss#18"{…}, Array{…}, Array{…}})(::@NamedTuple{layers::Tuple{…}})
The function #17 exists, but no method is defined for this combination of argument types.
#17 中的数字根据代码而变化。
创建了一个带有一些卷积层和转置卷积层的最小 U-Net 模型。
使用合成的 64x64 输入图像和二进制掩码作为数据集。
尝试使用 Flux.jl 的梯度函数和 logitcrossentropy 损失通过基本训练循环来训练模型。
尝试不同的 U-Net 实现(例如,这个 repo)。
我怀疑问题可能出在:
这是重现该问题的代码: 我正在使用 Julia 版本 1.11.1 和 Flux v0.16.0
using Flux
using Flux: Conv, ConvTranspose, relu, MaxPool, Dense, Chain, params
using Base.Iterators: partition
using Random
using Plots
# Summary:
# This code defines a U-Net architecture for image segmentation using the Flux library in Julia.
# It creates synthetic data, prepares batches, trains the U-Net model, and tests the trained model.
# The problem is to ensure the U-Net model is correctly implemented and trained on the synthetic dataset.
# Define the U-Net architecture
# Builds a purely sequential encoder -> decoder model (no skip connections between
# the two halves). `input_channels` feeds the first Conv layer and `output_channels`
# is produced by the last ConvTranspose layer.
# NOTE(review): this is the failing version discussed in this document; it is kept
# unchanged so that the BoundsError quoted further down still reproduces.
function unet(input_channels::Int, output_channels::Int)
# Encoder: four Conv(3x3, pad=1) -> relu -> MaxPool(2x2, stride 2) stages with
# channel widths 64, 128, 256 and 512.
encoder = Chain(
Conv((3, 3), input_channels => 64, pad=1), relu, MaxPool((2, 2), stride=(2, 2)),
Conv((3, 3), 64 => 128, pad=1), relu, MaxPool((2, 2), stride=(2, 2)),
Conv((3, 3), 128 => 256, pad=1), relu, MaxPool((2, 2), stride=(2, 2)),
Conv((3, 3), 256 => 512, pad=1), relu, MaxPool((2, 2), stride=(2, 2))
)
# Decoder: four ConvTranspose(3x3, stride 2, pad=1) layers; the last one has no
# activation, so the model emits raw logits.
decoder = Chain(
ConvTranspose((3, 3), 512 => 256, stride=2, pad=1), relu,
ConvTranspose((3, 3), 256 => 128, stride=2, pad=1), relu,
ConvTranspose((3, 3), 128 => 64, stride=2, pad=1), relu,
ConvTranspose((3, 3), 64 => output_channels, stride=2, pad=1)
)
# NOTE(review): the final slice applies 1:64 to dims 2 and 3. The error quoted in this
# document shows a 49×49×1×8 decoder output for a 64×64×1×8 input, so this indexing
# throws a BoundsError. Cropping the two image axes would be x[1:64, 1:64, :, :],
# and even then 49 < 64 is too small.
return Chain(encoder, decoder, x -> x[:, 1:64, 1:64, :])
end
"""
    create_test_data(num_samples::Int; rng=Random.default_rng())

Create a synthetic dataset of `num_samples` `(image, mask)` pairs for image segmentation.

Each `image` is a `64×64×1` `Array{Float32,3}` of uniform random values and each `mask`
is a `64×64×1` `Array{Bool,3}` of random labels. Neither carries a batch dimension.
A non-positive `num_samples` yields an empty vector.

# Keywords
- `rng::AbstractRNG`: generator to draw from; pass a seeded one for reproducible data.

# Returns
A `Vector{Tuple{Array{Float32,3},Array{Bool,3}}}`.
"""
function create_test_data(num_samples::Int; rng::AbstractRNG=Random.default_rng())
    # A typed comprehension keeps the container concretely typed (not `Vector{Any}`).
    # Within each tuple the image is drawn before the mask.
    return Tuple{Array{Float32,3},Array{Bool,3}}[
        (rand(rng, Float32, 64, 64, 1), rand(rng, Bool, 64, 64, 1)) for _ in 1:num_samples
    ]
end
"""
    prepare_batches(data, batch_size::Int)

Split `data`, a collection of `(image, mask)` pairs, into mini-batches.

Consecutive groups of at most `batch_size` samples are taken in order. Within each group
the images, and separately the masks, are concatenated along dimension 4. The final batch
is smaller when `length(data)` is not a multiple of `batch_size`.

# Returns
A vector of `(input_batch, mask_batch)` tuples, one per mini-batch.
"""
function prepare_batches(data, batch_size::Int)
    # `map` lets the result take the concrete type of the batches instead of `Vector{Any}`.
    return map(partition(data, batch_size)) do batch
        # `cat` (rather than a pairwise reduce) still adds the 4th axis for a batch of one.
        input_batch = cat([sample[1] for sample in batch]...; dims=4)
        mask_batch = cat([sample[2] for sample in batch]...; dims=4)
        (input_batch, mask_batch)
    end
end
# Implement a training loop for the U-Net model
# Runs `num_epochs` passes over `train_data` (an iterable of `(input_batch, mask_batch)`
# pairs), computing a gradient and applying an optimiser update per batch, and prints a
# message after each epoch.
# NOTE(review): this is the failing version from the question, kept verbatim so that the
# MethodError quoted in this document still reproduces.
function train_unet(model, train_data, num_epochs::Int, learning_rate::Float64)
opt = ADAM(learning_rate)
# The loss closes over `model` instead of taking it as an argument.
loss(x, y) = Flux.logitcrossentropy(model(x), float(y))
for epoch in 1:num_epochs
for (input_batch, mask_batch) in train_data
# NOTE(review): per the quoted error, the zero-argument closure below ends up being
# called with the NamedTuple returned by `Flux.trainable(model)`, which has no
# matching method. The answer in this document differentiates with respect to the
# model itself instead.
gs = gradient(() -> loss(input_batch, mask_batch), Flux.trainable(model))
Flux.Optimise.update!(opt, Flux.trainable(model), gs)
end
println("Epoch $epoch complete")
end
end
# Test a trained U-Net model
# Runs `model` on `test_image` and shows, side by side, the first channel of the first
# sample of the input and of the prediction.
# NOTE(review): the four-index slices below require a 4-D `test_image`; the transcript later
# in this document shows the model rejecting a 3-D 64×64×1 array. It also notes that `plot`
# on a matrix draws lines, which is why the revised version uses `heatmap`.
function test_unet(model, test_image)
prediction = model(test_image)
plot(plot(test_image[:, :, 1, 1], title="Input Image"),
plot(prediction[:, :, 1, 1], title="Predicted Mask"),
layout=(1, 2))
end
# Example usage
# Build a model with 1 input channel and 1 output channel, generate 100 synthetic
# samples and group them into batches of 8.
model = unet(1, 1)
data = create_test_data(100)
batches = prepare_batches(data, 8)
# Train for 10 epochs with learning rate 0.001.
train_unet(model, batches, 10, 0.001)
# NOTE(review): `data[1]` is a single (image, mask) pair whose image is 64×64×1, i.e. it has
# no batch dimension; the transcript later in this document shows the model rejecting it.
test_image, _ = data[1]
test_unet(model, test_image)
为什么上面的代码会导致上述错误,我该如何修复它?
我尝试确保模型输出和目标的形状匹配,但我怀疑问题在于损失函数或梯度调用。
第一个问题是这个模型不接受数据:
julia> batches[1][1] |> summary # batch of 8 images, 1 channel
"64×64×1×8 Array{Float32, 4}"
julia> model(batches[1][1])
ERROR: BoundsError: attempt to access 49×49×1×8 Array{Float32, 4} at index [1:49, 1:64, 1:64, 1:8]
我认为您的
x -> x[:, 1:64, 1:64, :]
意在裁剪图像尺寸,但它实际作用在一个图像轴和通道维度(channel dim)上。正确的写法应该是 x -> x[1:64, 1:64, :, :]。
但是,
49×49×1×8
对于这样的裁剪来说仍然太小。也许是步幅(stride)设置得不对?下面是一个可以运行的版本:
julia> function unet(input_channels::Int, output_channels::Int)
           # Build a sequential encoder -> decoder model whose output has the same spatial
           # size as its input, provided width and height are multiples of 16.
           # Encoder: each Conv(3×3, pad=1) keeps the spatial size and each 2×2 max-pool
           # halves it, e.g. 64 -> 32 -> 16 -> 8 -> 4.
           encoder = Chain(
               Conv((3, 3), input_channels => 64, relu; pad=1), MaxPool((2, 2), stride=(2, 2)),
               Conv((3, 3), 64 => 128, relu; pad=1), MaxPool((2, 2), stride=(2, 2)),
               Conv((3, 3), 128 => 256, relu; pad=1), MaxPool((2, 2), stride=(2, 2)),
               Conv((3, 3), 256 => 512, relu; pad=1), MaxPool((2, 2), stride=(2, 2)),
           )
           # Decoder: a 2×2 transposed convolution with stride 2 and no padding maps n to
           # (n - 1) * 2 + 2 = 2n, so four of them undo the four poolings exactly
           # (4 -> 8 -> 16 -> 32 -> 64). A 3×3 kernel with pad=1 would give 2n - 1 per
           # layer (4 -> 7 -> 13 -> 25 -> 49), which is why cropping to 64 failed.
           decoder = Chain(
               ConvTranspose((2, 2), 512 => 256, relu; stride=2),  # relu inside the layer
               ConvTranspose((2, 2), 256 => 128, relu; stride=2),
               ConvTranspose((2, 2), 128 => 64, relu; stride=2),
               ConvTranspose((2, 2), 64 => output_channels; stride=2),  # raw logits
           )
           # No cropping needed: the decoder output already matches the input size.
           return Chain(encoder, decoder)
       end;
julia> model = unet(1, 1);
julia> model(batches[1][1]) |> summary
"64×64×1×8 Array{Float32, 4}"
训练应该像下面这样进行。使用
ADAM
意味着您可能正在遵循一些非常古老的指南,并且您不应该遵循整个奇怪的隐式 gradient(() -> ..., params(model))
路径。推荐的方式是这样的:
julia> function train_unet(model, train_data, num_epochs::Int, learning_rate::Float64)
           # Train `model` in place on `train_data`, an iterable of `(input_batch, mask_batch)`
           # pairs, for `num_epochs` passes with Adam at `learning_rate`. Prints the epoch
           # counter after each pass.
           # Set up the optimiser for this model:
           opt_state = Flux.setup(Adam(learning_rate), model)
           # The loss is always an explicit function of the model. The masks are per-pixel
           # 0/1 labels, so use the element-wise binary loss: `logitcrossentropy` would
           # apply a softmax over `dims=1` (an image axis here), which is not a per-pixel loss.
           loss(m, x, y) = Flux.logitbinarycrossentropy(m(x), float(y))
           for epoch in 1:num_epochs
               for (input_batch, mask_batch) in train_data
                   # Gradient with respect to the model itself:
                   grads = Flux.gradient(m -> loss(m, input_batch, mask_batch), model)
                   # `grads` has one entry per differentiated argument; the model is the first.
                   Flux.update!(opt_state, model, grads[1])
               end
               @show epoch
           end
       end;
julia> train_unet(model, batches, 10, 0.001)
epoch = 1
epoch = 2
epoch = 3
epoch = 4
epoch = 5
epoch = 6
epoch = 7
epoch = 8
epoch = 9
epoch = 10
现在你可以尝试一下......当然这只是噪音:
julia> test_image, _ = data[1];
julia> test_image |> summary # lacks a batch dimension
"64×64×1 Array{Float32, 3}"
julia> test_unet(model, test_image)
ERROR: DimensionMismatch: layer Conv((3, 3), 1 => 64, relu, pad=1) expects ndims(input) == 4, but got 64×64×1 Array{Float32, 3}
julia> test_unet(model, reshape(test_image,64,64,1,1)) # `plot` is drawing lines
julia> function test_unet(model, test_image)
           # Run `model` on `test_image` and show the input next to the predicted logits
           # (first channel of the first sample) as two heatmaps.
           # A single image without a batch dimension (3-D) is promoted to a batch of one,
           # because the model's Conv layers only accept 4-D input.
           batch = test_image
           if ndims(batch) == 3
               batch = reshape(batch, size(batch)..., 1)
           end
           prediction = model(batch)
           # `heatmap` renders each matrix as an image; `plot` on a matrix draws lines.
           plot(heatmap(batch[:, :, 1, 1], title="Input Image"),
                heatmap(prediction[:, :, 1, 1], title="Predicted Mask"),
                layout=(1, 2))
       end
# Example usage
test_unet (generic function with 1 method)
julia> test_unet(model, reshape(test_image,64,64,1,1))