Hello.
I am currently using Julia version 1.6.3 on the following platform: OS: Linux (x86_64-pc-linux-gnu), CPU: Intel(R) Xeon(R) Gold 6248 CPU @ 2.50GHz, GPU: CuDevice(0): Tesla T4. I am trying to implement a variational autoencoder called a Gradient Origin Network (GON). GONs were introduced as a generative model that does not require an encoder or a hypernetwork. Call the variational GON model F. First, a zero vector z_0 is passed through F, and the latent vector is initialized as the negative gradient of the loss with respect to this zero vector, so the latent is determined by a single gradient step. Call this latent vector z. The network parameters are then optimized using the loss on the reconstruction F(z).
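In symbols: for an input x, the latent is z = -∇_{z_0} L(x, F(z_0)) evaluated at z_0 = 0, and the parameters of F are then trained on the reconstruction loss L(x, F(z)).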
I am running my experiments on the MNIST dataset, with the images linearly interpolated to size 32x32. The decoding and reparametrization functions are given below; theta is a vector of model weights.
function reparametrize(mu, logvar)
    std = exp.(0.5 .* logvar)                     # standard deviation from log-variance
    epsilon = convert(Atype, randn(F, size(mu)))  # noise sample on the same device/precision
    z = mu .+ epsilon .* std                      # reparametrization trick
    return z
end
function decode(theta, z; batch_size = 64, training = true)
    # Linear layers producing the mean and log-variance of the latent distribution
    mu = theta[1] * z .+ theta[2]
    logvar = theta[3] * z .+ theta[4]
    z = reparametrize(mu, logvar)
    z = reshape(z, (1, 1, nz, batch_size))
    # Transposed-convolution stack with batchnorm and ELU activations
    z = deconv4(theta[5], z, mode = 1) .+ theta[6]
    z = batchnorm(z, bnmoments(), theta[7]; training = training)
    z = Knet.elu.(z)
    z = deconv4(theta[8], z, stride = 2, padding = 1, mode = 1) .+ theta[9]
    z = batchnorm(z, bnmoments(), theta[10]; training = training)
    z = Knet.elu.(z)
    z = deconv4(theta[11], z, stride = 2, padding = 1, mode = 1) .+ theta[12]
    z = batchnorm(z, bnmoments(), theta[13]; training = training)
    z = Knet.elu.(z)
    z = deconv4(theta[14], z, stride = 2, padding = 1, mode = 1) .+ theta[15]
    x_hat = Knet.sigm.(z)
    return x_hat, mu, logvar
end
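For completeness, the functions above rely on a few globals defined earlier in the notebook. The exact values are in the linked notebook; roughly, they look like this (the value of nz below is a placeholder):

using Knet
const F = Float32            # element type used throughout
const Atype = KnetArray{F}   # array type (KnetArray on the GPU, Array{F} on the CPU)
const nz = 256               # latent dimension (placeholder value)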
The loss is binary cross-entropy plus KL divergence. The code is given below.
function BCE(x_tensor, x_hat_tensor)
    x = mat(x_tensor)
    x_hat = mat(x_hat_tensor)
    # Pixel-wise binary cross-entropy, summed over pixels and averaged over the batch
    return -mean(sum(x .* log.(x_hat .+ F(1e-10)) .+ (1 .- x) .* log.(1 .- x_hat .+ F(1e-10)), dims = 1))
end
function KLD(mu, logvar)
    # KL divergence between N(mu, exp(logvar)) and the standard normal prior
    KL = -0.5 * mean(sum(1 .+ logvar .- mu .* mu .- exp.(logvar), dims = 1))
    return KL
end
function loss(theta, x, z)
    x_hat, mu, logvar = decode(theta, z)
    L = BCE(x, x_hat) + KLD(mu, logvar)
    return L
end
Since a GON has two steps, (1) use the gradient with respect to the origin to determine the latent vector z, and (2) use that latent vector for the reconstruction, I need to track the gradients with respect to the model weights through both steps. I therefore wrote the following decoding and loss functions for training.
function decode_train(theta, x; batch_size = 64, training = true)
    # Step (1): differentiate the loss at the origin to obtain the latent vector
    origin = param(Atype(zeros(nz, batch_size)))
    derivative_origin = @diff loss(value.(theta), x, origin)
    dz = grad(derivative_origin, origin)
    z = -value(dz)
    # Step (2): reconstruct from the latent vector
    x_hat, mu, logvar = decode(theta, z)
    return x_hat, mu, logvar
end
function loss_train(theta, x)
    x_hat, mu, logvar = decode_train(theta, x)
    L = BCE(x, x_hat) + KLD(mu, logvar)
    return L
end
However, I am not able to take the gradient of loss_train(theta, x). I get the following error when I apply the @diff macro of the AutoGrad package. How can I train this model, which requires a second-order derivative (I need the derivative of decode_train)?
To reproduce this result, you can run the following notebook:
https://github.com/BariscanBozkurt/Gradient-Origin-Networks/blob/main/GON_Implementation_Issue.ipynb
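For reference, here is a stripped-down sketch of the nested-differentiation pattern I am trying to use, on a toy affine decoder instead of the full convolutional model (my own simplification, not part of the notebook):

using AutoGrad
# Toy "decoder": a single affine layer standing in for the full model
toy_loss(theta, x, z) = (r = theta[1] * z .+ theta[2] .- x; sum(r .* r))

function toy_loss_train(theta, x)
    z0 = Param(zeros(2))                       # origin
    d  = @diff toy_loss(value.(theta), x, z0)  # step (1): inner differentiation w.r.t. the origin
    z  = -value(grad(d, z0))                   # one gradient step gives the latent
    return toy_loss(theta, x, z)               # step (2): reconstruct with the latent
end

theta = [Param(randn(3, 2)), Param(randn(3))]
x = randn(3)
J = @diff toy_loss_train(theta, x)   # with the full model, this is where the error below is thrown
grad(J, theta[1])                    # gradient w.r.t. the model weights is what I ultimately need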
My code:
@diff loss_train(theta, x)
The error is:
Stacktrace:
[1] copyto!(a::KnetArray{Float32, 4}, b::Base.Broadcast.Broadcasted{Base.Broadcast.Style{AutoGrad.Value}, NTuple{4, Base.OneTo{Int64}}, typeof(identity), Tuple{AutoGrad.Result{KnetArray{Float32, 4}}}})
@ Knet.KnetArrays ~/.julia/packages/Knet/RCkV0/src/knetarrays/broadcast.jl:35
[2] copyto!(x::AutoGrad.Result{KnetArray{Float32, 4}}, y::Base.Broadcast.Broadcasted{Base.Broadcast.Style{AutoGrad.Value}, NTuple{4, Base.OneTo{Int64}}, typeof(identity), Tuple{AutoGrad.Result{KnetArray{Float32, 4}}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:55
[3] materialize!
@ ./broadcast.jl:894 [inlined]
[4] materialize!
@ ./broadcast.jl:891 [inlined]
[5] materialize!(dest::AutoGrad.Result{KnetArray{Float32, 4}}, x::AutoGrad.Result{KnetArray{Float32, 4}})
@ Base.Broadcast ./broadcast.jl:887
[6] batchnorm4_back(g::KnetArray{Float32, 4}, x::AutoGrad.Result{KnetArray{Float32, 4}}, dy::AutoGrad.Result{KnetArray{Float32, 4}}; eps::Float64, training::Bool, cache::Knet.Ops20.BNCache, moments::Knet.Ops20.BNMoments, o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ Knet.Ops20 ~/.julia/packages/Knet/RCkV0/src/ops20/batchnorm.jl:262
[7] #batchnorm4x#191
@ ~/.julia/packages/Knet/RCkV0/src/ops20/batchnorm.jl:317 [inlined]
[8] #back#210
@ ./none:0 [inlined]
[9] differentiate(::Function; o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:165
[10] differentiate
@ ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:135 [inlined]
[11] decode_train(theta::Vector{Any}, x::KnetArray{Float32, 4}; batch_size::Int64, training::Bool)
@ Main ./In[14]:4
[12] decode_train
@ ./In[14]:2 [inlined]
[13] loss_train(theta::Vector{Any}, x::KnetArray{Float32, 4})
@ Main ./In[16]:2
[14] (::var"#16#17")()
@ Main ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:205
[15] differentiate(::Function; o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:144
[16] differentiate(::Function)
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:135
[17] top-level scope
@ In[18]:1
[18] eval
@ ./boot.jl:360 [inlined]
[19] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:1116
[20] softscope_include_string(m::Module, code::String, filename::String)
@ SoftGlobalScope ~/.julia/packages/SoftGlobalScope/u4UzH/src/SoftGlobalScope.jl:65
[21] execute_request(socket::ZMQ.Socket, msg::IJulia.Msg)
@ IJulia ~/.julia/packages/IJulia/e8kqU/src/execute_request.jl:67
[22] #invokelatest#2
@ ./essentials.jl:708 [inlined]
[23] invokelatest
@ ./essentials.jl:706 [inlined]
[24] eventloop(socket::ZMQ.Socket)
@ IJulia ~/.julia/packages/IJulia/e8kqU/src/eventloop.jl:8
[25] (::IJulia.var"#15#18")()
@ IJulia ./task.jl:411
MethodError: no method matching copyto!(::KnetArray{Float32, 4}, ::AutoGrad.Result{KnetArray{Float32, 4}})
Closest candidates are:
copyto!(::KnetArray{T, N} where N, ::Array{T, N} where N) where T at /kuacc/users/bbozkurt15/.julia/packages/Knet/RCkV0/src/knetarrays/copy.jl:10
copyto!(::KnetArray{T, N} where N, ::Array{S, N} where N) where {T, S} at /kuacc/users/bbozkurt15/.julia/packages/Knet/RCkV0/src/knetarrays/copy.jl:18
copyto!(::KnetArray{T, N} where N, ::KnetArray{T, N} where N) where T at /kuacc/users/bbozkurt15/.julia/packages/Knet/RCkV0/src/knetarrays/copy.jl:9
...
Stacktrace:
[1] differentiate(::Function; o::Base.Iterators.Pairs{Union{}, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:148
[2] differentiate(::Function)
@ AutoGrad ~/.julia/packages/AutoGrad/TTpeo/src/core.jl:135
[3] top-level scope
@ In[18]:1
[4] eval
@ ./boot.jl:360 [inlined]
[5] include_string(mapexpr::typeof(REPL.softscope), mod::Module, code::String, filename::String)
@ Base ./loading.jl:1116