From e0a4610068f5c53885477af25551eb981a82b270 Mon Sep 17 00:00:00 2001 From: Kristian Holme Date: Fri, 25 Oct 2024 15:36:18 +0200 Subject: [PATCH 1/4] updated compat section and removed deprecated Flux.Zeros --- Project.toml | 6 +++--- src/extras/spectral_normalization.jl | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/Project.toml b/Project.toml index e9e8962..5b21751 100644 --- a/Project.toml +++ b/Project.toml @@ -31,8 +31,8 @@ ColorSchemes = "3" Distributions = "0.25" Flux = "0.14" Images = "0.25, 0.26" -POMDPTools = "0.1" -POMDPs = "0.9" +POMDPTools = "1" +POMDPs = "1" Parameters = "0.12" Plots = "1" Reexport = "1" @@ -43,4 +43,4 @@ ValueHistories = "0.5" WeightsAndBiasLogger = "0.5" Zygote = "0.6" cuDNN = "1" -julia = "1.6 - 1.10" +julia = "1.6 - 1.11" diff --git a/src/extras/spectral_normalization.jl b/src/extras/spectral_normalization.jl index e1acab7..a501a08 100644 --- a/src/extras/spectral_normalization.jl +++ b/src/extras/spectral_normalization.jl @@ -62,19 +62,19 @@ struct ConvSN{N,M,F,A,V, I<:Int, VV<:AbstractArray} u::VV # Left vector for power iteration end -function ConvSN(w::AbstractArray{T,N}, b::Union{Flux.Zeros, AbstractVector{T}}, σ = identity; - stride = 1, pad = 0, dilation = 1, n_iterations = 1) where {T,N} - stride = Flux.expand(Val(N-2), stride) - dilation = Flux.expand(Val(N-2), dilation) - pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) - u = randn(Float32, size(to2D(w), 1), 1) - return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) +function ConvSN(w::AbstractArray{T,N}, b::Union{Bool, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1, n_iterations = 1) where {T,N} +stride = Flux.expand(Val(N-2), stride) +dilation = Flux.expand(Val(N-2), dilation) +pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) +u = randn(Float32, size(to2D(w), 1), 1) +return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) end function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; - init = Flux.glorot_uniform, stride = 1, pad = 0, dilation = 1, - weight = Flux.convfilter(k, ch, init = init), bias = Flux.zeros(ch[2]), n_iterations = 1) where N - ConvSN(weight, bias, σ, stride = stride, pad = pad, dilation = dilation, n_iterations = n_iterations) +init = Flux.glorot_uniform, stride = 1, pad = 0, dilation = 1, +weight = Flux.convfilter(k, ch, init = init), bias = false, n_iterations = 1) where N +ConvSN(weight, bias, σ, stride = stride, pad = pad, dilation = dilation, n_iterations = n_iterations) end Flux.@functor ConvSN From 63851948d7298b1dbff0886b834c5de98f7d1560 Mon Sep 17 00:00:00 2001 From: Kristian Holme Date: Fri, 25 Oct 2024 15:56:39 +0200 Subject: [PATCH 2/4] indentation --- src/extras/spectral_normalization.jl | 56 ++++++++++++++-------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/src/extras/spectral_normalization.jl b/src/extras/spectral_normalization.jl index a501a08..7d4e036 100644 --- a/src/extras/spectral_normalization.jl +++ b/src/extras/spectral_normalization.jl @@ -1,13 +1,13 @@ # Power iteration algorithm for computing the spectral norm function power_iteration!(W, u, n_iterations) - v = nothing - for i=1:n_iterations - Wu = W' * u - v = Wu ./ (norm(Wu) + eps(Float32)) - Wv = W * v - u .= Wv ./ (norm(Wv) + eps(Float32)) - end - u, v + v = nothing + for i = 1:n_iterations + Wu = W' * u + v = Wu ./ (norm(Wu) + eps(Float32)) + Wv = W * v + u .= Wv ./ (norm(Wv) + eps(Float32)) + end + u, v end # Compute the 
maximum singular value @@ -15,15 +15,15 @@ msv(u, v, W) = u' * W * v ## Dense Layer with Spectral Normalization -struct DenseSN{F,S<:AbstractMatrix, B, I<:Int, VV<:AbstractArray} +struct DenseSN{F,S<:AbstractMatrix,B,I<:Int,VV<:AbstractArray} weight::S bias::B σ::F n_iterations::I # Number of power iterations for computing max singular value u::VV # Left vector for power iteration - function DenseSN(W::M, bias=true, σ::F=identity, n_iterations::I=1, u::VV=randn(Float32, size(W,1), 1)) where {M<:AbstractMatrix, F, I<:Int, VV<:AbstractArray} - b = Flux.create_bias(W, bias, size(W,1)) - new{F,M,typeof(b), I, VV}(W, b, σ, n_iterations, u) + function DenseSN(W::M, bias=true, σ::F=identity, n_iterations::I=1, u::VV=randn(Float32, size(W, 1), 1)) where {M<:AbstractMatrix,F,I<:Int,VV<:AbstractArray} + b = Flux.create_bias(W, bias, size(W, 1)) + new{F,M,typeof(b),I,VV}(W, b, σ, n_iterations, u) end end @@ -38,10 +38,10 @@ Flux.trainable(a::DenseSN) = (a.weight, a.bias) function (a::DenseSN)(x::AbstractVecOrMat) W, b, σ = a.weight, a.bias, a.σ u, v = ignore_derivatives(() -> power_iteration!(W, a.u, a.n_iterations)) - σ.((W ./ msv(u, v, W))*x .+ b) + σ.((W ./ msv(u, v, W)) * x .+ b) end -(a::DenseSN)(x::AbstractArray) = reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...) +(a::DenseSN)(x::AbstractArray) = reshape(a(reshape(x, size(x, 1), :)), :, size(x)[2:end]...) function Base.show(io::IO, l::DenseSN) print(io, "DenseSN(", size(l.weight, 2), ", ", size(l.weight, 1)) @@ -51,7 +51,7 @@ end ## Convluational layer with Spectral Normalization -struct ConvSN{N,M,F,A,V, I<:Int, VV<:AbstractArray} +struct ConvSN{N,M,F,A,V,I<:Int,VV<:AbstractArray} σ::F weight::A bias::V @@ -62,19 +62,19 @@ struct ConvSN{N,M,F,A,V, I<:Int, VV<:AbstractArray} u::VV # Left vector for power iteration end -function ConvSN(w::AbstractArray{T,N}, b::Union{Bool, AbstractVector{T}}, σ = identity; - stride = 1, pad = 0, dilation = 1, n_iterations = 1) where {T,N} -stride = Flux.expand(Val(N-2), stride) -dilation = Flux.expand(Val(N-2), dilation) -pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) -u = randn(Float32, size(to2D(w), 1), 1) -return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) +function ConvSN(w::AbstractArray{T,N}, b::Union{Bool,AbstractVector{T}}, σ=identity; + stride=1, pad=0, dilation=1, n_iterations=1) where {T,N} + stride = Flux.expand(Val(N - 2), stride) + dilation = Flux.expand(Val(N - 2), dilation) + pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) + u = randn(Float32, size(to2D(w), 1), 1) + return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) end -function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; -init = Flux.glorot_uniform, stride = 1, pad = 0, dilation = 1, -weight = Flux.convfilter(k, ch, init = init), bias = false, n_iterations = 1) where N -ConvSN(weight, bias, σ, stride = stride, pad = pad, dilation = dilation, n_iterations = n_iterations) +function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ=identity; + init=Flux.glorot_uniform, stride=1, pad=0, dilation=1, + weight=Flux.convfilter(k, ch, init=init), bias=false, n_iterations=1) where {N} + ConvSN(weight, bias, σ, stride=stride, pad=pad, dilation=dilation, n_iterations=n_iterations) end Flux.@functor ConvSN @@ -82,7 +82,7 @@ Flux.@functor ConvSN Flux.trainable(a::ConvSN) = (a.weight, a.bias) function (c::ConvSN)(x::AbstractArray) - σ, b = c.σ, reshape(c.bias, ntuple(_->1, length(c.stride))..., :, 1) + σ, b = c.σ, reshape(c.bias, ntuple(_ -> 
1, length(c.stride))..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) u, v = ignore_derivatives(() -> power_iteration!(to2D(c.weight), c.u, c.n_iterations)) σ.(conv(x, c.weight ./ msv(u, v, to2D(c.weight)), cdims) .+ b) @@ -90,7 +90,7 @@ end function Base.show(io::IO, l::ConvSN) print(io, "ConvSN(", size(l.weight)[1:ndims(l.weight)-2]) - print(io, ", ", size(l.weight, ndims(l.weight)-1), "=>", size(l.weight, ndims(l.weight))) + print(io, ", ", size(l.weight, ndims(l.weight) - 1), "=>", size(l.weight, ndims(l.weight))) l.σ == identity || print(io, ", ", l.σ) print(io, ")") end \ No newline at end of file From 8a131441b6f3ecb2b8efa460a892948822e5759f Mon Sep 17 00:00:00 2001 From: Kristian Holme Date: Fri, 25 Oct 2024 15:59:57 +0200 Subject: [PATCH 3/4] fix mistaken autoformatting --- src/extras/spectral_normalization.jl | 102 +++++++++++++-------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/src/extras/spectral_normalization.jl b/src/extras/spectral_normalization.jl index 7d4e036..96d36cc 100644 --- a/src/extras/spectral_normalization.jl +++ b/src/extras/spectral_normalization.jl @@ -1,11 +1,11 @@ # Power iteration algorithm for computing the spectral norm function power_iteration!(W, u, n_iterations) v = nothing - for i = 1:n_iterations - Wu = W' * u - v = Wu ./ (norm(Wu) + eps(Float32)) - Wv = W * v - u .= Wv ./ (norm(Wv) + eps(Float32)) + for i=1:n_iterations + Wu = W' * u + v = Wu ./ (norm(Wu) + eps(Float32)) + Wv = W * v + u .= Wv ./ (norm(Wv) + eps(Float32)) end u, v end @@ -15,20 +15,20 @@ msv(u, v, W) = u' * W * v ## Dense Layer with Spectral Normalization -struct DenseSN{F,S<:AbstractMatrix,B,I<:Int,VV<:AbstractArray} - weight::S - bias::B - σ::F - n_iterations::I # Number of power iterations for computing max singular value - u::VV # Left vector for power iteration - function DenseSN(W::M, bias=true, σ::F=identity, n_iterations::I=1, u::VV=randn(Float32, size(W, 1), 1)) where {M<:AbstractMatrix,F,I<:Int,VV<:AbstractArray} - b = Flux.create_bias(W, bias, size(W, 1)) - new{F,M,typeof(b),I,VV}(W, b, σ, n_iterations, u) - end +struct DenseSN{F,S<:AbstractMatrix, B, I<:Int, VV<:AbstractArray} +weight::S +bias::B +σ::F +n_iterations::I # Number of power iterations for computing max singular value +u::VV # Left vector for power iteration +function DenseSN(W::M, bias=true, σ::F=identity, n_iterations::I=1, u::VV=randn(Float32, size(W,1), 1)) where {M<:AbstractMatrix, F, I<:Int, VV<:AbstractArray} + b = Flux.create_bias(W, bias, size(W,1)) + new{F,M,typeof(b), I, VV}(W, b, σ, n_iterations, u) +end end function DenseSN(in::Integer, out::Integer, σ=identity; init=Flux.glorot_uniform, bias=true, n_iterations=1, u=randn(Float32, out, 1)) - DenseSN(init(out, in), bias, σ, n_iterations, u) +DenseSN(init(out, in), bias, σ, n_iterations, u) end Flux.@functor DenseSN @@ -36,45 +36,45 @@ Flux.@functor DenseSN Flux.trainable(a::DenseSN) = (a.weight, a.bias) function (a::DenseSN)(x::AbstractVecOrMat) - W, b, σ = a.weight, a.bias, a.σ - u, v = ignore_derivatives(() -> power_iteration!(W, a.u, a.n_iterations)) - σ.((W ./ msv(u, v, W)) * x .+ b) +W, b, σ = a.weight, a.bias, a.σ +u, v = ignore_derivatives(() -> power_iteration!(W, a.u, a.n_iterations)) +σ.((W ./ msv(u, v, W))*x .+ b) end -(a::DenseSN)(x::AbstractArray) = reshape(a(reshape(x, size(x, 1), :)), :, size(x)[2:end]...) +(a::DenseSN)(x::AbstractArray) = reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...) 
function Base.show(io::IO, l::DenseSN) - print(io, "DenseSN(", size(l.weight, 2), ", ", size(l.weight, 1)) - l.σ == identity || print(io, ", ", l.σ) - print(io, ")") +print(io, "DenseSN(", size(l.weight, 2), ", ", size(l.weight, 1)) +l.σ == identity || print(io, ", ", l.σ) +print(io, ")") end ## Convluational layer with Spectral Normalization -struct ConvSN{N,M,F,A,V,I<:Int,VV<:AbstractArray} - σ::F - weight::A - bias::V - stride::NTuple{N,Int} - pad::NTuple{M,Int} - dilation::NTuple{N,Int} - n_iterations::I # Number of power iterations for computing max singular value - u::VV # Left vector for power iteration +struct ConvSN{N,M,F,A,V, I<:Int, VV<:AbstractArray} +σ::F +weight::A +bias::V +stride::NTuple{N,Int} +pad::NTuple{M,Int} +dilation::NTuple{N,Int} +n_iterations::I # Number of power iterations for computing max singular value +u::VV # Left vector for power iteration end -function ConvSN(w::AbstractArray{T,N}, b::Union{Bool,AbstractVector{T}}, σ=identity; - stride=1, pad=0, dilation=1, n_iterations=1) where {T,N} - stride = Flux.expand(Val(N - 2), stride) - dilation = Flux.expand(Val(N - 2), dilation) - pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) - u = randn(Float32, size(to2D(w), 1), 1) - return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) +function ConvSN(w::AbstractArray{T,N}, b::Union{Bool, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1, n_iterations = 1) where {T,N} +stride = Flux.expand(Val(N-2), stride) +dilation = Flux.expand(Val(N-2), dilation) +pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) +u = randn(Float32, size(to2D(w), 1), 1) +return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) end -function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ=identity; - init=Flux.glorot_uniform, stride=1, pad=0, dilation=1, - weight=Flux.convfilter(k, ch, init=init), bias=false, n_iterations=1) where {N} - ConvSN(weight, bias, σ, stride=stride, pad=pad, dilation=dilation, n_iterations=n_iterations) +function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = Flux.glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = Flux.convfilter(k, ch, init = init), bias = false, n_iterations = 1) where N +ConvSN(weight, bias, σ, stride = stride, pad = pad, dilation = dilation, n_iterations = n_iterations) end Flux.@functor ConvSN @@ -82,15 +82,15 @@ Flux.@functor ConvSN Flux.trainable(a::ConvSN) = (a.weight, a.bias) function (c::ConvSN)(x::AbstractArray) - σ, b = c.σ, reshape(c.bias, ntuple(_ -> 1, length(c.stride))..., :, 1) - cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) - u, v = ignore_derivatives(() -> power_iteration!(to2D(c.weight), c.u, c.n_iterations)) - σ.(conv(x, c.weight ./ msv(u, v, to2D(c.weight)), cdims) .+ b) +σ, b = c.σ, reshape(c.bias, ntuple(_->1, length(c.stride))..., :, 1) +cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) +u, v = ignore_derivatives(() -> power_iteration!(to2D(c.weight), c.u, c.n_iterations)) +σ.(conv(x, c.weight ./ msv(u, v, to2D(c.weight)), cdims) .+ b) end function Base.show(io::IO, l::ConvSN) - print(io, "ConvSN(", size(l.weight)[1:ndims(l.weight)-2]) - print(io, ", ", size(l.weight, ndims(l.weight) - 1), "=>", size(l.weight, ndims(l.weight))) - l.σ == identity || print(io, ", ", l.σ) - print(io, ")") +print(io, "ConvSN(", size(l.weight)[1:ndims(l.weight)-2]) +print(io, ", ", size(l.weight, ndims(l.weight)-1), "=>", size(l.weight, 
ndims(l.weight))) +l.σ == identity || print(io, ", ", l.σ) +print(io, ")") end \ No newline at end of file From 9e62784c36b8b20f3647bc2455d11f5ee9c8c5e9 Mon Sep 17 00:00:00 2001 From: Kristian Holme Date: Fri, 25 Oct 2024 15:59:57 +0200 Subject: [PATCH 4/4] fix mistaken autoformatting --- src/extras/spectral_normalization.jl | 50 ++++++++++++++-------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/extras/spectral_normalization.jl b/src/extras/spectral_normalization.jl index 7d4e036..09f929d 100644 --- a/src/extras/spectral_normalization.jl +++ b/src/extras/spectral_normalization.jl @@ -1,13 +1,13 @@ # Power iteration algorithm for computing the spectral norm function power_iteration!(W, u, n_iterations) - v = nothing - for i = 1:n_iterations - Wu = W' * u - v = Wu ./ (norm(Wu) + eps(Float32)) - Wv = W * v - u .= Wv ./ (norm(Wv) + eps(Float32)) - end - u, v + v = nothing + for i=1:n_iterations + Wu = W' * u + v = Wu ./ (norm(Wu) + eps(Float32)) + Wv = W * v + u .= Wv ./ (norm(Wv) + eps(Float32)) + end + u, v end # Compute the maximum singular value @@ -15,15 +15,15 @@ msv(u, v, W) = u' * W * v ## Dense Layer with Spectral Normalization -struct DenseSN{F,S<:AbstractMatrix,B,I<:Int,VV<:AbstractArray} +struct DenseSN{F,S<:AbstractMatrix, B, I<:Int, VV<:AbstractArray} weight::S bias::B σ::F n_iterations::I # Number of power iterations for computing max singular value u::VV # Left vector for power iteration - function DenseSN(W::M, bias=true, σ::F=identity, n_iterations::I=1, u::VV=randn(Float32, size(W, 1), 1)) where {M<:AbstractMatrix,F,I<:Int,VV<:AbstractArray} - b = Flux.create_bias(W, bias, size(W, 1)) - new{F,M,typeof(b),I,VV}(W, b, σ, n_iterations, u) + function DenseSN(W::M, bias=true, σ::F=identity, n_iterations::I=1, u::VV=randn(Float32, size(W,1), 1)) where {M<:AbstractMatrix, F, I<:Int, VV<:AbstractArray} + b = Flux.create_bias(W, bias, size(W,1)) + new{F,M,typeof(b), I, VV}(W, b, σ, n_iterations, u) end end @@ -38,10 +38,10 @@ Flux.trainable(a::DenseSN) = (a.weight, a.bias) function (a::DenseSN)(x::AbstractVecOrMat) W, b, σ = a.weight, a.bias, a.σ u, v = ignore_derivatives(() -> power_iteration!(W, a.u, a.n_iterations)) - σ.((W ./ msv(u, v, W)) * x .+ b) + σ.((W ./ msv(u, v, W))*x .+ b) end -(a::DenseSN)(x::AbstractArray) = reshape(a(reshape(x, size(x, 1), :)), :, size(x)[2:end]...) +(a::DenseSN)(x::AbstractArray) = reshape(a(reshape(x, size(x,1), :)), :, size(x)[2:end]...) 
function Base.show(io::IO, l::DenseSN) print(io, "DenseSN(", size(l.weight, 2), ", ", size(l.weight, 1)) @@ -51,7 +51,7 @@ end ## Convluational layer with Spectral Normalization -struct ConvSN{N,M,F,A,V,I<:Int,VV<:AbstractArray} +struct ConvSN{N,M,F,A,V, I<:Int, VV<:AbstractArray} σ::F weight::A bias::V @@ -62,19 +62,19 @@ struct ConvSN{N,M,F,A,V,I<:Int,VV<:AbstractArray} u::VV # Left vector for power iteration end -function ConvSN(w::AbstractArray{T,N}, b::Union{Bool,AbstractVector{T}}, σ=identity; - stride=1, pad=0, dilation=1, n_iterations=1) where {T,N} - stride = Flux.expand(Val(N - 2), stride) - dilation = Flux.expand(Val(N - 2), dilation) +function ConvSN(w::AbstractArray{T,N}, b::Union{Bool, AbstractVector{T}}, σ = identity; + stride = 1, pad = 0, dilation = 1, n_iterations = 1) where {T,N} + stride = Flux.expand(Val(N-2), stride) + dilation = Flux.expand(Val(N-2), dilation) pad = Flux.calc_padding(Conv, pad, size(w)[1:N-2], dilation, stride) u = randn(Float32, size(to2D(w), 1), 1) return ConvSN(σ, w, b, stride, pad, dilation, n_iterations, u) end -function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ=identity; - init=Flux.glorot_uniform, stride=1, pad=0, dilation=1, - weight=Flux.convfilter(k, ch, init=init), bias=false, n_iterations=1) where {N} - ConvSN(weight, bias, σ, stride=stride, pad=pad, dilation=dilation, n_iterations=n_iterations) +function ConvSN(k::NTuple{N,Integer}, ch::Pair{<:Integer,<:Integer}, σ = identity; + init = Flux.glorot_uniform, stride = 1, pad = 0, dilation = 1, + weight = Flux.convfilter(k, ch, init = init), bias = false, n_iterations = 1) where N + ConvSN(weight, bias, σ, stride = stride, pad = pad, dilation = dilation, n_iterations = n_iterations) end Flux.@functor ConvSN @@ -82,7 +82,7 @@ Flux.@functor ConvSN Flux.trainable(a::ConvSN) = (a.weight, a.bias) function (c::ConvSN)(x::AbstractArray) - σ, b = c.σ, reshape(c.bias, ntuple(_ -> 1, length(c.stride))..., :, 1) + σ, b = c.σ, reshape(c.bias, ntuple(_->1, length(c.stride))..., :, 1) cdims = DenseConvDims(x, c.weight; stride=c.stride, padding=c.pad, dilation=c.dilation) u, v = ignore_derivatives(() -> power_iteration!(to2D(c.weight), c.u, c.n_iterations)) σ.(conv(x, c.weight ./ msv(u, v, to2D(c.weight)), cdims) .+ b) @@ -90,7 +90,7 @@ end function Base.show(io::IO, l::ConvSN) print(io, "ConvSN(", size(l.weight)[1:ndims(l.weight)-2]) - print(io, ", ", size(l.weight, ndims(l.weight) - 1), "=>", size(l.weight, ndims(l.weight))) + print(io, ", ", size(l.weight, ndims(l.weight)-1), "=>", size(l.weight, ndims(l.weight))) l.σ == identity || print(io, ", ", l.σ) print(io, ")") end \ No newline at end of file
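
Usage sketch (illustrative, not part of the patch series): with Flux 0.14 the old `Flux.Zeros` bias placeholder is gone; `bias = false` now means "no bias" and `Flux.create_bias` turns `true` into a trainable zero vector, which is what these patches adopt. The snippet below is a minimal sketch assuming the package defining `DenseSN`/`ConvSN` and its internal helpers (`to2D`, `power_iteration!`, `msv`, which may not be exported) is loaded. Because the `ConvSN` forward pass reshapes `c.bias`, the example supplies a real bias vector rather than relying on the new `bias = false` default.

    using Flux, LinearAlgebra

    # Spectrally normalized dense layer: 4 inputs, 8 outputs, one power
    # iteration per forward pass; `bias = true` goes through Flux.create_bias.
    d = DenseSN(4, 8, relu; n_iterations = 1)
    x = randn(Float32, 4, 16)          # batch of 16
    y = d(x)                           # (8, 16); weight scaled by 1/msv estimate

    # Spectrally normalized conv layer; pass an explicit Float32 bias vector
    # (one entry per output channel) instead of the `false` default.
    c = ConvSN((3, 3), 1 => 4; bias = zeros(Float32, 4))
    img = randn(Float32, 28, 28, 1, 2)
    z = c(img)                         # (26, 26, 4, 2) with pad = 0, stride = 1

    # Sanity check of the power-iteration helpers against LinearAlgebra:
    # msv(u, v, W) is a 1x1 matrix that should approach opnorm(W), the
    # largest singular value, for a generic W once enough iterations run.
    W = randn(Float32, 8, 4)
    u = randn(Float32, 8, 1)
    u, v = power_iteration!(W, u, 50)
    isapprox(only(msv(u, v, W)), opnorm(W); rtol = 1f-3)  # true for generic W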