Merge pull request #52 from DrChainsaw/weightinsert
Add option to inject function for determining values of new neurons created by select
DrChainsaw authored Jun 1, 2020
2 parents 8e2b75e + ce88afb commit ca9eb47
Showing 8 changed files with 212 additions and 107 deletions.
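The change threads an `insert` keyword through `mutate_inputs`/`mutate_outputs` and a `newfun` keyword through `select` (see the diffs below), so callers can decide how the parameters of newly created neurons are initialized. A minimal sketch of how such a function might be injected — the layer, the indices and the `myinsert` helper are illustrative, not part of the commit:

```julia
using Flux, NaiveNASflux, NaiveNASlib

# Wrap a Dense layer so it can be mutated (MutableLayer is defined in src/mutable.jl).
m = NaiveNASflux.MutableLayer(Dense(3, 5))

# Hypothetical insert function: regardless of layer type and parameter kind,
# fill the parameters of new neurons with zeros. It must return a function of
# (element type, dimension, size...), which is what select's newfun keyword expects.
myinsert(lt, partype) = (T, dim, size...) -> zeros(T, size...)

# Grow the output from 5 to 7 neurons: positive indices keep existing neurons,
# negative indices mark positions whose values come from the injected function.
NaiveNASlib.mutate_outputs(m, [1, 2, 3, 4, 5, -1, -1]; insert=myinsert)
```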
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,6 +1,6 @@
name = "NaiveNASflux"
uuid = "85610aed-7d32-5e57-bb50-4c2e1c9e7997"
version = "1.2.1"
version = "1.3.0"

[deps]
Flux = "587475ba-b771-5e3f-ad9e-33799f191a9c"
118 changes: 60 additions & 58 deletions src/mutable.jl
@@ -19,8 +19,8 @@ NaiveNASlib.nout(m::AbstractMutableComp) = nout(layer(m))
# Leave some room to override clone
NaiveNASlib.clone(m::AbstractMutableComp;cf=clone) = typeof(m)(map(cf, getfield.(m, fieldnames(typeof(m))))...)

NaiveNASlib.mutate_inputs(m::AbstractMutableComp, inputs::AbstractArray{<:Integer,1}...) = mutate_inputs(wrapped(m), inputs...)
NaiveNASlib.mutate_outputs(m::AbstractMutableComp, outputs) = mutate_outputs(wrapped(m), outputs)
NaiveNASlib.mutate_inputs(m::AbstractMutableComp, inputs::AbstractArray{<:Integer,1}...;kwargs...) = mutate_inputs(wrapped(m), inputs...;kwargs...)
NaiveNASlib.mutate_outputs(m::AbstractMutableComp, outputs; kwargs...) = mutate_outputs(wrapped(m), outputs; kwargs...)

mutate_weights(m::AbstractMutableComp, w) = mutate_weights(wrapped(m), w)

@@ -42,67 +42,67 @@ wrapped(m::MutableLayer) = m.layer
layer(m::MutableLayer) = wrapped(m)
layertype(m::MutableLayer) = layertype(layer(m))

function NaiveNASlib.mutate_inputs(m::MutableLayer, inputs::AbstractArray{<:Integer,1}...)
function NaiveNASlib.mutate_inputs(m::MutableLayer, inputs::AbstractArray{<:Integer,1}...; insert=neuroninsert)
@assert length(inputs) == 1 "Only one input per layer!"
mutate(layertype(m), m, inputs=inputs[1])
mutate(layertype(m), m; inputs=inputs[1], insert=insert)
end

NaiveNASlib.mutate_outputs(m::MutableLayer, outputs) = mutate(layertype(m), m, outputs=outputs)
NaiveNASlib.mutate_outputs(m::MutableLayer, outputs; insert=neuroninsert) = mutate(layertype(m), m; outputs=outputs, insert=insert)

mutate_weights(m::MutableLayer, w) = mutate(layertype(m), m, other=w)

mutate(m::MutableLayer; inputs, outputs, other = l -> ()) = mutate(layertype(m), m, inputs=inputs, outputs=outputs, other=other)
mutate(m::MutableLayer; inputs, outputs, other = l -> (), insert=neuroninsert) = mutate(layertype(m), m, inputs=inputs, outputs=outputs, other=other, insert=insert)

function mutate(::FluxParLayer, m::MutableLayer; inputs=1:nin(m), outputs=1:nout(m), other= l -> ())
function mutate(lt::FluxParLayer, m::MutableLayer; inputs=1:nin(m), outputs=1:nout(m), other= l -> (), insert=neuroninsert)
l = layer(m)
otherdims = other(l)
w = select(weights(l), outdim(l) => outputs, indim(l) => inputs, otherdims...)
b = select(bias(l), 1 => outputs)
w = select(weights(l), indim(l) => inputs, outdim(l) => outputs, otherdims...; newfun=insert(lt, WeightParam()))
b = select(bias(l), 1 => outputs; newfun=insert(lt, BiasParam()))
newlayer(m, w, b, otherpars(other, l))
end
otherpars(o, l) = ()

function mutate(::FluxDepthwiseConv, m::MutableLayer; inputs=1:nin(m), outputs=1:nout(m), other= l -> ())
function mutate(lt::FluxDepthwiseConv, m::MutableLayer; inputs=1:nin(m), outputs=1:nout(m), other= l -> (), insert=neuroninsert)
l = layer(m)
otherdims = other(l)
weightouts = map(Iterators.partition(outputs, length(inputs))) do group
all(group .< 0) && return group[1]
return (maximum(group) - 1) ÷ length(inputs) + 1
end

w = select(weights(l), outdim(l) => weightouts, indim(l) => inputs, otherdims...)
b = select(bias(l), 1 => outputs)
w = select(weights(l), indim(l) => inputs, outdim(l) => weightouts, otherdims...; newfun=insert(lt, WeightParam()))
b = select(bias(l), 1 => outputs; newfun=insert(lt, BiasParam()))
newlayer(m, w, b, otherpars(other, l))
end

function mutate(t::FluxRecurrent, m::MutableLayer; inputs=1:nin(m), outputs=1:nout(m), other=missing)
function mutate(lt::FluxRecurrent, m::MutableLayer; inputs=1:nin(m), outputs=1:nout(m), other=missing, insert=neuroninsert)
l = layer(m)
outputs_scaled = mapfoldl(vcat, 1:outscale(l)) do i
offs = (i-1) * nout(l)
return map(x -> x > 0 ? x + offs : x, outputs)
end

wi = select(weights(l), outdim(l) => outputs_scaled, indim(l) => inputs)
wh = select(hiddenweights(l), 1 => outputs_scaled, 2 => outputs)
b = select(bias(l), 1 => outputs_scaled)
mutate_recurrent_state(t, m, outputs, wi, wh, b)
wi = select(weights(l), indim(l) => inputs, outdim(l) => outputs_scaled, newfun=insert(lt, WeightParam()))
wh = select(hiddenweights(l), 2 => outputs, 1 => outputs_scaled, newfun=insert(lt, RecurrentWeightParam()))
b = select(bias(l), 1 => outputs_scaled, newfun=insert(lt, BiasParam()))
mutate_recurrent_state(lt, m, outputs, wi, wh, b, insert)
end

function mutate_recurrent_state(::FluxRecurrent, m::MutableLayer, outputs, wi, wh, b)
function mutate_recurrent_state(lt::FluxRecurrent, m::MutableLayer, outputs, wi, wh, b, insert)
l = layer(m)
h = select(hiddenstate(l), 1 => outputs)
s = select(state(l), 1 => outputs)
h = select(hiddenstate(l), 1 => outputs, newfun=insert(lt, RecurrentState()))
s = select(state(l), 1 => outputs; newfun=insert(lt, RecurrentState()))

cellnew = setproperties(layer(m).cell, (Wi=wi, Wh=wh, b = b, h = h))
lnew = setproperties(layer(m), (cell=cellnew, state = s))
m.layer = lnew
end

function mutate_recurrent_state(::FluxLstm, m::MutableLayer, outputs, wi, wh, b)
function mutate_recurrent_state(lt::FluxLstm, m::MutableLayer, outputs, wi, wh, b, insert)
l = layer(m)
hcurr, scurr = hiddenstate(l), state(l)
hc = select.(hcurr, repeat([1 => outputs], length(hcurr)))
s = select.(scurr, repeat([1 => outputs], length(scurr)))
hc = select.(hcurr, repeat([1 => outputs], length(hcurr)); newfun=insert(lt, RecurrentState()))
s = select.(scurr, repeat([1 => outputs], length(scurr)); newfun=insert(lt, RecurrentState()))

cellnew = setproperties(layer(m).cell, (Wi=wi, Wh=wh, b = b, h = hc[1], c = hc[2]))
lnew = setproperties(layer(m), (cell=cellnew, state = tuple(s...)))
@@ -112,39 +112,38 @@ function mutate_recurrent_state(::FluxLstm, m::MutableLayer, outputs, wi, wh, b)
end


function mutate(t::FluxParInvLayer, m::MutableLayer; inputs=missing, outputs=missing, other=missing)
function mutate(t::FluxParInvLayer, m::MutableLayer; inputs=missing, outputs=missing, other=missing, insert=neuroninsert)
@assert any(ismissing.((inputs, outputs))) || inputs == outputs "Try to mutate $inputs and $outputs for invariant layer $(m)!"
ismissing(inputs) || return mutate(t, m, inputs)
ismissing(outputs) || return mutate(t, m, outputs)
ismissing(inputs) || return mutate(t, m, inputs; insert=insert)
ismissing(outputs) || return mutate(t, m, outputs; insert=insert)
end

function mutate(::FluxDiagonal, m::MutableLayer, inds)
function mutate(lt::FluxDiagonal, m::MutableLayer, inds; insert=neuroninsert)
l = layer(m)
w = select(weights(l), 1 => inds)
b = select(bias(l), 1 => inds)
w = select(weights(l), 1 => inds, newfun=insert(lt, WeightParam()))
b = select(bias(l), 1 => inds; newfun=insert(lt, BiasParam()))
newlayer(m, w, b)
end

function mutate(::FluxLayerNorm, m::MutableLayer, inds)
function mutate(::FluxLayerNorm, m::MutableLayer, inds; insert=neuroninsert)
# LayerNorm is only a wrapped Diagonal. Just mutate the Diagonal and make a new LayerNorm of it
proxy = MutableLayer(layer(m).diag)
mutate(proxy, inputs=inds, outputs=inds, other=l->())
mutate(proxy; inputs=inds, outputs=inds, other=l->(), insert=insert)
m.layer = LayerNorm(layer(proxy))
end

function mutate(::FluxParNorm, m::MutableLayer, inds)
# Good? bad? I'm the guy who assumes mean and std type parameters will be visited in a certain order and uses a closure for that assumption
ismean = false
function parselect(x::AbstractArray)
ismean = !ismean
return select(x, 1 => inds; insval = (ismean ? 0 : 1))
end
parselect(x) = x
function mutate(lt::FluxParNorm, m::MutableLayer, inds; insert=neuroninsert)

# Filter out the parameters which need to change and decide for each name (e.g. γ, β etc) what to do (typically insert 1 for scaling things and 0 for offset things)
parselect(p::Pair) = parselect(p...)
parselect(pname, x::AbstractArray) = select(x, 1 => inds; newfun = neuroninsert(lt, pname))
parselect(pname, x) = x

m.layer = Flux.fmap(parselect, m.layer)
fs, re = Flux.functor(m.layer)
m.layer = re(map(parselect, pairs(fs) |> collect))
end

function mutate(::FluxGroupNorm, m::MutableLayer, inds)
function mutate(lt::FluxGroupNorm, m::MutableLayer, inds; insert=neuroninsert)

l = m.layer
ngroups = l.G
@@ -164,13 +163,12 @@ function mutate(::FluxGroupNorm, m::MutableLayer, inds)

sizetoinds = Dict(nin(l) => inds, l.G => inds_groups)

ismean = false
function parselect(x::AbstractArray)
ismean = !ismean
return select(x, 1 => sizetoinds[length(x)]; insval = (ismean ? 0 : 1))
end
parselect(x) = x
m.layer = Flux.fmap(parselect, m.layer)
parselect(p::Pair) = parselect(p...)
parselect(pname, x::AbstractArray) = select(x, 1 => sizetoinds[length(x)]; newfun = insert(lt, pname))
parselect(pname, x) = x

fs, re = Flux.functor(m.layer)
m.layer = re(map(parselect, pairs(fs) |> collect))
m.layer.G = ngroups
end

@@ -227,9 +225,10 @@ mutable struct LazyMutable <: AbstractMutableComp
inputs::AbstractVector{<:Integer}
outputs::AbstractVector{<:Integer}
other
insert
end
LazyMutable(m::AbstractMutableComp) = LazyMutable(m, nin(m), nout(m))
LazyMutable(m, nin::Integer, nout::Integer) = LazyMutable(m, 1:nin, 1:nout, m -> ())
LazyMutable(m, nin::Integer, nout::Integer) = LazyMutable(m, 1:nin, 1:nout, m -> (), neuroninsert)

wrapped(m::LazyMutable) = m.mutable
layer(m::LazyMutable) = layer(wrapped(m))
@@ -249,19 +248,21 @@ dispatch!(m::LazyMutable, mutable::AbstractMutableComp, x...) = mutable(x...)
NaiveNASlib.nin(m::LazyMutable) = length(m.inputs)
NaiveNASlib.nout(m::LazyMutable) = length(m.outputs)

function NaiveNASlib.mutate_inputs(m::LazyMutable, inputs::AbstractArray{<:Integer,1}...)
function NaiveNASlib.mutate_inputs(m::LazyMutable, inputs::AbstractArray{<:Integer,1}...; insert=neuroninsert)
@assert length(inputs) == 1 "Only one input per layer!"
m.inputs == inputs[1] && return

m.insert = insert
m.mutable = ResetLazyMutable(trigger_mutation(m.mutable))
m.inputs = select(m.inputs, 1 => inputs[1], insval=-1)
m.inputs = select(m.inputs, 1 => inputs[1], newfun = (args...) -> -1)
end

function NaiveNASlib.mutate_outputs(m::LazyMutable, outputs::AbstractArray{<:Integer,1})
function NaiveNASlib.mutate_outputs(m::LazyMutable, outputs::AbstractArray{<:Integer,1}; insert=neuroninsert)
outputs == m.outputs && return

m.insert = insert
m.mutable = ResetLazyMutable(trigger_mutation(m.mutable))
m.outputs = select(m.outputs, 1=>outputs, insval = -1)
m.outputs = select(m.outputs, 1=>outputs, newfun = (args...) -> -1)
end

function mutate_weights(m::LazyMutable, w)
@@ -271,9 +272,9 @@ function mutate_weights(m::LazyMutable, w)
m.other = w
end

NaiveNASlib.mutate_inputs(m::LazyMutable, nin::Integer...) = mutate_inputs(m, trunc_or_pad.(length(m.inputs), nin)...)
NaiveNASlib.mutate_inputs(m::LazyMutable, nin::Integer...; insert=neuroninsert) = mutate_inputs(m, trunc_or_pad.(length(m.inputs), nin)...;insert=insert)

NaiveNASlib.mutate_outputs(m::LazyMutable, nout::Integer) = mutate_outputs(m, trunc_or_pad(length(m.outputs), nout))
NaiveNASlib.mutate_outputs(m::LazyMutable, nout::Integer; insert=neuroninsert) = mutate_outputs(m, trunc_or_pad(length(m.outputs), nout); insert=insert)

function trunc_or_pad(maxselect, size)
res = -ones(Int, size)
@@ -306,7 +307,7 @@ trigger_mutation(m) = m
trigger_mutation(m::AbstractMutableComp) = MutationTriggered(m)

function dispatch!(lm::LazyMutable, m::MutationTriggered, x...)
mutate(m.wrapped; inputs=lm.inputs, outputs=lm.outputs, other=lm.other)
mutate(m.wrapped; inputs=lm.inputs, outputs=lm.outputs, other=lm.other, insert=lm.insert)
lm.mutable = m.wrapped
return lm(x...)
end
@@ -332,6 +333,7 @@ function dispatch!(lm::LazyMutable, m::ResetLazyMutable, x...)
lm.inputs = 1:nin(lm)
lm.outputs = 1:nout(lm)
lm.other = m -> ()
lm.insert = neuroninsert
return output
end

@@ -357,8 +359,8 @@ layertype(i::NoParams) = layertype(layer(i))

LazyMutable(m::NoParams) = m

function NaiveNASlib.mutate_inputs(::NoParams, inputs) end
function NaiveNASlib.mutate_outputs(::NoParams, outputs) end
function NaiveNASlib.mutate_inputs(::NoParams, inputs;insert=missing) end
function NaiveNASlib.mutate_outputs(::NoParams, outputs;insert=missing) end
function mutate_weights(::NoParams, w) end
NaiveNASlib.minΔninfactor(m::NoParams) = minΔninfactor(layertype(m), layer(m))
NaiveNASlib.minΔnoutfactor(m::NoParams) = minΔnoutfactor(layertype(m), layer(m))
6 changes: 3 additions & 3 deletions src/pruning.jl
@@ -32,9 +32,9 @@ end

actdim(nd::Integer) = nd - 1

function NaiveNASlib.mutate_outputs(m::ActivationContribution, outputs::AbstractVector{<:Integer})
m.contribution = select(m.contribution, 1 => outputs)
mutate_outputs(wrapped(m), outputs)
function NaiveNASlib.mutate_outputs(m::ActivationContribution, outputs::AbstractVector{<:Integer}; kwargs...)
m.contribution = select(m.contribution, 1 => outputs; newfun = (args...) -> 0)
mutate_outputs(wrapped(m), outputs; kwargs...)
end

"""
52 changes: 40 additions & 12 deletions src/select.jl
@@ -1,14 +1,14 @@

select(::Missing, elements_per_dim...; insval = 0) = missing
select(::Missing, elements_per_dim...; newfun = 0) = missing

"""
select(pars::AbstractArray{T,N}, elements_per_dim...; insval = 0) where {T, N}
select(pars::AbstractArray{T,N}, elements_per_dim...; newfun = zeros) where {T, N}
Return a new `AbstractArray{T, N}` which has a subset of the elements of `pars`.
Which elements to select is determined by `elements_per_dim` which is a `Pair{Int, Vector{Int}}` mapping dimension (first member) to which elements to select in that dimension (second member).
For a single `dim=>elems` pair, the following holds: `selectdim(output, dim, i) == selectdim(pars, dim, elems[i])` if `elems[i]` is positive and `selectdim(output, dim, i) .== insval` if `elems[i]` is negative.
For a single `dim=>elems` pair, the following holds: `selectdim(output, dim, i) == selectdim(pars, dim, elems[i])` if `elems[i]` is positive and `selectdim(output, dim, i) .== newfun(T, dim, size)[j]` if `elems[i]` is the `j:th` negative value and `size` is `sum(elems .< 0)`.
# Examples
```julia-repl
@@ -20,7 +20,7 @@ julia> pars = reshape(1:3*5, 3,5)
2 5 8 11 14
3 6 9 12 15
julia> NaiveNASflux.select(pars, 1 => [-1, 1,3,-1,2], 2=>[3, -1, 2], insval=-1)
julia> NaiveNASflux.select(pars, 1 => [-1, 1,3,-1,2], 2=>[3, -1, 2], newfun = (T, d, s...) -> -ones(T, s))
5×3 Array{Int64,2}:
-1 -1 -1
7 -1 4
@@ -29,27 +29,55 @@ julia> pars = reshape(1:3*5, 3,5)
8 -1 5
```
"""
function select(pars::AbstractArray{T,N}, elements_per_dim...; insval = 0) where {T, N}
function select(pars::AbstractArray{T,N}, elements_per_dim...; newfun = randoutzeroin) where {T, N}
psize = collect(size(pars))
assign = repeat(Any[Colon()], N)
access = repeat(Any[Colon()], N)

for de in elements_per_dim
dim = de.first
elements = de.second
for (dim, elements) in elements_per_dim
psize[dim] = length(elements)
end
newpars = similar(pars, psize...)

for (dim, elements) in elements_per_dim
indskeep = filter(ind -> ind > 0, elements)
newmap = elements .> 0
indsmap = elements .> 0
newmap = .!indsmap

psize[dim] = length(newmap)
assign[dim] = findall(newmap)
assign[dim] = findall(indsmap)
access[dim] = indskeep
tsize = copy(psize)
tsize[dim] = sum(newmap)
selectdim(newpars, dim, newmap) .= newfun(T, dim, tsize...)
end
newpars = fill!(similar(pars, psize...), T(0)) .+ T(insval)

newpars[assign...] = pars[access...]
return newpars
end

struct WeightParam end
struct BiasParam end
struct RecurrentWeightParam end
struct RecurrentState end

"""
neuroninsert(lt, partype)
Return a function which creates new parameters for layers of type `lt` to use for [`select`](@Ref).
"""
neuroninsert(t, partype) = randoutzeroin
neuroninsert(t, parname::Symbol) = neuroninsert(t, Val(parname))
neuroninsert(t, name::Val) = randoutzeroin

neuroninsert(lt::FluxParNorm, t::Val) = norminsert(lt, t)
norminsert(::FluxParNorm, ::Union{Val{:β},Val{:μ}}) = (args...) -> 0
norminsert(::FluxParNorm, ::Union{Val{:γ},Val{:σ²}}) = (args...) -> 1

randoutzeroin(T, d, s...) = _randoutzeroin(T,d,s)
_randoutzeroin(T, d, s) = 0
_randoutzeroin(T, d, s::NTuple{2, Int}) = d == indim(FluxDense()) ? 0 : randn(T, s) ./ prod(s)
_randoutzeroin(T, d, s::NTuple{N, Int}) where N = d == indim(FluxConv{N-2}()) ? 0 : randn(T, s) ./ prod(s)


"""
KernelSizeAligned(Δsize)
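The parameter-kind structs introduced above (`WeightParam`, `BiasParam`, `RecurrentWeightParam`, `RecurrentState`) let an injected function pick a strategy per parameter, with `randoutzeroin` and the `norminsert` methods as the defaults. A sketch of a user-supplied alternative, assuming the types are reached through the `NaiveNASflux` module (the constant `1e-2` is made up for illustration):

```julia
using NaiveNASflux

# Small random values for new weights, zeros for new bias entries, and the
# package default for everything else (norm-layer parameters, recurrent state, ...).
custominsert(lt, ::NaiveNASflux.WeightParam) = (T, dim, size...) -> T(1e-2) .* randn(T, size...)
custominsert(lt, ::NaiveNASflux.BiasParam)   = (T, dim, size...) -> zeros(T, size...)
custominsert(lt, partype)                    = NaiveNASflux.neuroninsert(lt, partype)
```

Passing `insert=custominsert` to `mutate_inputs`/`mutate_outputs` (or letting `LazyMutable` carry it, as in the src/mutable.jl diff above) would then route every new weight and bias entry through these methods.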
21 changes: 2 additions & 19 deletions src/vertex.jl
@@ -36,29 +36,12 @@ Flux.@functor CompVertex
# This is a bit of a hack to enable 1) params and 2) gpu. Other uses may not work as expected, especially if one tries to use these methods to view/manipulate things which are not from Flux.

# Problem with using Flux.@functor is that MutationVertices (OutputVertices really) can not be created by just copying their fields as this would create multiple copies of the same vertex if it is input to more than one vertex.
# Instead, we rely on the internals of the vertices being mutable (e.g. MutableLayer).

Flux.functor(a::AbstractVector{<:AbstractVertex}) = Tuple(a), y -> a
Flux.functor(v::AbstractVertex) = (base(v),), y -> v
Flux.functor(g::CompGraph) = Tuple(vertices(g)), y -> g

# Instead, we rely on the internals of the vertices being mutable (e.g. MutableLayer).

# Flux.functor(a::AbstractVector{<:AbstractVertex}) = Tuple(a)
# function Flux.mapchildren(f, a::AbstractVector{<:AbstractVertex})
# f.(a) # Returning this will do no good due to the above
# return a
# end

# Flux.children(v::AbstractVertex) = (base(v),)
# function Flux.mapchildren(f, v::AbstractVertex)
# f.(Flux.children(v)) # Returning this will do no good due to the above
# return v
# end
# Flux.children(g::CompGraph) = Tuple(vertices(g))
# function Flux.mapchildren(f, g::CompGraph)
# f.(Flux.children(g)) # Returning this will do no good due to the above
# return g
# end

"""
mutable(l, in::AbstractVertex; layerfun=LazyMutable, mutation=IoChange, traitfun=validated())
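For context, the `Flux.functor` methods in this file are what let `Flux.params` and `gpu` traverse a whole graph. A minimal sketch, building the graph with `mutable` as in the docstring just above (layer sizes are arbitrary):

```julia
using Flux, NaiveNASflux

iv = inputvertex("in", 3)
v1 = mutable(Dense(3, 4, relu), iv)
v2 = mutable(Dense(4, 2), v1)
g  = CompGraph(iv, v2)

ps = Flux.params(g)  # collects the trainable parameters of every vertex
# g = gpu(g)         # would move all wrapped layers to the GPU
```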

2 comments on commit ca9eb47

@DrChainsaw (Owner, Author)

@JuliaRegistrator

Registration pull request created: JuliaRegistries/General/16722

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.3.0 -m "<description of version>" ca9eb476420252257bbdc28ecf8c02a88df5b2c2
git push origin v1.3.0
