From 16d7c9fac1e9c873da57d03bf525033974c0eaa5 Mon Sep 17 00:00:00 2001 From: Deniz Yuret Date: Sat, 5 Dec 2020 12:18:42 +0300 Subject: [PATCH 1/3] gcnode debug version --- src/autograd_gpu/gcnode.jl | 70 ++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 7 deletions(-) diff --git a/src/autograd_gpu/gcnode.jl b/src/autograd_gpu/gcnode.jl index 9f5eeb5a1..e3b2acd02 100644 --- a/src/autograd_gpu/gcnode.jl +++ b/src/autograd_gpu/gcnode.jl @@ -68,20 +68,28 @@ function gcnode(n::Node, tape::Tape) gcnode_init(tape) end tape isa Tape || return - ni = gcnode_index[objectid(n)] - if n.Value isa Result - for c in cuarrays(n.outgrad); gcnode_setindex!(c, ni); end - end @inbounds for i in 1:length(n.parents); isassigned(n.parents, i) || continue parent = n.parents[i] if parent.Value isa Result - pi = gcnode_index[objectid(parent)] - for c in cuarrays(parent.outgrad); gcnode_setindex!(c, pi); end + pi = get(gcnode_index, objectid(parent), 0) + if pi > 0 + for c in cuarrays(parent.outgrad); gcnode_setindex!(c, pi); end + else + gcnode_debug_parent(n, tape, i) + end else for c in cuarrays(parent.outgrad); gcnode_setindex!(c,0); end # protect Params end end + ni = get(gcnode_index, objectid(n), 0) + if ni == 0 + gcnode_debug_node(n, tape) + return + end + if n.Value isa Result + for c in cuarrays(n.outgrad); gcnode_setindex!(c, ni); end + end while !isempty(gcnode_queue) && peek(gcnode_queue)[2] >= ni ## 5.62μs (cid,v) = dequeue_pair!(gcnode_queue) c = gcnode_dict[cid].value @@ -96,5 +104,53 @@ end const gcnode_null = Result{Nothing}(nothing,nothing,nothing,nothing) +function gcnode_debug_node(n, tape) + println("WARNING: Cannot find node $(objectid(n)) in gcnode_index") + ni = findfirst(isequal(n), tape.list) + if ni === nothing + println("The node does not appear on tape.list") + else + println("The node appears on index $ni on tape.list") + end + nj = findfirst(isequal(n), tape.dict) + if nj === nothing + println("The node does not appear on tape.dict") + else + println("The node appears on tape.dict") + end + @show sort(collect(keys(gcnode_index))) == sort(objectid.(tape.list)) + @show sort(collect(keys(gcnode_index))) == sort(objectid.(collect(values(tape.dict)))) + @show sort(objectid.(tape.list)) == sort(objectid.(collect(values(tape.dict)))) + println("gcnode_index has $(length(gcnode_index)) objectid(node) keys:") + println(collect(keys(gcnode_index))) + println("tape.list has $(length(tape.list)) nodes:") + println(objectid.(tape.list)) + println("tape.dict has $(length(tape.dict)) Tracked-Node pairs:") + println(objectid.(collect(values(tape.dict)))) +end - +function gcnode_debug_parent(n, tape, i) + parent = n.parents[i] + println("WARNING: Cannot find parent $i of $(objectid(n)) with id $(objectid(parent)) in gcnode_index") + ni = findfirst(isequal(parent), tape.list) + if ni === nothing + println("The parent does not appear on tape.list") + else + println("The parent appears on index $ni on tape.list") + end + nj = findfirst(isequal(parent), tape.dict) + if nj === nothing + println("The parent does not appear on tape.dict") + else + println("The parent appears on tape.dict") + end + @show sort(collect(keys(gcnode_index))) == sort(objectid.(tape.list)) + @show sort(collect(keys(gcnode_index))) == sort(objectid.(collect(values(tape.dict)))) + @show sort(objectid.(tape.list)) == sort(objectid.(collect(values(tape.dict)))) + println("gcnode_index has $(length(gcnode_index)) objectid(node) keys:") + println(collect(keys(gcnode_index))) + println("tape.list has $(length(tape.list)) nodes:") + println(objectid.(tape.list)) + println("tape.dict has $(length(tape.dict)) Tracked-Node pairs:") + println(objectid.(collect(values(tape.dict)))) +end From ed1a081262f5c6a25c9ae003b4dd214d58d8b2bc Mon Sep 17 00:00:00 2001 From: Deniz Yuret Date: Sat, 5 Dec 2020 18:43:41 +0300 Subject: [PATCH 2/3] check objectid(tape) in gcnode. --- src/autograd_gpu/gcnode.jl | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/autograd_gpu/gcnode.jl b/src/autograd_gpu/gcnode.jl index e3b2acd02..13beea14d 100644 --- a/src/autograd_gpu/gcnode.jl +++ b/src/autograd_gpu/gcnode.jl @@ -32,6 +32,7 @@ const gcnode_index = Dict{UInt,Int}() # There is no explicit call to gcnode_init by the user. gcnode just resets everything if the # input Tape is different from gcnode_tape: const gcnode_tape = WeakRef(nothing) +const gcnode_tape_id = Ref(UInt(0)) # During the backward step parents of a node (who have lower indices) may have their # outgrads modified, thus new references to CuArrays that we have already indexed may @@ -46,10 +47,10 @@ end function gcnode_init(tape::Tape) gcnode_tape.value = tape + gcnode_tape_id[] = objectid(tape) empty!(gcnode_index) empty!(gcnode_queue) empty!(gcnode_dict) - tape isa Tape || return @inbounds for (i,n) in enumerate(tape.list) gcnode_index[objectid(n)] = i if n.Value isa Result @@ -66,8 +67,9 @@ function gcnode(n::Node, tape::Tape) cuallocator[] || return knetgcnode(n,tape) if tape !== gcnode_tape.value gcnode_init(tape) + elseif objectid(tape) !== gcnode_tape_id[] + println("Tape inconsistency: objectid(tape)=$(objectid(tape)) gcnode_tape_id[]=$(gcnode_tape_id[]) objectid(gcnode_tape.value) = $(objectid(gcnode_tape.value))") end - tape isa Tape || return @inbounds for i in 1:length(n.parents); isassigned(n.parents, i) || continue parent = n.parents[i] @@ -106,6 +108,7 @@ const gcnode_null = Result{Nothing}(nothing,nothing,nothing,nothing) function gcnode_debug_node(n, tape) println("WARNING: Cannot find node $(objectid(n)) in gcnode_index") + println("objectid(tape)=$(objectid(tape)) gcnode_tape_id[]=$(gcnode_tape_id[]) objectid(gcnode_tape.value) = $(objectid(gcnode_tape.value))") ni = findfirst(isequal(n), tape.list) if ni === nothing println("The node does not appear on tape.list") @@ -132,6 +135,7 @@ end function gcnode_debug_parent(n, tape, i) parent = n.parents[i] println("WARNING: Cannot find parent $i of $(objectid(n)) with id $(objectid(parent)) in gcnode_index") + println("objectid(tape)=$(objectid(tape)) gcnode_tape_id[]=$(gcnode_tape_id[]) objectid(gcnode_tape.value) = $(objectid(gcnode_tape.value))") ni = findfirst(isequal(parent), tape.list) if ni === nothing println("The parent does not appear on tape.list") From 8a944bf853ea7555bfa33909a2d2be4d0ddf0473 Mon Sep 17 00:00:00 2001 From: Deniz Yuret Date: Mon, 7 Dec 2020 03:37:26 +0300 Subject: [PATCH 3/3] Fix #624: gcnode "key not found" --- src/autograd_gpu/gcnode.jl | 93 ++++++-------------------------------- 1 file changed, 15 insertions(+), 78 deletions(-) diff --git a/src/autograd_gpu/gcnode.jl b/src/autograd_gpu/gcnode.jl index 13beea14d..89588d17b 100644 --- a/src/autograd_gpu/gcnode.jl +++ b/src/autograd_gpu/gcnode.jl @@ -1,5 +1,5 @@ using CUDA: CuArray, unsafe_free! -using Knet.CuArrays: cuarrays # Profiling shows most of the time is spent in cuarrays +using Knet.CuArrays: cuarrays using Knet.KnetArrays: cuallocator using AutoGrad: Result, Node, Tape @@ -29,10 +29,6 @@ const gcnode_dict = Dict{UInt,WeakRef}() # ObjectId(::Node) for keys to allow gc after gcnode is done. const gcnode_index = Dict{UInt,Int}() -# There is no explicit call to gcnode_init by the user. gcnode just resets everything if the -# input Tape is different from gcnode_tape: -const gcnode_tape = WeakRef(nothing) -const gcnode_tape_id = Ref(UInt(0)) # During the backward step parents of a node (who have lower indices) may have their # outgrads modified, thus new references to CuArrays that we have already indexed may @@ -46,8 +42,6 @@ function gcnode_setindex!(c::CuArray,v::Int) end function gcnode_init(tape::Tape) - gcnode_tape.value = tape - gcnode_tape_id[] = objectid(tape) empty!(gcnode_index) empty!(gcnode_queue) empty!(gcnode_dict) @@ -61,41 +55,35 @@ function gcnode_init(tape::Tape) for c in cuarrays(n.outgrad); gcnode_setindex!(c,0); end end end + # Mark this tape so we know when gcnode is called with a new tape and gcnode_init is needed + tape.dict[gcnode_null] = gcnode_null_node end + function gcnode(n::Node, tape::Tape) cuallocator[] || return knetgcnode(n,tape) - if tape !== gcnode_tape.value + if !haskey(tape.dict, gcnode_null) gcnode_init(tape) - elseif objectid(tape) !== gcnode_tape_id[] - println("Tape inconsistency: objectid(tape)=$(objectid(tape)) gcnode_tape_id[]=$(gcnode_tape_id[]) objectid(gcnode_tape.value) = $(objectid(gcnode_tape.value))") + end + ni = gcnode_index[objectid(n)] + if n.Value isa Result + for c in cuarrays(n.outgrad); gcnode_setindex!(c, ni); end end @inbounds for i in 1:length(n.parents); isassigned(n.parents, i) || continue parent = n.parents[i] if parent.Value isa Result - pi = get(gcnode_index, objectid(parent), 0) - if pi > 0 - for c in cuarrays(parent.outgrad); gcnode_setindex!(c, pi); end - else - gcnode_debug_parent(n, tape, i) - end + pi = gcnode_index[objectid(parent)] + for c in cuarrays(parent.outgrad); gcnode_setindex!(c, pi); end else for c in cuarrays(parent.outgrad); gcnode_setindex!(c,0); end # protect Params end end - ni = get(gcnode_index, objectid(n), 0) - if ni == 0 - gcnode_debug_node(n, tape) - return - end - if n.Value isa Result - for c in cuarrays(n.outgrad); gcnode_setindex!(c, ni); end - end while !isempty(gcnode_queue) && peek(gcnode_queue)[2] >= ni ## 5.62μs (cid,v) = dequeue_pair!(gcnode_queue) + @assert v == ni c = gcnode_dict[cid].value - if v == ni && c isa CuArray ## c turns into nothing if gc'ed + if c isa CuArray ## c turns into nothing if gc'ed unsafe_free!(c) end end @@ -104,57 +92,6 @@ function gcnode(n::Node, tape::Tape) end end -const gcnode_null = Result{Nothing}(nothing,nothing,nothing,nothing) -function gcnode_debug_node(n, tape) - println("WARNING: Cannot find node $(objectid(n)) in gcnode_index") - println("objectid(tape)=$(objectid(tape)) gcnode_tape_id[]=$(gcnode_tape_id[]) objectid(gcnode_tape.value) = $(objectid(gcnode_tape.value))") - ni = findfirst(isequal(n), tape.list) - if ni === nothing - println("The node does not appear on tape.list") - else - println("The node appears on index $ni on tape.list") - end - nj = findfirst(isequal(n), tape.dict) - if nj === nothing - println("The node does not appear on tape.dict") - else - println("The node appears on tape.dict") - end - @show sort(collect(keys(gcnode_index))) == sort(objectid.(tape.list)) - @show sort(collect(keys(gcnode_index))) == sort(objectid.(collect(values(tape.dict)))) - @show sort(objectid.(tape.list)) == sort(objectid.(collect(values(tape.dict)))) - println("gcnode_index has $(length(gcnode_index)) objectid(node) keys:") - println(collect(keys(gcnode_index))) - println("tape.list has $(length(tape.list)) nodes:") - println(objectid.(tape.list)) - println("tape.dict has $(length(tape.dict)) Tracked-Node pairs:") - println(objectid.(collect(values(tape.dict)))) -end - -function gcnode_debug_parent(n, tape, i) - parent = n.parents[i] - println("WARNING: Cannot find parent $i of $(objectid(n)) with id $(objectid(parent)) in gcnode_index") - println("objectid(tape)=$(objectid(tape)) gcnode_tape_id[]=$(gcnode_tape_id[]) objectid(gcnode_tape.value) = $(objectid(gcnode_tape.value))") - ni = findfirst(isequal(parent), tape.list) - if ni === nothing - println("The parent does not appear on tape.list") - else - println("The parent appears on index $ni on tape.list") - end - nj = findfirst(isequal(parent), tape.dict) - if nj === nothing - println("The parent does not appear on tape.dict") - else - println("The parent appears on tape.dict") - end - @show sort(collect(keys(gcnode_index))) == sort(objectid.(tape.list)) - @show sort(collect(keys(gcnode_index))) == sort(objectid.(collect(values(tape.dict)))) - @show sort(objectid.(tape.list)) == sort(objectid.(collect(values(tape.dict)))) - println("gcnode_index has $(length(gcnode_index)) objectid(node) keys:") - println(collect(keys(gcnode_index))) - println("tape.list has $(length(tape.list)) nodes:") - println(objectid.(tape.list)) - println("tape.dict has $(length(tape.dict)) Tracked-Node pairs:") - println(objectid.(collect(values(tape.dict)))) -end +const gcnode_null = Result{Nothing}(nothing,nothing,nothing,nothing) +const gcnode_null_node = Node(gcnode_null)