Skip to content

Commit

Permalink
tutorial updates
Browse files Browse the repository at this point in the history
  • Loading branch information
denizyuret committed Sep 12, 2018
1 parent fa197bc commit 49ab0c7
Show file tree
Hide file tree
Showing 9 changed files with 1,226 additions and 566 deletions.
2 changes: 0 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -19,7 +18,6 @@ AutoGrad = "1.0.1"
CUDAapi = "0.5.0"
FileIO = "1.0.1"
JLD2 = "0.1.1"
ProgressMeter = "0.6.0"
SpecialFunctions = "0.7.0"

[extras]
Expand Down
5 changes: 4 additions & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ AutoGrad 1.0.1
CUDAapi 0.5.0
FileIO 1.0.1
JLD2 0.1.1
ProgressMeter 0.6.0
SpecialFunctions 0.7.0

# The following are used by the build script if installed.
# CUDAdrv, Documenter

# We need the following for some examples. They get automatically installed when needed.
# ArgParse, JLD, JLD2, Images, CodecZlib, MAT, PyCall, JSON, IJulia, BenchmarkTools, Distributions

# ProgressMeter 0.6.0
# ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
# ProgressMeter = "0.6.0"
100 changes: 49 additions & 51 deletions src/model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ atype()=(gpu() >= 0 ? KnetArray{Float32} : Array{Float32})
# We don't call model directly, only through loss (because it may need model params for regularization).
# So we pass all unrecognized kwargs to loss and let it sort out.

# What to pass to the callback:
# model, data, loss, optimizer and (o...) are all available to the caller. No need to pass to callback.
# The only things that are not available are J,x,y. I can't think of a use for x,y.
# That leaves J. I considered passing value(J), however that prevents the callback from looking at gradients.
# (e.g. for reporting the gradient norms), so I decided to pass back J as is.


"""
train!(model, data; loss, optimizer, callback, o...)
Expand All @@ -35,70 +42,61 @@ Train a model with given data.
* `model`: A callable object. `model(x; o...)` should return a prediction. `params(model)`
will automatically iterate over model parameters.
* `data`: An iterator. `for (x,y) in data` should iterate over input-output pairs.
* `loss=nll`: A loss function, called with `loss(model,x,y; o...)`.
* `optimizer=SGD()`: An optimizer object that will be copied for each parameter and used by
* `loss=nll`: A loss function, called with `J = @diff loss(model,x,y; o...)`.
* `optimizer=Adam()`: An optimizer object that will be copied for each parameter and used by
`[update!]`(@ref).
* `callback`: To facilitate reporting and termination, a callback function is called
before every update with `callback(model,x,y,loss)`. Training continues if the return value
is true, terminates if it is false. See the [`Train`](@ref) object as an example
callback. The default callback quits after one epoch.
* Other keyword arguments will be passed to `loss` and possibly by `loss` to `model`.
* `callback`: To facilitate reporting and termination, a callback function is called before
every update with `callback(J)`. Training continues if the return value is true,
terminates if it is false. The default callback runs until training loss convergence.
* Other keyword arguments `(o...)` will be passed to `loss` and possibly by `loss` to `model`.
"""
function train!(model, data; loss=nll, optimizer=SGD(), callback=ncount(length(data)), o...)
for param in params(model)
function train!(model, data; loss=nll, optimizer=Adam(), callback=converge(), o...)
ps = params(model)
for param in ps
param.opt = deepcopy(optimizer)
end
while true
for (x,y) in data
J = @diff loss(model,x,y; o...)
if !callback(model,x,y,value(J)); return; end
update!(model, J)
if !callback(J)
return
end
for param in ps
g = grad(J,param)
update!(value(param),g,param.opt)
end
end
end
end

function update!(model,J::Tape)
for w in params(model)
g = grad(J,w)
update!(value(w),g,w.opt)
# import ProgressMeter # don't want to import update!

"""
    converge(alpha = 0.001)

Return a stateful callback for [`train!`](@ref) that keeps exponential moving
averages of the loss and of its change, and signals continuation (`true`) as
long as the smoothed loss is not increasing.
"""
function converge(alpha = 0.001)
    smoothed_loss = Inf
    smoothed_delta = 0.0
    # prog = ProgressMeter.ProgressThresh(0.0, "Training loss: ")
    function callback(j)
        current = value(j)
        # Seed the average with the first observed loss.
        smoothed_loss == Inf && (smoothed_loss = current)
        delta = current - smoothed_loss
        smoothed_loss = alpha * current + (1 - alpha) * smoothed_loss
        smoothed_delta = alpha * delta + (1 - alpha) * smoothed_delta
        # ProgressMeter.update!(prog, smoothed_loss)
        # Keep going while the smoothed loss trend is non-increasing.
        return smoothed_delta <= 0.0
    end
    return callback
end


"""
    ncount(n)

Return a callback that answers `true` for its first `n` invocations (any
arguments are ignored) and `false` ever after. Useful as a `train!` callback
to stop after a fixed number of updates.
"""
function ncount(n)
    remaining = n
    return function (args...)
        remaining > 0 || return false
        remaining -= 1
        return true
    end
end

import ProgressMeter # don't want to import update!

"""
Train(howlong, datasets...)
Create a callback function that can be used with [`train!`](@ref).
`howlong` can be an integer, an array of integers, or a `StepRange` such as 0:100:1000
representing the number of updates for reporting, testing and termination. The training will
terminate when the number of updates reach howlong[end]. So the simplest use would be
`Train(n::Int)` which will cause training to terminate after `n` updates. If the update
count ∈ howlong, a progress bar will be updated and the model will be tested on the datasets
if any are provided. For example `Train(0:100:1000,dtst)` will update the progress bar and
calculate loss and error on dtst every 100 updates and terminate at 1000 updates. The
`losses` and `errors` fields of the `Train` object will contain the results of these tests.
"""
mutable struct Train
whentorecord; datasets; losses; errors; updatecount; progress
Train(w,ds...)=new(w, ds, [Float64[] for d in ds], [Float64[] for d in ds], 0, ProgressMeter.Progress(w[end],1))
end

# Functor: makes a Train object usable as a train! callback.
# Records progress and test metrics at scheduled update counts; returns
# false (stop training) once whentorecord[end] updates have been made.
function (t::Train)(model,x,y,loss)
    # NOTE(review): the membership operator was missing here in the scraped
    # original (`if t.updatecount t.whentorecord`); `in` restores the
    # documented "if the update count ∈ howlong" behavior.
    if t.updatecount in t.whentorecord
        ProgressMeter.update!(t.progress, t.updatecount)
        # Renamed loop variable: the original `loss` shadowed the `loss` argument.
        for (data, losslog, errlog) in zip(t.datasets, t.losses, t.errors)
            push!(losslog, nll(model,data))
            push!(errlog, zeroone(model,data))
        end
    end
    t.updatecount += 1
    return t.updatecount <= t.whentorecord[end]
end

# Issues:
# What if we call train multiple times, and don't want to use the optimizers?
# Do we want parameter initialization as well? init and opt init should happen once.
# Recording losses with different loss functions.
# What info does the callback need?
# Are we doing anything other than pushing kwargs from train to Train?
# What if we want convergence in trnloss or convergence in devloss? Return earlier (best) model?
# How do we easily measure epochs?
# ProgressMeter both in time mode and converge mode.
# Printing loss with ProgressMeter seems difficult.
# Frequency of progress updates and loss calculations?

152 changes: 76 additions & 76 deletions tutorial/03.lin.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 49ab0c7

Please sign in to comment.