Skip to content

Commit

Permalink
tutorial updates
Browse files Browse the repository at this point in the history
  • Loading branch information
denizyuret committed Sep 12, 2018
1 parent fa197bc commit 49ab0c7
Show file tree
Hide file tree
Showing 9 changed files with 1,226 additions and 566 deletions.
2 changes: 0 additions & 2 deletions Project.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ FileIO = "5789e2e9-d7fb-5bc7-8068-2c6fae9b9549"
JLD2 = "033835bb-8acc-5ee8-8aae-3f567f8a3819"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
SpecialFunctions = "276daf66-3868-5448-9aa4-cd146d93841b"
Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
Expand All @@ -19,7 +18,6 @@ AutoGrad = "1.0.1"
CUDAapi = "0.5.0"
FileIO = "1.0.1"
JLD2 = "0.1.1"
ProgressMeter = "0.6.0"
SpecialFunctions = "0.7.0"

[extras]
Expand Down
5 changes: 4 additions & 1 deletion REQUIRE
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,14 @@ AutoGrad 1.0.1
CUDAapi 0.5.0
FileIO 1.0.1
JLD2 0.1.1
ProgressMeter 0.6.0
SpecialFunctions 0.7.0

# The following are used by the build script if installed.
# CUDAdrv, Documenter

# We need the following for some examples. They get automatically installed when needed.
# ArgParse, JLD, JLD2, Images, CodecZlib, MAT, PyCall, JSON, IJulia, BenchmarkTools, Distributions

# ProgressMeter 0.6.0
# ProgressMeter = "92933f4c-e287-5a05-a399-4b506db050ca"
# ProgressMeter = "0.6.0"
100 changes: 49 additions & 51 deletions src/model.jl
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,13 @@ atype()=(gpu() >= 0 ? KnetArray{Float32} : Array{Float32})
# We don't call model directly, only through loss (because it may need model params for regularization).
# So we pass all unrecognized kwargs to loss and let it sort out.

# What to pass to the callback:
# model, data, loss, optimizer and (o...) are all available to the caller. No need to pass to callback.
# The only things that are not available are J,x,y. I can't think of a use for x,y.
# That leaves J. I considered passing value(J), however that prevents the callback from looking at gradients.
# (e.g. for reporting the gradient norms), so I decided to pass back J as is.


"""
train!(model, data; loss, optimizer, callback, o...)
Expand All @@ -35,70 +42,61 @@ Train a model with given data.
* `model`: A callable object. `model(x; o...)` should return a prediction. `params(model)`
will automatically iterate over model parameters.
* `data`: An iterator. `for (x,y) in data` should iterate over input-output pairs.
* `loss=nll`: A loss function, called with `loss(model,x,y; o...)`.
* `optimizer=SGD()`: An optimizer object that will be copied for each parameter and used by
* `loss=nll`: A loss function, called with `J = @diff loss(model,x,y; o...)`.
* `optimizer=Adam()`: An optimizer object that will be copied for each parameter and used by
`[update!]`(@ref).
* `callback`: To facilitate reporting and termination, a callback function is called
before every update with `callback(model,x,y,loss)`. Training continues if the return value
is true, terminates if it is false. See the [`Train`](@ref) object as an example
callback. The default callback quits after one epoch.
* Other keyword arguments will be passed to `loss` and possibly by `loss` to `model`.
* `callback`: To facilitate reporting and termination, a callback function is called before
every update with `callback(J)`. Training continues if the return value is true,
terminates if it is false. The default callback runs until training loss convergence.
* Other keyword arguments `(o...)` will be passed to `loss` and possibly by `loss` to `model`.
"""
function train!(model, data; loss=nll, optimizer=SGD(), callback=ncount(length(data)), o...)
for param in params(model)
function train!(model, data; loss=nll, optimizer=Adam(), callback=converge(), o...)
ps = params(model)
for param in ps
param.opt = deepcopy(optimizer)
end
while true
for (x,y) in data
J = @diff loss(model,x,y; o...)
if !callback(model,x,y,value(J)); return; end
update!(model, J)
if !callback(J)
return
end
for param in ps
g = grad(J,param)
update!(value(param),g,param.opt)
end
end
end
end

function update!(model,J::Tape)
for w in params(model)
g = grad(J,w)
update!(value(w),g,w.opt)
# import ProgressMeter # don't want to import update!

"""
    converge(alpha = 0.001)

Return a stateful callback for [`train!`](@ref) that keeps exponential moving
averages of the loss and of its change, and signals continuation (`true`) as
long as the smoothed loss is not increasing.
"""
function converge(alpha = 0.001)
    smoothed_loss = Inf
    smoothed_delta = 0.0
    # prog = ProgressMeter.ProgressThresh(0.0, "Training loss: ")
    function callback(j)
        current = value(j)
        # Seed the average with the first observed loss.
        smoothed_loss == Inf && (smoothed_loss = current)
        delta = current - smoothed_loss
        smoothed_loss = alpha * current + (1 - alpha) * smoothed_loss
        smoothed_delta = alpha * delta + (1 - alpha) * smoothed_delta
        # ProgressMeter.update!(prog, smoothed_loss)
        # Keep going while the smoothed loss trend is non-increasing.
        return smoothed_delta <= 0.0
    end
    return callback
end


"""
    ncount(n)

Return a callback that answers `true` for its first `n` invocations (any
arguments are ignored) and `false` ever after. Useful as a `train!` callback
to stop after a fixed number of updates.
"""
function ncount(n)
    remaining = n
    return function (args...)
        remaining > 0 || return false
        remaining -= 1
        return true
    end
end

import ProgressMeter # don't want to import update!

"""
Train(howlong, datasets...)
Create a callback function that can be used with [`train!`](@ref).
`howlong` can be an integer, an array of integers, or a `StepRange` such as 0:100:1000
representing the number of updates for reporting, testing and termination. The training will
terminate when the number of updates reach howlong[end]. So the simplest use would be
`Train(n::Int)` which will cause training to terminate after `n` updates. If the update
count ∈ howlong, a progress bar will be updated and the model will be tested on the datasets
if any are provided. For example `Train(0:100:1000,dtst)` will update the progress bar and
calculate loss and error on dtst every 100 updates and terminate at 1000 updates. The
`losses` and `errors` fields of the `Train` object will contain the results of these tests.
"""
mutable struct Train
whentorecord; datasets; losses; errors; updatecount; progress
Train(w,ds...)=new(w, ds, [Float64[] for d in ds], [Float64[] for d in ds], 0, ProgressMeter.Progress(w[end],1))
end

# Functor: makes a Train object usable as a train! callback.
# Records progress and test metrics at scheduled update counts; returns
# false (stop training) once whentorecord[end] updates have been made.
function (t::Train)(model,x,y,loss)
    # NOTE(review): the membership operator was missing here in the scraped
    # original (`if t.updatecount t.whentorecord`); `in` restores the
    # documented "if the update count ∈ howlong" behavior.
    if t.updatecount in t.whentorecord
        ProgressMeter.update!(t.progress, t.updatecount)
        # Renamed loop variable: the original `loss` shadowed the `loss` argument.
        for (data, losslog, errlog) in zip(t.datasets, t.losses, t.errors)
            push!(losslog, nll(model,data))
            push!(errlog, zeroone(model,data))
        end
    end
    t.updatecount += 1
    return t.updatecount <= t.whentorecord[end]
end

# Issues:
# What if we call train multiple times, and don't want to use the optimizers?
# Do we want parameter initialization as well? init and opt init should happen once.
# Recording losses with different loss functions.
# What info does the callback need?
# Are we doing anything other than pushing kwargs from train to Train?
# What if we want convergence in trnloss or convergence in devloss? Return earlier (best) model?
# How do we easily measure epochs?
# ProgressMeter both in time mode and converge mode.
# Printing loss with ProgressMeter seems difficult.
# Frequency of progress updates and loss calculations?

152 changes: 76 additions & 76 deletions tutorial/03.lin.ipynb

Large diffs are not rendered by default.

Loading

0 comments on commit 49ab0c7

Please sign in to comment.