Merge pull request #50 from bankofcanada/sts_lm

Levenberg–Marquardt for StackedTimeSolver
bankofcanada · Nov 17, 2023 · 71b2476 · 71b2476
2 parents 8aed99b + 4aeabc7
commit 71b2476
Show file tree

Hide file tree

Showing 9 changed files with 391 additions and 86 deletions.
diff --git a/src/StackedTimeSolver.jl b/src/StackedTimeSolver.jl
@@ -54,6 +54,9 @@ include("stackedtime/sparse.jl")
 include("stackedtime/fctypes.jl")
 include("stackedtime/misc.jl")
 include("stackedtime/solverdata.jl")
+include("stackedtime/sim_lm.jl")
+include("stackedtime/sim_nr.jl")
+include("stackedtime/sim_gn.jl")
 include("stackedtime/simulate.jl")
 include("stackedtime/shockdecomp.jl")
 include("stackedtime/stoch_simulate.jl")

diff --git a/src/stackedtime/sim_gn.jl b/src/stackedtime/sim_gn.jl
@@ -0,0 +1,60 @@
+##################################################################################
+# This file is part of StateSpaceEcon.jl
+# BSD 3-Clause License
+# Copyright (c) 2020-2023, Bank of Canada
+# All rights reserved.
+##################################################################################
+
+"""
+    sim_gn!(x, sd, maxiter, tol, verbose [, linesearch])
+
+Solve the simulation problem using the Gauss-Newton method.
+  * `x` - the array of data for the simulation. All initial, final and exogenous
+    conditions are already in place.
+  * `sd::AbstractSolverData` - the solver data constructed for the simulation
+    problem.
+  * `maxiter` - maximum number of iterations.
+  * `tol` - desired accuracy.
+  * `verbose` - whether or not to print progress information.
+"""
+function sim_gn!(x::AbstractArray{Float64}, sd::StackedTimeSolverData,
+    maxiter::Int64, tol::Float64, verbose::Bool, linesearch::Bool=false
+)
+
+    @warn "Gauss-Newton method is experimental."
+    if verbose
+        @info "Simulation using Gauss-Newton method."
+    end
+
+    Δx = Vector{Float64}(undef, size(sd.J, 1))
+    R = Vector{Float64}(undef, size(sd.J, 1))
+    stackedtime_R!(R, x, x, sd)
+    nR0 = 1.0
+    nR0 = norm(R, Inf)
+    if verbose
+        @info "0, || Fx || = $(nR0)"
+    end
+    nR = nR0
+    Δx = similar(R)
+    for it = 1:maxiter
+        if nR < tol
+            return true
+        end
+        R, FJ = stackedtime_RJ(x, x, sd; factorization=:none)
+        nR = norm(R, Inf)
+        J = FJ.A
+        JtR = J'R
+        JtJ = J'J
+        Δx .= JtJ \ JtR
+        nΔx = norm(Δx, Inf)
+        assign_update_step!(x, -1.0, Δx, sd)
+        if verbose
+            @info "$it, || Fx || = $(nR), || Δx || = $(nΔx)"
+        end
+        if nΔx < tol
+            return true
+        end
+    end
+    return false
+end
+
diff --git a/src/stackedtime/sim_lm.jl b/src/stackedtime/sim_lm.jl
@@ -0,0 +1,173 @@
+##################################################################################
+# This file is part of StateSpaceEcon.jl
+# BSD 3-Clause License
+# Copyright (c) 2020-2023, Bank of Canada
+# All rights reserved.
+##################################################################################
+
+function _sim_lm_step(
+    x::AbstractArray{Float64},
+    Δx::AbstractArray{Float64},
+    sd::StackedTimeSolverData;
+    lm_params=[0.1, 8.0, 0.0]
+)
+
+    lambda, nu, qual = lm_params
+
+    R, FJ = stackedtime_RJ(x, x, sd; factorization=:none)
+    J = FJ.A
+
+    nx = length(R)
+    JtR = J'R
+    M = J'J
+    # max is to make sure no zeros in Jd
+    Jd = max.(1e-10, vec(sum(abs2, J; dims=1)))
+    for i = 1:nx
+        @inbounds M[i, i] += lambda * Jd[i]
+    end
+    n2R = sum(abs2, R)
+    Δx .= M \ JtR
+    # predicted residual squared norm
+    n2PR = sum(abs2, R - J * Δx)
+    # actual residual and its squared norm
+    x_buf = copy(x)
+    assign_update_step!(x_buf, -1.0, Δx, sd)
+    stackedtime_R!(R, x_buf, x_buf, sd)
+    n2AR = sum(abs2, R)
+    # quality of step
+    qual = (n2AR - n2R) / (n2PR - n2R)
+    # adjust lambda
+    if qual > 0.75
+        # good quality, extend trust region
+        lambda = max(1e-16, lambda / nu)
+    elseif qual < 1e-3
+        # bad quality, shrink trust region
+        lambda = min(1e16, lambda * nu)
+    end
+    lm_params[:] = [lambda, nu, qual]
+    return nothing
+end
+
+function _sim_lm_first_step(
+    x::AbstractArray{Float64},
+    Δx::AbstractArray{Float64},
+    sd::StackedTimeSolverData;
+    lm_params=[0.1, 8.0, 0.0]
+)
+
+    lambda, nu, qual = lm_params
+
+    R, FJ = stackedtime_RJ(x, x, sd; factorization=:none)
+    J = FJ.A
+
+    nx = length(R)
+    JtR = J'R
+    M = J'J
+    # max is to make sure no zeros in Jd
+    Jd = max.(1e-10, vec(sum(abs2, J; dims=1)))
+    n2R = sum(abs2, R)
+
+    qual = 0.0
+    coef = 1.0
+    n2AR = -1.0
+    while qual < 1e-3
+        for i = 1:nx
+            @inbounds M[i, i] += lambda * Jd[i]
+            # the above should be diag(M) += lambda * Jd
+            # we have coef = (1-1/nu) for the iterations after the first one
+            # in order to subtract the previous lambda from diag(M)
+            # each iteration we shrink the trust region by setting lambda *= nu
+        end
+        Δx .= M \ JtR
+        # predicted residual squared norm
+        n2PR = sum(abs2, R - J * Δx)
+        # actual residual and its squared norm
+        x_buf = copy(x)
+        assign_update_step!(x_buf, -1.0, Δx, sd)
+        qual = 0.0
+        try
+            stackedtime_R!(R, x_buf, x_buf, sd)
+            n2AR = sum(abs2, R)
+            # quality of step
+            qual = (n2AR - n2R) / (n2PR - n2R)
+        catch
+        end
+        if qual > 0.75 # very good quality, extend the trust region
+            lambda = lambda / nu
+            break
+        end
+        if lambda >= 1e16 # lambda is getting too large
+            lambda = 1e16
+            break
+        end
+        # shrink the trust regions
+        lambda = lambda * nu
+        coef = 1.0 - 1.0 / nu
+    end
+    lm_params[:] = [lambda, nu, qual]
+    return nothing
+end
+
+"""
+    sim_lm!(x, sd, maxiter, tol, verbose [, linesearch])
+
+Solve the simulation problem using the Levenberg–Marquardt method.
+  * `x` - the array of data for the simulation. All initial, final and exogenous
+    conditions are already in place.
+  * `sd::AbstractSolverData` - the solver data constructed for the simulation
+    problem.
+  * `maxiter` - maximum number of iterations.
+  * `tol` - desired accuracy.
+  * `verbose` - whether or not to print progress information.
+"""
+function sim_lm!(x::AbstractArray{Float64}, sd::StackedTimeSolverData,
+    maxiter::Int64, tol::Float64, verbose::Bool, linesearch::Bool=false
+)
+
+    @warn "Levenberg–Marquardt method is experimental."
+    if verbose
+        @info "Simulation using Levenberg–Marquardt method."
+    end
+
+    lm_params = [0.1, 8.0, 0.0]
+    Δx = Vector{Float64}(undef, size(sd.J, 1))
+    R = Vector{Float64}(undef, size(sd.J, 1))
+    stackedtime_R!(R, x, x, sd)
+    nR0 = 1.0
+    nR0 = norm(R, Inf)
+    if verbose
+        @info "0, || Fx || = $(nR0)"
+    end
+    # step 1
+    _sim_lm_first_step(x, Δx, sd; lm_params)
+    assign_update_step!(x, -1.0, Δx, sd)
+    stackedtime_R!(R, x, x, sd)
+    nR = norm(R, Inf)
+    nΔx = norm(Δx, Inf)
+    if verbose
+        @info "1, || Fx || = $(nR), || Δx || = $(nΔx), lambda = $(lm_params[1]), qual = $(lm_params[3])"
+    end
+    for it = 2:maxiter
+        if nR < tol
+            return true
+        end
+        _sim_lm_step(x, Δx, sd; lm_params)
+        assign_update_step!(x, -1.0, Δx, sd)
+        stackedtime_R!(R, x, x, sd)
+        nR = norm(R, Inf)
+        nΔx = norm(Δx, Inf)
+        if verbose
+            @info "$it, || Fx || = $(nR), || Δx || = $(nΔx), lambda = $(lm_params[1]), qual = $(lm_params[3])"
+        end
+        if  (nΔx < 1e-5 || true ) &&  ((4nR < nR0 && nR < 1e-2) || (nR / nR0 > 0.8)) && (lm_params[1] <= 1e-4)
+            if verbose
+                @info "    --- switching to Newton-Raphson ---   "
+            end
+            sd.J_factorized[] = nothing
+            return sim_nr!(x, sd, maxiter, tol, verbose, linesearch)
+        end
+        # nR0 = nR
+    end
+    return false
+end
+
diff --git a/src/stackedtime/sim_nr.jl b/src/stackedtime/sim_nr.jl
@@ -0,0 +1,80 @@
+##################################################################################
+# This file is part of StateSpaceEcon.jl
+# BSD 3-Clause License
+# Copyright (c) 2020-2023, Bank of Canada
+# All rights reserved.
+##################################################################################
+
+"""
+    sim_nr!(x, sd, maxiter, tol, verbose [, linesearch])
+
+Solve the simulation problem.
+  * `x` - the array of data for the simulation. All initial, final and exogenous
+    conditions are already in place.
+  * `sd::AbstractSolverData` - the solver data constructed for the simulation
+    problem.
+  * `maxiter` - maximum number of iterations.
+  * `tol` - desired accuracy.
+  * `verbose` - whether or not to print progress information.
+  * `linesearch::Bool` - When `true` the Newton-Raphson is modified to include a 
+    search along the descent direction for a sufficient decrease in f. It will 
+    do this at each iteration. Default is `false`.
+
+"""
+@timeit_debug timer function sim_nr!(x::AbstractArray{Float64}, sd::StackedTimeSolverData,
+    maxiter::Int64, tol::Float64, verbose::Bool, linesearch::Bool=false)
+    for it = 1:maxiter
+        Fx = stackedtime_R!(Vector{Float64}(undef, size(sd.J, 1)), x, x, sd)
+        nFx = norm(Fx, Inf)
+        if nFx < tol
+            if verbose
+                @info "$it, || Fx || = $(nFx)"
+            end
+            return true
+        end
+        Δx, Jx = stackedtime_RJ(x, x, sd)
+        Δx = sf_solve!(Jx, Δx)
+
+        λ = 1.0
+        if linesearch
+            nf = norm(Fx)
+            # the Armijo rule: C.T.Kelly, Iterative Methods for Linear and Nonlinear Equations, ch.8.1, p.137
+            α = 1e-4
+            σ = 0.5
+            while λ > 0.00001
+                x_buf = copy(x)
+                assign_update_step!(x_buf, -λ, Δx, sd)
+                nrb2 = try
+                    stackedtime_R!(Fx, x_buf, x_buf, sd)
+                    norm(Fx)
+                catch e
+                    Inf
+                end
+                if nrb2 < (1.0 - α * λ) * nf
+                    # if verbose && λ < 1.0
+                    #     @info "Linesearch success with λ = $λ."
+                    # end
+                    break
+                end
+                λ = σ * λ
+            end
+            if verbose
+                if λ <= 0.00001
+                    @warn "Linesearch failed."
+                elseif λ < 1.0
+                    @info "Linesearch success with λ=$λ"
+                end
+            end
+        end
+        nΔx = λ * norm(vec(Δx), Inf)
+        assign_update_step!(x, -λ, Δx, sd)
+        if verbose
+            @info "$it, || Fx || = $(nFx), || Δx || = $(nΔx)"
+        end
+        if nΔx < tol
+            return true
+        end
+    end
+    return false
+end
+