-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #50 from bankofcanada/sts_lm
Levenberg–Marquardt for StackedTimeSolver
- Loading branch information
Showing
9 changed files
with
391 additions
and
86 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
################################################################################## | ||
# This file is part of StateSpaceEcon.jl | ||
# BSD 3-Clause License | ||
# Copyright (c) 2020-2023, Bank of Canada | ||
# All rights reserved. | ||
################################################################################## | ||
|
||
""" | ||
sim_gn!(x, sd, maxiter, tol, verbose [, linesearch]) | ||
Solve the simulation problem using the Gauss-Newton method. | ||
* `x` - the array of data for the simulation. All initial, final and exogenous | ||
conditions are already in place. | ||
* `sd::AbstractSolverData` - the solver data constructed for the simulation | ||
problem. | ||
* `maxiter` - maximum number of iterations. | ||
* `tol` - desired accuracy. | ||
* `verbose` - whether or not to print progress information. | ||
""" | ||
function sim_gn!(x::AbstractArray{Float64}, sd::StackedTimeSolverData, | ||
maxiter::Int64, tol::Float64, verbose::Bool, linesearch::Bool=false | ||
) | ||
|
||
@warn "Gauss-Newton method is experimental." | ||
if verbose | ||
@info "Simulation using Gauss-Newton method." | ||
end | ||
|
||
Δx = Vector{Float64}(undef, size(sd.J, 1)) | ||
R = Vector{Float64}(undef, size(sd.J, 1)) | ||
stackedtime_R!(R, x, x, sd) | ||
nR0 = 1.0 | ||
nR0 = norm(R, Inf) | ||
if verbose | ||
@info "0, || Fx || = $(nR0)" | ||
end | ||
nR = nR0 | ||
Δx = similar(R) | ||
for it = 1:maxiter | ||
if nR < tol | ||
return true | ||
end | ||
R, FJ = stackedtime_RJ(x, x, sd; factorization=:none) | ||
nR = norm(R, Inf) | ||
J = FJ.A | ||
JtR = J'R | ||
JtJ = J'J | ||
Δx .= JtJ \ JtR | ||
nΔx = norm(Δx, Inf) | ||
assign_update_step!(x, -1.0, Δx, sd) | ||
if verbose | ||
@info "$it, || Fx || = $(nR), || Δx || = $(nΔx)" | ||
end | ||
if nΔx < tol | ||
return true | ||
end | ||
end | ||
return false | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
################################################################################## | ||
# This file is part of StateSpaceEcon.jl | ||
# BSD 3-Clause License | ||
# Copyright (c) 2020-2023, Bank of Canada | ||
# All rights reserved. | ||
################################################################################## | ||
|
||
function _sim_lm_step( | ||
x::AbstractArray{Float64}, | ||
Δx::AbstractArray{Float64}, | ||
sd::StackedTimeSolverData; | ||
lm_params=[0.1, 8.0, 0.0] | ||
) | ||
|
||
lambda, nu, qual = lm_params | ||
|
||
R, FJ = stackedtime_RJ(x, x, sd; factorization=:none) | ||
J = FJ.A | ||
|
||
nx = length(R) | ||
JtR = J'R | ||
M = J'J | ||
# max is to make sure no zeros in Jd | ||
Jd = max.(1e-10, vec(sum(abs2, J; dims=1))) | ||
for i = 1:nx | ||
@inbounds M[i, i] += lambda * Jd[i] | ||
end | ||
n2R = sum(abs2, R) | ||
Δx .= M \ JtR | ||
# predicted residual squared norm | ||
n2PR = sum(abs2, R - J * Δx) | ||
# actual residual and its squared norm | ||
x_buf = copy(x) | ||
assign_update_step!(x_buf, -1.0, Δx, sd) | ||
stackedtime_R!(R, x_buf, x_buf, sd) | ||
n2AR = sum(abs2, R) | ||
# quality of step | ||
qual = (n2AR - n2R) / (n2PR - n2R) | ||
# adjust lambda | ||
if qual > 0.75 | ||
# good quality, extend trust region | ||
lambda = max(1e-16, lambda / nu) | ||
elseif qual < 1e-3 | ||
# bad quality, shrink trust region | ||
lambda = min(1e16, lambda * nu) | ||
end | ||
lm_params[:] = [lambda, nu, qual] | ||
return nothing | ||
end | ||
|
||
function _sim_lm_first_step( | ||
x::AbstractArray{Float64}, | ||
Δx::AbstractArray{Float64}, | ||
sd::StackedTimeSolverData; | ||
lm_params=[0.1, 8.0, 0.0] | ||
) | ||
|
||
lambda, nu, qual = lm_params | ||
|
||
R, FJ = stackedtime_RJ(x, x, sd; factorization=:none) | ||
J = FJ.A | ||
|
||
nx = length(R) | ||
JtR = J'R | ||
M = J'J | ||
# max is to make sure no zeros in Jd | ||
Jd = max.(1e-10, vec(sum(abs2, J; dims=1))) | ||
n2R = sum(abs2, R) | ||
|
||
qual = 0.0 | ||
coef = 1.0 | ||
n2AR = -1.0 | ||
while qual < 1e-3 | ||
for i = 1:nx | ||
@inbounds M[i, i] += lambda * Jd[i] | ||
# the above should be diag(M) += lambda * Jd | ||
# we have coef = (1-1/nu) for the iterations after the first one | ||
# in order to subtract the previous lambda from diag(M) | ||
# each iteration we shrink the trust region by setting lambda *= nu | ||
end | ||
Δx .= M \ JtR | ||
# predicted residual squared norm | ||
n2PR = sum(abs2, R - J * Δx) | ||
# actual residual and its squared norm | ||
x_buf = copy(x) | ||
assign_update_step!(x_buf, -1.0, Δx, sd) | ||
qual = 0.0 | ||
try | ||
stackedtime_R!(R, x_buf, x_buf, sd) | ||
n2AR = sum(abs2, R) | ||
# quality of step | ||
qual = (n2AR - n2R) / (n2PR - n2R) | ||
catch | ||
end | ||
if qual > 0.75 # very good quality, extend the trust region | ||
lambda = lambda / nu | ||
break | ||
end | ||
if lambda >= 1e16 # lambda is getting too large | ||
lambda = 1e16 | ||
break | ||
end | ||
# shrink the trust regions | ||
lambda = lambda * nu | ||
coef = 1.0 - 1.0 / nu | ||
end | ||
lm_params[:] = [lambda, nu, qual] | ||
return nothing | ||
end | ||
|
||
""" | ||
sim_lm!(x, sd, maxiter, tol, verbose [, linesearch]) | ||
Solve the simulation problem using the Levenberg–Marquardt method. | ||
* `x` - the array of data for the simulation. All initial, final and exogenous | ||
conditions are already in place. | ||
* `sd::AbstractSolverData` - the solver data constructed for the simulation | ||
problem. | ||
* `maxiter` - maximum number of iterations. | ||
* `tol` - desired accuracy. | ||
* `verbose` - whether or not to print progress information. | ||
""" | ||
function sim_lm!(x::AbstractArray{Float64}, sd::StackedTimeSolverData, | ||
maxiter::Int64, tol::Float64, verbose::Bool, linesearch::Bool=false | ||
) | ||
|
||
@warn "Levenberg–Marquardt method is experimental." | ||
if verbose | ||
@info "Simulation using Levenberg–Marquardt method." | ||
end | ||
|
||
lm_params = [0.1, 8.0, 0.0] | ||
Δx = Vector{Float64}(undef, size(sd.J, 1)) | ||
R = Vector{Float64}(undef, size(sd.J, 1)) | ||
stackedtime_R!(R, x, x, sd) | ||
nR0 = 1.0 | ||
nR0 = norm(R, Inf) | ||
if verbose | ||
@info "0, || Fx || = $(nR0)" | ||
end | ||
# step 1 | ||
_sim_lm_first_step(x, Δx, sd; lm_params) | ||
assign_update_step!(x, -1.0, Δx, sd) | ||
stackedtime_R!(R, x, x, sd) | ||
nR = norm(R, Inf) | ||
nΔx = norm(Δx, Inf) | ||
if verbose | ||
@info "1, || Fx || = $(nR), || Δx || = $(nΔx), lambda = $(lm_params[1]), qual = $(lm_params[3])" | ||
end | ||
for it = 2:maxiter | ||
if nR < tol | ||
return true | ||
end | ||
_sim_lm_step(x, Δx, sd; lm_params) | ||
assign_update_step!(x, -1.0, Δx, sd) | ||
stackedtime_R!(R, x, x, sd) | ||
nR = norm(R, Inf) | ||
nΔx = norm(Δx, Inf) | ||
if verbose | ||
@info "$it, || Fx || = $(nR), || Δx || = $(nΔx), lambda = $(lm_params[1]), qual = $(lm_params[3])" | ||
end | ||
if (nΔx < 1e-5 || true ) && ((4nR < nR0 && nR < 1e-2) || (nR / nR0 > 0.8)) && (lm_params[1] <= 1e-4) | ||
if verbose | ||
@info " --- switching to Newton-Raphson --- " | ||
end | ||
sd.J_factorized[] = nothing | ||
return sim_nr!(x, sd, maxiter, tol, verbose, linesearch) | ||
end | ||
# nR0 = nR | ||
end | ||
return false | ||
end | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
################################################################################## | ||
# This file is part of StateSpaceEcon.jl | ||
# BSD 3-Clause License | ||
# Copyright (c) 2020-2023, Bank of Canada | ||
# All rights reserved. | ||
################################################################################## | ||
|
||
""" | ||
sim_nr!(x, sd, maxiter, tol, verbose [, linesearch]) | ||
Solve the simulation problem. | ||
* `x` - the array of data for the simulation. All initial, final and exogenous | ||
conditions are already in place. | ||
* `sd::AbstractSolverData` - the solver data constructed for the simulation | ||
problem. | ||
* `maxiter` - maximum number of iterations. | ||
* `tol` - desired accuracy. | ||
* `verbose` - whether or not to print progress information. | ||
* `linesearch::Bool` - When `true` the Newton-Raphson is modified to include a | ||
search along the descent direction for a sufficient decrease in f. It will | ||
do this at each iteration. Default is `false`. | ||
""" | ||
@timeit_debug timer function sim_nr!(x::AbstractArray{Float64}, sd::StackedTimeSolverData, | ||
maxiter::Int64, tol::Float64, verbose::Bool, linesearch::Bool=false) | ||
for it = 1:maxiter | ||
Fx = stackedtime_R!(Vector{Float64}(undef, size(sd.J, 1)), x, x, sd) | ||
nFx = norm(Fx, Inf) | ||
if nFx < tol | ||
if verbose | ||
@info "$it, || Fx || = $(nFx)" | ||
end | ||
return true | ||
end | ||
Δx, Jx = stackedtime_RJ(x, x, sd) | ||
Δx = sf_solve!(Jx, Δx) | ||
|
||
λ = 1.0 | ||
if linesearch | ||
nf = norm(Fx) | ||
# the Armijo rule: C.T.Kelly, Iterative Methods for Linear and Nonlinear Equations, ch.8.1, p.137 | ||
α = 1e-4 | ||
σ = 0.5 | ||
while λ > 0.00001 | ||
x_buf = copy(x) | ||
assign_update_step!(x_buf, -λ, Δx, sd) | ||
nrb2 = try | ||
stackedtime_R!(Fx, x_buf, x_buf, sd) | ||
norm(Fx) | ||
catch e | ||
Inf | ||
end | ||
if nrb2 < (1.0 - α * λ) * nf | ||
# if verbose && λ < 1.0 | ||
# @info "Linesearch success with λ = $λ." | ||
# end | ||
break | ||
end | ||
λ = σ * λ | ||
end | ||
if verbose | ||
if λ <= 0.00001 | ||
@warn "Linesearch failed." | ||
elseif λ < 1.0 | ||
@info "Linesearch success with λ=$λ" | ||
end | ||
end | ||
end | ||
nΔx = λ * norm(vec(Δx), Inf) | ||
assign_update_step!(x, -λ, Δx, sd) | ||
if verbose | ||
@info "$it, || Fx || = $(nFx), || Δx || = $(nΔx)" | ||
end | ||
if nΔx < tol | ||
return true | ||
end | ||
end | ||
return false | ||
end | ||
|
Oops, something went wrong.