diff --git a/Project.toml b/Project.toml index 51bcf8b94..eb2561631 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FrankWolfe" uuid = "f55ce6ea-fdc5-4628-88c5-0087fe54bd30" authors = ["ZIB-IOL"] -version = "0.4.0" +version = "0.4.2" [deps] Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" @@ -44,6 +44,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -59,4 +60,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] diff --git a/docs/src/advanced.md b/docs/src/advanced.md index a404fad85..ea438ca74 100644 --- a/docs/src/advanced.md +++ b/docs/src/advanced.md @@ -53,7 +53,7 @@ See [Extra-lazification](@ref) for a complete example. ## Specialized active set for quadratic functions -If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadratic`. +If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadraticProductCaching`. It relies on the storage of various scalar products to efficiently determine the best (and worst for `blended_pairwise_conditional_gradient`) atom in the active set without the need of computing many scalar products in each iteration. The user should provide the Hessian matrix `A` as well as the linear part `b` of the function, such that: ```math @@ -228,6 +228,6 @@ This subspace is the image of the Reynolds operator defined by \mathcal{R}(x)=\frac{1}{|G|}\sum_{g\in G}g\cdot x. ``` -In practice, the type `SymmetricLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. +In practice, the type `SubspaceLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. The gradient is symmetrised with $\mathcal{R}^\ast$, then passed to the non-symmetric LMO, and the resulting output is symmetrised with $\mathcal{R}$. In many cases, the gradient is already symmetric so that `reynolds_adjoint(gradient, lmo) = gradient` is a fast and valid choice. 
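+
+For instance, a minimal sketch for the mirror symmetry $x \mapsto x^\top$ on $n\times n$ matrices, assuming a base `lmo` over such matrices and a gradient that is already symmetric:
+
+```julia
+reynolds(x, lmo) = (x + x') / 2             # Reynolds operator for the group {id, transpose}
+reynolds_adjoint(gradient, lmo) = gradient  # valid here since the gradient is symmetric
+
+lmo_sym = FrankWolfe.SubspaceLMO(lmo, reynolds, reynolds_adjoint)
+v = FrankWolfe.compute_extreme_point(lmo_sym, gradient)
+```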
diff --git a/docs/src/reference/3_backend.md b/docs/src/reference/3_backend.md
index f0ef0b2ae..34e71f040 100644
--- a/docs/src/reference/3_backend.md
+++ b/docs/src/reference/3_backend.md
@@ -4,7 +4,7 @@
 
 ```@autodocs
 Modules = [FrankWolfe]
-Pages = ["active_set.jl"]
+Pages = ["active_set.jl", "active_set_quadratic.jl", "active_set_quadratic_direct_solve.jl", "active_set_sparsifier.jl"]
 ```
 
 ## Functions and gradients
@@ -67,6 +67,7 @@ FrankWolfe.select_update_indices
 FrankWolfe.FullUpdate
 FrankWolfe.CyclicUpdate
 FrankWolfe.StochasticUpdate
+FrankWolfe.LazyUpdate
 ```
 
 ## Update step for block-coordinate Frank-Wolfe
diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl
new file mode 100644
index 000000000..159d204ff
--- /dev/null
+++ b/examples/alternating_projections.jl
@@ -0,0 +1,37 @@
+import FrankWolfe
+using LinearAlgebra
+using Random
+
+
+include("../examples/plot_utils.jl")
+
+Random.seed!(100)
+
+n = 500
+lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n])
+lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n])
+
+
+trajectories = []
+
+methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient]
+
+for (i, m) in enumerate(methods)
+    if i == 1
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m)
+    elseif i == 2
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true)
+    else
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true)
+    end
+    push!(trajectories, traj_data)
+end
+
+
+
+labels = ["FW", "BPCG", "BPCG (reuse)"]
+
+plot_trajectories(trajectories, labels, xscalelog=true)
+
+
+
diff --git a/examples/birkhoff_polytope.jl b/examples/birkhoff_polytope.jl
index e181ba69b..c81771ca7 100644
--- a/examples/birkhoff_polytope.jl
+++ b/examples/birkhoff_polytope.jl
@@ -54,7 +54,7 @@ FrankWolfe.benchmark_oracles(
     x -> cf(x, xp, normxp2),
     (str, x) -> cgrad!(str, x, xp),
     lmo,
-    FrankWolfe.ActiveSetQuadratic([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
diff --git a/examples/birkhoff_polytope_symmetric.jl b/examples/birkhoff_polytope_symmetric.jl
index db1c79a55..8e1c3933d 100644
--- a/examples/birkhoff_polytope_symmetric.jl
+++ b/examples/birkhoff_polytope_symmetric.jl
@@ -36,7 +36,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
     x -> cf(x, xp, normxp2),
     (str, x) -> cgrad!(str, x, xp),
     lmo_nat,
-    FrankWolfe.ActiveSetQuadratic([(1.0, x0)], 2I/n^2, -2xp/n^2);
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], 2I/n^2, -2xp/n^2);
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
@@ -48,15 +48,15 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
 # here the problem is invariant under mirror symmetry around the diagonal and the anti-diagonal
 # each solution of the LMO can then be added to the active set together with its orbit
 # on top of that, the effective dimension of the space is reduced
-# 
the following function constructs the functions `reduce` and `inflate` needed for SymmetricLMO -# `reduce` maps a matrix to the invariant vector space +# the following function constructs the functions `deflate` and `inflate` needed for SubspaceLMO +# `deflate` maps a matrix to the invariant vector space # `inflate` maps a vector in this space back to a matrix -# using `FrankWolfe.SymmetricArray` is a convenience to avoid reallocating the result of `inflate` -function build_reduce_inflate(p::Matrix{T}) where {T <: Number} +# using `FrankWolfe.SubspaceVector` is a convenience to avoid reallocating the result of `inflate` +function build_deflate_inflate(p::Matrix{T}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) # square matrix - dimension = floor(Int, (n+1)^2 / 4) # reduced dimension - function reduce(A::AbstractMatrix{T}, lmo) + dimension = floor(Int, (n+1)^2 / 4) # deflated dimension + function deflate(A::AbstractMatrix{T}, lmo) vec = Vector{T}(undef, dimension) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i @@ -75,9 +75,9 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end end end - return FrankWolfe.SymmetricArray(A, vec) + return FrankWolfe.SubspaceVector(A, vec) end - function inflate(x::FrankWolfe.SymmetricArray, lmo) + function inflate(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i cnt += 1 @@ -102,22 +102,22 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end return x.data end - return reduce, inflate + return deflate, inflate end -reduce, inflate = build_reduce_inflate(xpi) -const rxp = reduce(xpi, nothing) -@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in reduce and inflate +deflate, inflate = build_deflate_inflate(xpi) +const rxp = deflate(xpi, nothing) +@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in deflate and inflate -lmo_sym = FrankWolfe.SymmetricLMO(lmo_nat, reduce, inflate) +lmo_sym = FrankWolfe.SubspaceLMO(lmo_nat, deflate, inflate) -rx0 = FrankWolfe.compute_extreme_point(lmo_sym, reduce(sparse(randn(n, n)), nothing)) +rx0 = FrankWolfe.compute_extreme_point(lmo_sym, deflate(sparse(randn(n, n)), nothing)) @time rx, rv, rprimal, rdual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( x -> cf(x, rxp, normxp2), (str, x) -> cgrad!(str, x, rxp), lmo_sym, - FrankWolfe.ActiveSetQuadratic([(1.0, rx0)], 2I/n^2, -2rxp/n^2); + FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, rx0)], 2I/n^2, -2rxp/n^2); max_iteration=k, line_search=FrankWolfe.Shortstep(2/n^2), lazy=true, diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl new file mode 100644 index 000000000..828436201 --- /dev/null +++ b/examples/blended_pairwise_with_direct.jl @@ -0,0 +1,174 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +Note the special structure of f(x) = norm(x - x0)^2 that we assume here + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
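+
+A quick note on that structure (expanding the squared norm):
+    f(x) = ‖x - xp‖² = ⟨x, x⟩ - 2⟨xp, x⟩ + ‖xp‖²,
+so in the form f(x) = ½⟨x, Ax⟩ + ⟨b, x⟩ + c one has A = 2I and b = -2xp,
+which is exactly what is passed to the quadratic active sets constructed below.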
+=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10_000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_standard), +); + +# Just projection quadratic +trajectoryBPCG_quadratic = [] +as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic), +); + +as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_as), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +# Update the data and labels for plotting +data_trajectories = 
[ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic, + trajectoryBPCG_quadratic_as, + trajectoryBPCG_quadratic_direct, + trajectoryBPCG_quadratic_direct_generic, + trajectoryBPCG_quadratic_noqas, +] +labels_trajectories = [ + "BPCG (Standard)", + "BPCG (Specific Direct)", + "AS_Quad", + "Reloaded", + "Reloaded_generic", + "Reloaded_noqas", +] + +# Plot trajectories +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl new file mode 100644 index 000000000..ab934edd2 --- /dev/null +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -0,0 +1,146 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope which is not standard quadratic. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" +Random.seed!(s) + +A = let + A = randn(n, n) + A' * A +end +@assert isposdef(A) == true + +const y = Random.rand(Bool, n) * 0.6 .+ 0.3 + +function f(x) + d = x - y + return dot(d, A, d) +end + +function grad!(storage, x) + mul!(storage, A, x) + return mul!(storage, A, y, -2, 2) +end + + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
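+        # record FrankWolfe's standard callback statistics (iteration, primal and dual
+        # values, time) together with the current number of atoms in the active set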
+ return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +callback = build_callback(trajectoryBPCG_standard) + +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Adaptive(), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + + +dataSparsity = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_automatic_standard, +] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] + + +# Plot trajectories +plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl new file mode 100644 index 000000000..7700268bb --- /dev/null +++ b/examples/blended_pairwise_with_sparsify.jl @@ -0,0 +1,94 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
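+
+Under the hood, `FrankWolfe.ActiveSetSparsifier` wraps an inner active set; once more
+than `minimum_vertices` atoms are stored, every `solve_frequency` updates it solves a
+small feasibility LP expressing the current iterate as a convex combination of the
+stored atoms and drops the atoms whose weight is (near) zero.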
+=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +# here the optimal solution lies in the interior if you want an optimal solution on a face and not the interior use: +# const xp = xpi; + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(2, 1.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0); +# lmo = FrankWolfe.UnitSimplexOracle(1.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + trajectory=true, + callback=build_callback(trajectoryBPCG_standard), +); + +trajectoryBPCG_as_sparse = [] +active_set_sparse = FrankWolfe.ActiveSetSparsifier( + FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), + HiGHS.Optimizer(), +) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_sparse, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_as_sparse), +); + +# Reduction primal/dual error vs. sparsity of solution + +plot_data = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse] +plot_labels = ["BPCG (Standard)", "BPCG (Sparsify)"] + +# Plot sparsity +plot_sparsity(plot_data, plot_labels, legend_position=:topright) diff --git a/examples/docs_12_quadratic_symmetric.jl b/examples/docs_12_quadratic_symmetric.jl index 82ca5bd12..160c7e86e 100644 --- a/examples/docs_12_quadratic_symmetric.jl +++ b/examples/docs_12_quadratic_symmetric.jl @@ -1,7 +1,7 @@ # # Accelerations for quadratic functions and symmetric problems -# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SymmetricLMO`. -# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadratic`. +# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SubspaceLMO`. +# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadraticProductCaching`. # The specific problem we consider here comes from quantum information and some context can be found [here](https://arxiv.org/abs/2302.04721). # Formally, we want to find the distance between a tensor of size `m^N` and the `N`-partite local polytope which is defined by its vertices @@ -124,11 +124,11 @@ println() #hide # A first acceleration can be obtained by using the active set specialized for the quadratic objective function, # whose gradient is here ``x-p``, explaining the hessian and linear part provided as arguments. -# The speedup is obtained by pre-computing some scalar products to quickly obtained, in each iteration, the best and worst -# atoms currently in the active set. 
+# The speedup is obtained by pre-computing some scalar products to quickly obtain, in each iteration,
+# the best and worst atoms currently in the active set.
 
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
-asq_naive = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p)
+FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
+asq_naive = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p)
 @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, asq_naive; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration)
 println() #hide
 
@@ -165,9 +165,9 @@ println() #hide
 # a very small speedup by precomputing and storing `Combinatorics.permutations(1:N)`
 # in a dedicated field of our custom LMO.
 
-lmo_permutedims = FrankWolfe.SymmetricLMO(lmo_naive, reynolds_permutedims)
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
-asq_permutedims = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p)
+lmo_permutedims = FrankWolfe.SubspaceLMO(lmo_naive, reynolds_permutedims)
+FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
+asq_permutedims = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p)
 @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, asq_permutedims; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration)
 println() #hide
 
@@ -197,9 +197,9 @@ function build_reynolds_unique(p::Array{T, N}) where {T <: Number, N}
     end
 end
 
-lmo_unique = FrankWolfe.SymmetricLMO(lmo_naive, build_reynolds_unique(p))
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
-asq_unique = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p)
+lmo_unique = FrankWolfe.SubspaceLMO(lmo_naive, build_reynolds_unique(p))
+FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
+asq_unique = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p)
 @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, asq_unique; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration)
 println() #hide
 
@@ -214,7 +214,7 @@ println() #hide
 # accelerations, especially for active set based algorithms in the regime where many lazy iterations are performed.
 # We refer to the example `symmetric.jl` for a small benchmark with symmetric matrices.
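+
+# Note that `deflate` has to preserve scalar products, ⟨deflate(A), deflate(B)⟩ = ⟨A, B⟩,
+# which is the reason for the square-root scaling factors applied to grouped entries below;
+# wrapping the result in a `SubspaceVector` then lets the rest of the code treat the
+# deflated iterate as a plain vector while keeping the full array available.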
-function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} +function build_deflate_inflate(p::Array{T, N}) where {T <: Number, N} ptol = round.(p; digits=8) ptol[ptol .== zero(T)] .= zero(T) # transform -0.0 into 0.0 as isequal(0.0, -0.0) is false uniquetol = unique(ptol[:]) @@ -227,8 +227,8 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} for (i, ind) in enumerate(indices) vec[i] = sum(A[ind]) / sqmul[i] end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) for (i, ind) in enumerate(indices) @view(x.data[ind]) .= x.vec[i] / sqmul[i] end @@ -236,16 +236,16 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} end end -reduce, inflate = build_reduce_inflate(p) -p_reduce = reduce(p, nothing) -x0_reduce = reduce(x0, nothing) -f_reduce = let p_reduce = p_reduce, normp2 = normp2 - x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_reduce, x) + normp2 +deflate, inflate = build_deflate_inflate(p) +p_deflate = deflate(p, nothing) +x0_deflate = deflate(x0, nothing) +f_deflate = let p_deflate = p_deflate, normp2 = normp2 + x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_deflate, x) + normp2 end -grad_reduce! = let p_reduce = p_reduce +grad_deflate! = let p_deflate = p_deflate (storage, x) -> begin @inbounds for i in eachindex(x) - storage[i] = x[i] - p_reduce[i] + storage[i] = x[i] - p_deflate[i] end end end @@ -255,9 +255,9 @@ println() #hide # In this simple example, their shape remains unchanged, but in general this may need some # reformulation, which falls to the user. -lmo_reduce = FrankWolfe.SymmetricLMO(lmo_naive, reduce, inflate) -FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_reduce = FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce) -@time FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, asq_reduce; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) +lmo_deflate = FrankWolfe.SubspaceLMO(lmo_naive, deflate, inflate) +FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_deflate = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate) +@time FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, asq_deflate; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide diff --git a/examples/quadratic.jl b/examples/quadratic.jl index 923fbc4f3..450ea0901 100644 --- a/examples/quadratic.jl +++ b/examples/quadratic.jl @@ -74,7 +74,7 @@ function benchmark_Bell(p::Array{T, 2}, quadratic::Bool; fw_method=FrankWolfe.bl lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) x0 = FrankWolfe.compute_extreme_point(lmo, -p) if quadratic - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) else active_set = FrankWolfe.ActiveSet([(one(T), x0)]) end diff --git 
a/examples/quadratic_A.jl b/examples/quadratic_A.jl index 5ac879c47..c65643a82 100644 --- a/examples/quadratic_A.jl +++ b/examples/quadratic_A.jl @@ -48,7 +48,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo, zeros(T, n+1)) # active_set = FrankWolfe.ActiveSet([(1.0, x0)]) # specialized active set, automatically detecting the parameters A and b of the quadratic function f -active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], gradf) +active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], gradf) @time res = FrankWolfe.blended_pairwise_conditional_gradient( # @time res = FrankWolfe.away_frank_wolfe( diff --git a/examples/reynolds.jl b/examples/reynolds.jl index fe28bca53..0cb6f3c32 100644 --- a/examples/reynolds.jl +++ b/examples/reynolds.jl @@ -86,12 +86,12 @@ function benchmark_Bell(p::Array{T, 3}, sym::Bool; kwargs...) where {T <: Number end lmo = BellCorrelationsLMO{T}(size(p, 1), zeros(T, size(p, 1))) if sym - lmo = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + lmo = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) end x0 = FrankWolfe.compute_extreme_point(lmo, -p) println("Output type of the LMO: ", typeof(x0)) active_set = FrankWolfe.ActiveSet([(one(T), x0)]) - # active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + # active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) return FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set; lazy=true, line_search=FrankWolfe.Shortstep(one(T)), kwargs...) end diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl index 6a550a963..6bf230e26 100644 --- a/examples/simple_stateless.jl +++ b/examples/simple_stateless.jl @@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl # simple step iterations about 33% faster -@test line_search.factor <= 44 +@test line_search.factor <= 51 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe( f, diff --git a/examples/symmetric.jl b/examples/symmetric.jl index 000ca53bc..1d1018cf9 100644 --- a/examples/symmetric.jl +++ b/examples/symmetric.jl @@ -57,7 +57,7 @@ function correlation_tensor_GHZ_polygon(N::Int, m::Int; type=Float64) return res end -function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} +function build_deflate_inflate_permutedims(p::Array{T, 2}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) dimension = (n * (n + 1)) ÷ 2 @@ -72,8 +72,8 @@ function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} vec[cnt+j] = (A[i, j] + A[j, i]) / sqrt2 end end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:n x.data[i, i] = x.vec[i] @@ -90,9 +90,9 @@ end function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_pairwise_conditional_gradient, kwargs...) 
where {T <: Number} Random.seed!(0) if sym - reduce, inflate = build_reduce_inflate_permutedims(p) - lmo = FrankWolfe.SymmetricLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), reduce, inflate) - p = reduce(p, lmo) + deflate, inflate = build_deflate_inflate_permutedims(p) + lmo = FrankWolfe.SubspaceLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), deflate, inflate) + p = deflate(p, lmo) else lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) end @@ -109,7 +109,7 @@ function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_ end end x0 = FrankWolfe.compute_extreme_point(lmo, -p) - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) res = fw_method(f, grad!, lmo, active_set; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, verbose=false, max_iteration=10^2) return fw_method(f, grad!, lmo, res[6]; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, lazy_tolerance=10^6, kwargs...) end diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index e39503847..253863085 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -36,6 +36,8 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") +include("active_set_quadratic_direct_solve.jl") +include("active_set_sparsifier.jl") include("blended_cg.jl") include("afw.jl") diff --git a/src/abstract_oracles.jl b/src/abstract_oracles.jl index 5b61960f0..c1c1154e4 100644 --- a/src/abstract_oracles.jl +++ b/src/abstract_oracles.jl @@ -258,23 +258,28 @@ function compute_extreme_point( end """ - SymmetricLMO{LMO, TR, TI} + SubspaceLMO{LMO, TD, TI} -Symmetric LMO for the reduction operator defined by `TR` +Subspace LMO for the deflation operator defined by `TD` and the inflation operator defined by `TI`. -Computations are performed in the reduced subspace, and the +Computations are performed in the deflated subspace, and the effective call of the LMO first inflates the gradient, then -use the non-symmetric LMO, and finally reduces the output. +uses the full LMO, and finally deflates the output. + +Deflation operators typically project onto symmetric subspaces +or select relevant elements of a full tensor. +Scalar products should be treated carefully to ensure correctness +of the results; see also the companion `SubspaceVector` structure. """ -struct SymmetricLMO{LMO<:LinearMinimizationOracle,TR,TI} <: LinearMinimizationOracle +struct SubspaceLMO{LMO<:LinearMinimizationOracle,TD,TI} <: LinearMinimizationOracle lmo::LMO - reduce::TR + deflate::TD inflate::TI - function SymmetricLMO(lmo::LMO, reduce, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} - return new{typeof(lmo),typeof(reduce),typeof(inflate)}( lmo, reduce, inflate) + function SubspaceLMO(lmo::LMO, deflate, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} + return new{typeof(lmo),typeof(deflate),typeof(inflate)}( lmo, deflate, inflate) end end -function compute_extreme_point(sym::SymmetricLMO, direction; kwargs...) - return sym.reduce(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) +function compute_extreme_point(sym::SubspaceLMO, direction; kwargs...) 
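+    # inflate the direction to the full space, call the inner LMO there,
+    # and deflate the resulting vertex back to the subspace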
+ return sym.deflate(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) end diff --git a/src/active_set.jl b/src/active_set.jl index 62393b47f..f02177933 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -27,17 +27,7 @@ ActiveSet{AT,R}() where {AT,R} = ActiveSet{AT,R,Vector{float(eltype(AT))}}([], [ ActiveSet{AT}() where {AT} = ActiveSet{AT,Float64,Vector{float(eltype(AT))}}() function ActiveSet(tuple_values::AbstractVector{Tuple{R,AT}}) where {AT,R} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - end - x = similar(atoms[1], float(eltype(atoms[1]))) - as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) - compute_active_set_iterate!(as) - return as + return ActiveSet{AT,R}(tuple_values) end function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) where {AT,R} @@ -48,7 +38,7 @@ function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] end - x = similar(tuple_values[1][2], float(eltype(tuple_values[1][2]))) + x = similar(atoms[1], float(eltype(atoms[1]))) as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) compute_active_set_iterate!(as) return as @@ -326,3 +316,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe end return active_set.x end + +function update_weights!(as::AbstractActiveSet, new_weights) + as.weights .= new_weights +end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..0b19d7dba 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -1,6 +1,6 @@ """ - ActiveSetQuadratic{AT, R, IT} + ActiveSetQuadraticProductCaching{AT, R, IT} Represents an active set of extreme vertices collected in a FW algorithm, along with their coefficients `(λ_i, a_i)`. @@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` so that the gradient is simply `∇f(x)=Ax+b`. """ -struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadraticProductCaching{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -48,11 +48,21 @@ function detect_quadratic_function(grad!, x0; test=true) return A, b end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} - return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) 
+function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticProductCaching(tuple_values, A, b) end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) +end + +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) +end + +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -64,43 +74,24 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) @inbounds for idx in 1:n weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) - for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) - end - dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadraticProductCaching{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + reset_quadratic_dots!(as) compute_active_set_iterate!(as) return as end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) 
-end - -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - dots_x = zeros(R, n) - dots_A = Vector{Vector{R}}(undef, n) - dots_b = Vector{R}(undef, n) - weights_prev = zeros(R, n) - modified = trues(n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) +# should only be called upon construction +# for active sets with a large number of atoms, this function becomes very costly +function reset_quadratic_dots!(as::ActiveSetQuadraticProductCaching{AT,R}) where {AT,R} + @inbounds for idx in 1:length(as) + as.dots_A[idx] = Vector{R}(undef, idx) for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) + as.dots_A[idx][idy] = fast_dot(as.A * as.atoms[idx], as.atoms[idy]) end - dots_b[idx] = fast_dot(b, atoms[idx]) + as.dots_b[idx] = fast_dot(as.b, as.atoms[idx]) end - x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) - compute_active_set_iterate!(as) return as end @@ -116,16 +107,17 @@ function Base.:*(a::Identity, b) return a.λ * b end end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) + +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadraticProductCaching(tuple_values, Identity(A.λ), b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b) +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, Identity(A.λ), b) end # these three functions do not update the active set iterate -function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} +function Base.push!(as::ActiveSetQuadraticProductCaching{AT,R}, (λ, a)) where {AT,R} dot_x = zero(R) dot_A = Vector{R}(undef, length(as)) dot_b = fast_dot(as.b, a) @@ -147,7 +139,8 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end -function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) +# TODO multi-indices version +function Base.deleteat!(as::ActiveSetQuadraticProductCaching, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] end @@ -165,7 +158,7 @@ function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) return as end -function Base.empty!(as::ActiveSetQuadratic) +function Base.empty!(as::ActiveSetQuadraticProductCaching) empty!(as.atoms) empty!(as.weights) as.x .= 0 @@ -178,7 +171,7 @@ function Base.empty!(as::ActiveSetQuadratic) end function active_set_update!( - active_set::ActiveSetQuadratic{AT,R}, + active_set::ActiveSetQuadraticProductCaching{AT,R}, lambda, atom, renorm=true, idx=nothing; weight_purge_threshold=weight_purge_threshold_default(R), add_dropped_vertices=false, @@ -207,7 +200,7 @@ function active_set_update!( return active_set end -function active_set_renormalize!(active_set::ActiveSetQuadratic) +function active_set_renormalize!(active_set::ActiveSetQuadraticProductCaching) renorm = sum(active_set.weights) active_set.weights 
./= renorm active_set.weights_prev ./= renorm @@ -216,7 +209,7 @@ function active_set_renormalize!(active_set::ActiveSetQuadratic) return active_set end -function active_set_argmin(active_set::ActiveSetQuadratic, direction) +function active_set_argmin(active_set::ActiveSetQuadraticProductCaching, direction) valm = typemax(eltype(direction)) idxm = -1 idx_modified = findall(active_set.modified) @@ -247,7 +240,7 @@ function active_set_argmin(active_set::ActiveSetQuadratic, direction) return (active_set[idxm]..., idxm) end -function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) +function active_set_argminmax(active_set::ActiveSetQuadraticProductCaching, direction; Φ=0.5) valm = typemax(eltype(direction)) valM = typemin(eltype(direction)) idxm = -1 @@ -288,7 +281,7 @@ function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) end # in-place warm-start of a quadratic active set for A and b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b) where {AT,R,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R}, A::H, b) where {AT,R,H} @inbounds for idx in eachindex(warm_as) for idy in 1:idx warm_as.dots_A[idx][idy] = fast_dot(A * warm_as.atoms[idx], warm_as.atoms[idy]) @@ -299,7 +292,7 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b end # in-place warm-start of a quadratic active set for b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) where {AT,R,IT,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R,IT,H}, b) where {AT,R,IT,H} warm_as.dots_x .= 0 warm_as.weights_prev .= 0 warm_as.modified .= true @@ -311,3 +304,8 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end +function update_weights!(as::ActiveSetQuadraticProductCaching, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..4a560377d --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,343 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weight`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. 
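+
+A hedged usage sketch, mirroring `examples/blended_pairwise_with_direct.jl` and assuming
+a feasible vertex `x0`, a target `xp`, and the objective `f(x) = ‖x - xp‖²` (so `A = 2I`
+and `b = -2xp`), with HiGHS as the LP solver:
+
+```julia
+import HiGHS
+import MathOptInterface as MOI
+using LinearAlgebra
+
+as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x0))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+```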
+""" +struct ActiveSetQuadraticLinearSolve{ + AT, + R<:Real, + IT, + H, + BT, + OT<:MOI.AbstractOptimizer, + AS<:AbstractActiveSet, + SF, +} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::BT # linear term + active_set::AS + lp_optimizer::OT + scheduler::SF + counter::Base.RefValue{Int} +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadraticProductCaching` active set. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadraticProductCaching(tuple_values, A, b) + return ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + inner_as.A, + inner_as.b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A::LinearAlgebra.UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + A, + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + 
return ActiveSetQuadraticLinearSolve(
+        tuple_values,
+        Identity(A.λ),
+        b,
+        lp_optimizer;
+        scheduler=scheduler,
+    )
+end
+function ActiveSetQuadraticLinearSolve{AT,R}(
+    tuple_values::Vector{<:Tuple{<:Number,<:Any}},
+    A::UniformScaling,
+    b,
+    lp_optimizer;
+    scheduler=LogScheduler(),
+) where {AT,R}
+    return ActiveSetQuadraticLinearSolve{AT,R}(
+        tuple_values,
+        Identity(A.λ),
+        b,
+        lp_optimizer;
+        scheduler=scheduler,
+    )
+end
+
+# all mutating functions are delegated to the inner active set
+
+Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple)
+
+Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx)
+
+Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set)
+
+function active_set_update!(
+    as::ActiveSetQuadraticLinearSolve{AT,R},
+    lambda,
+    atom,
+    renorm=true,
+    idx=nothing;
+    weight_purge_threshold=weight_purge_threshold_default(R),
+    add_dropped_vertices=false,
+    vertex_storage=nothing,
+) where {AT,R}
+    if idx === nothing
+        idx = find_atom(as, atom)
+    end
+    active_set_update!(
+        as.active_set,
+        lambda,
+        atom,
+        renorm,
+        idx;
+        weight_purge_threshold=weight_purge_threshold,
+        add_dropped_vertices=add_dropped_vertices,
+        vertex_storage=vertex_storage,
+    )
+    # new atom introduced, we can solve the auxiliary LP
+    if idx < 0
+        as.counter[] += 1
+        if should_solve_lp(as, as.scheduler)
+            solve_quadratic_activeset_lp!(as)
+        end
+    end
+    return as
+end
+
+active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set)
+
+function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction)
+    return active_set_argmin(as.active_set, direction)
+end
+
+function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5)
+    return active_set_argminmax(as.active_set, direction; Φ=Φ)
+end
+
+# generic quadratic with quadratic information provided
+
+"""
+    solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})
+
+Solves the auxiliary LP over the current active set.
+The method is specialized by type `H` of the Hessian matrix `A`.
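+
+With `V` denoting the matrix whose columns are the current atoms, the LP solved here reads
+
+    min ∑ᵢ λᵢ   s.t.   λ ≥ 0,   ∑ᵢ λᵢ = 1,   Vᵀ A V λ = -Vᵀ b,
+
+i.e., a feasibility problem over the simplex searching for convex weights `λ` such that
+`⟨A V λ + b, vᵢ⟩ = 0` for every atom `vᵢ` (the gradient at `x = V λ` is orthogonal to all
+atoms); near-zero weights are subsequently purged from the active set.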
+""" +function solve_quadratic_activeset_lp!( + as::ActiveSetQuadraticLinearSolve{AT,R,IT,H}, +) where {AT,R,IT,H} + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # Vᵗ A V λ == -Vᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!( + lhs.terms, + _compute_quadratic_constraint_term(atom, as.A, as.atoms[j], λ[j]), + ) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +function _compute_quadratic_constraint_term(atom1, A::AbstractMatrix, atom2, λ) + return MOI.ScalarAffineTerm(fast_dot(atom1, A, atom2), λ) +end + +function _compute_quadratic_constraint_term(atom1, A::Union{Identity,LinearAlgebra.UniformScaling}, atom2, λ) + return MOI.ScalarAffineTerm(A.λ * fast_dot(atom1, atom2), λ) +end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = + LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..6a408a0ac --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,96 @@ +struct ActiveSetSparsifier{AT,R,IT,AS<:AbstractActiveSet{AT,R,IT},OT<:MOI.AbstractOptimizer} <: + AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier( + active_set::AbstractActiveSet, + optimizer::MOI.AbstractOptimizer; + minimum_vertices=50, + solve_frequency=50, +) + return ActiveSetSparsifier( + active_set, + active_set.weights, + active_set.atoms, + active_set.x, + optimizer, + minimum_vertices, + solve_frequency, + Ref(0), + ) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + return push!(as.active_set, (λ, 
a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AT,R,IT,AS}, + lambda, + atom, + renorm=true, + idx=nothing; + kwargs..., +) where {AS,AT,R,IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) + n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * atom + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in + (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= sqrt(weight_purge_threshold_default(typeof(weight_value))) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = + active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl index ec1c5b2f5..d07a73649 100644 --- a/src/alternating_methods.jl +++ b/src/alternating_methods.jl @@ -35,12 +35,12 @@ end Alternating Linear Minimization minimizes the objective `f` over the intersections of the feasible domains specified by `lmos`. The tuple `x0` defines the initial points for each domain. -Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `primal` primal value `f(x)` - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 of the sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. 
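+
+A hedged sketch of a call, assuming two LMOs `lmo1, lmo2` with feasible starting points
+`x1, x2` and using the block-coordinate Frank-Wolfe algorithm as inner method:
+
+```julia
+x, v, primal, dual_gap, dist2, traj_data = FrankWolfe.alternating_linear_minimization(
+    FrankWolfe.block_coordinate_frank_wolfe,
+    f,
+    grad!,
+    (lmo1, lmo2),
+    (x1, x2);
+    lambda=1.0,
+    verbose=true,
+)
+```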
""" function alternating_linear_minimization( @@ -49,17 +49,17 @@ function alternating_linear_minimization( grad!, lmos::NTuple{N,LinearMinimizationOracle}, x0::Tuple{Vararg{Any,N}}; - lambda::Union{Float64, Function}=1.0, + lambda::Union{Float64,Function}=1.0, verbose=false, trajectory=false, callback=nothing, max_iteration=10000, - print_iter = max_iteration / 10, + print_iter=max_iteration / 10, memory_mode=InplaceEmphasis(), line_search::LS=Adaptive(), epsilon=1e-7, kwargs..., -) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} +) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0)) gradf = similar(x0_bc) @@ -74,21 +74,14 @@ function alternating_linear_minimization( for i in 1:N grad!(gradf.blocks[i], x.blocks[i]) end - t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] + t = [N * b - sum(x.blocks) for b in x.blocks] return storage.blocks = λ[] * gradf.blocks + t end end - function dist2(x::BlockVector) - s = 0 - for i=1:N - for j=1:i-1 - diff = x.blocks[i] - x.blocks[j] - s += fast_dot(diff, diff) - end - end - return s - end + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function build_objective() λ = Ref(λ0) @@ -122,8 +115,11 @@ function alternating_linear_minimization( num_type = eltype(x0[1]) grad_type = eltype(gradf.blocks[1]) - line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) - println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration") + line_search_type = + line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) + println( + "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration", + ) println("TYPE: $num_type GRADIENTTYPE: $grad_type") println("LAMBDA: $lambda") @@ -177,7 +173,7 @@ function alternating_linear_minimization( end if lambda isa Function - callback = function (state,args...) + callback = function (state, args...) state.f.λ[] = lambda(state) state.grad!.λ[] = state.f.λ[] @@ -211,33 +207,15 @@ function alternating_linear_minimization( end -function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3) - f(x) = sum(abs2, x - y) - grad!(storage, x) = storage .= 2 * (x - y) - - x0 = FrankWolfe.compute_extreme_point(lmo, y) - x_opt, _ = FrankWolfe.frank_wolfe( - f, - grad!, - lmo, - x0, - epsilon=eps, - max_iteration=max_iter, - trajectory=true, - line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false), - ) - return x_opt -end - """ alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N} Computes a point in the intersection of feasible domains specified by `lmos`. -Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 * sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. 
""" function alternating_projections( @@ -252,6 +230,10 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} return alternating_projections( ProductLMO(lmos), @@ -265,6 +247,10 @@ function alternating_projections( callback, traj_data, timeout, + proj_method, + inner_epsilon, + reuse_active_set, + kwargs..., ) end @@ -281,17 +267,21 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} # header and format string for output of the algorithm - headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"] + headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"] format_string = "%6s %13s %14e %14e %14e %14e\n" - function format_state(state, infeas) + function format_state(state, primal) rep = ( steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.dual_gap), - Float64(infeas), + Float64(primal), state.time, state.t / state.time, ) @@ -300,27 +290,61 @@ function alternating_projections( t = 0 dual_gap = Inf - x = fill(x0, N) - v = similar(x) + dual_gaps = fill(Inf, N) + x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N))) step_type = ST_REGULAR gradient = similar(x) - ndim = ndims(x) - infeasibility(x) = sum( - fast_dot( - selectdim(x, ndim, i) - selectdim(x, ndim, j), - selectdim(x, ndim, i) - selectdim(x, ndim, j), - ) for i in 1:N for j in 1:i-1 - ) + if reuse_active_set + if proj_method ∉ + [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe] + error("The selected FW method does not support active sets reuse.") + end + active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N] + end - partial_infeasibility(x) = - sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N) + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function grad!(storage, x) - @. 
storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N] + return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] end - projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1)) + function projection_step(i, t) + xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block + f(y) = sum(abs2, y - xii) + function grad_proj!(storage, y) + return storage .= 2 * (y - xii) + end + + if reuse_active_set + + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + active_sets[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + active_sets[i] = results[:active_set] + else + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + x.blocks[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + end + return results[:x], results[:dual_gap] + end if trajectory @@ -372,19 +396,18 @@ function alternating_projections( # Projection step: for i in 1:N # project the previous iterate on the i-th feasible region - x[i] = projection_step(x[mod(i - 2, N)+1], i, t) + x.blocks[i], dual_gaps[i] = projection_step(i, t) end + dual_gap = sum(dual_gaps) + # Update gradients grad!(gradient, x) - # Update dual gaps - v = compute_extreme_point.(lmo.lmos, gradient) - dual_gap = fast_dot(x - v, gradient) # go easy on the memory - only compute if really needed if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing) - infeas = infeasibility(x) + primal = dist2(x) end first_iter = false @@ -393,12 +416,12 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, - v, + nothing, nothing, nothing, nothing, @@ -408,7 +431,7 @@ function alternating_projections( step_type, ) # @show state - if callback(state, infeas) === false + if callback(state, primal) === false break end end @@ -419,9 +442,9 @@ function alternating_projections( # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting # hence the final computation. step_type = ST_LAST - infeas = infeasibility(x) + primal = dist2(x) grad!(gradient, x) - v = compute_extreme_point.(lmo.lmos, gradient) + v = compute_extreme_point(lmo, gradient) dual_gap = fast_dot(x - v, gradient) tot_time = (time_ns() - time_start) / 1.0e9 @@ -429,8 +452,8 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, @@ -443,9 +466,9 @@ function alternating_projections( gradient, step_type, ) - callback(state, infeas) + callback(state, primal) end - return x, v, dual_gap, infeas, traj_data + return x, v, dual_gap, primal, traj_data end diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index e2efec4d2..a42862063 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -1,4 +1,3 @@ - """ blended_pairwise_conditional_gradient(f, grad!, lmo, x0; kwargs...) 
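
The sparsifying active set introduced earlier in this patch can be passed to
this method in place of the starting point; a minimal sketch (`f`, `grad!`,
`lmo`, and `x0` are assumed from the surrounding problem):

```julia
import HiGHS
import MathOptInterface as MOI

as = FrankWolfe.ActiveSetSparsifier(
    FrankWolfe.ActiveSet([(1.0, x0)]),
    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
)
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
    f,
    grad!,
    lmo,
    as;
    max_iteration=1000,
)
```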
@@ -325,7 +324,6 @@ function blended_pairwise_conditional_gradient(
             linesearch_workspace,
             memory_mode,
         )
-
         if callback !== nothing
             state = CallbackState(
                 t,
diff --git a/src/block_coordinate_algorithms.jl b/src/block_coordinate_algorithms.jl
index deb8a5a25..0d034a7d3 100644
--- a/src/block_coordinate_algorithms.jl
+++ b/src/block_coordinate_algorithms.jl
@@ -41,20 +41,57 @@ end
 
 StochasticUpdate() = StochasticUpdate(-1)
 
-mutable struct DualGapOrder <: BlockCoordinateUpdateOrder
+"""
+The dual gap order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual gaps.
+"""
+struct DualGapOrder <: BlockCoordinateUpdateOrder
     limit::Int
 end
 
 DualGapOrder() = DualGapOrder(-1)
 
+"""
+The dual progress order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual progress.
+"""
 mutable struct DualProgressOrder <: BlockCoordinateUpdateOrder
     limit::Int
     previous_dual_gaps::Vector{Float64}
     dual_progress::Vector{Float64}
+    last_indices::Vector{Int}
 end
 
-DualProgressOrder() = DualProgressOrder(-1, [], [])
-DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [])
+DualProgressOrder() = DualProgressOrder(-1, [], [], [])
+DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [], [])
+
+"""
+The lazy update order is discussed in "Flexible block-iterative
+analysis for the Frank-Wolfe algorithm" by Braun, Pokutta, &
+Woodstock (2024).
+`lazy_block` is the index of a computationally expensive block to update;
+`refresh_rate` describes the frequency at which we perform a full activation;
+and `block_size` describes the number of "faster" blocks (i.e., those
+excluding `lazy_block`) activated, chosen uniformly at random, during each
+of the "faster" iterations; for more detail, see the article.
+If `block_size` is unspecified, it defaults to 1.
+Note: This methodology is currently only proven to work
+with `FrankWolfe.Shortstep` line searches and a (not-yet
+implemented) adaptive method; see the article for details.
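+
+For illustration, a hypothetical three-block invocation where the third block
+is the expensive one (`f`, `grad!`, the three LMOs, and `x0` are assumed from
+the surrounding problem setup):
+
+```julia
+x, _ = FrankWolfe.block_coordinate_frank_wolfe(
+    f,
+    grad!,
+    FrankWolfe.ProductLMO(lmo1, lmo2, lmo_expensive),
+    x0;
+    update_order=FrankWolfe.LazyUpdate(3, 10),
+    line_search=FrankWolfe.Shortstep(2.0),
+)
+```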
+""" +struct LazyUpdate <: BlockCoordinateUpdateOrder + lazy_block::Int + refresh_rate::Int + block_size::Int +end + +function LazyUpdate(lazy_block::Int,refresh_rate::Int) + return LazyUpdate(lazy_block, refresh_rate, 1) +end function select_update_indices(::FullUpdate, s::CallbackState, _) return [1:length(s.lmo.lmos)] @@ -94,6 +131,19 @@ function select_update_indices(u::StochasticUpdate, s::CallbackState, _) return [[rand(1:l)] for i in 1:u.limit] end +function sample_without_replacement(n::Int, weights::Vector{Float64}) + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + indices = zeros(Int, n) + for i in 1:n + indices[i] = findfirst(cumsum_weights .> rand()) + weights[indices[i]] = 0 + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + end + return indices +end + function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps) l = length(s.lmo.lmos) @@ -101,22 +151,32 @@ function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps @assert u.limit <= l @assert u.limit > 0 || u.limit == -1 - # In the first iteration update every block so we get finite dual progress - if s.t < 2 - u.previous_dual_gaps = copy(dual_gaps) - return [[i] for i=1:l] - end - - if s.t == 1 - u.dual_progress = dual_gaps - u.previous_dual_gaps + # In the first two iterations update every block so we get finite dual progress + if s.t < 2 + u.dual_progress = ones(l) + indices = [[i for i=1:l]] else - diff = dual_gaps - u.previous_dual_gaps - u.dual_progress = [d == 0 ? u.dual_progress[i] : d for (i, d) in enumerate(diff)] + # Update dual progress only on updated blocks + for i in u.last_indices + d = u.previous_dual_gaps[i] - dual_gaps[i] + if d < 0 + u.dual_progress[i] = 0 + else + u.dual_progress[i] = d + end + end + n = u.limit == -1 ? l : u.limit + + # If less than n blocks have non-zero dual progress, update all of them + if sum(u.dual_progress .!= 0) < n + indices = [[i for i=1:l]] + else + indices = [sample_without_replacement(n, u.dual_progress)] + end end u.previous_dual_gaps = copy(dual_gaps) - - n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(u.dual_progress/sum(u.dual_progress)) .> rand())] for _=1:n] + u.last_indices = vcat(indices...) + return indices end function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) @@ -128,11 +188,20 @@ function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) # In the first iteration update every block so we get finite dual gaps if s.t < 1 - return [[i] for i=1:l] + return [[i for i=1:l]] end n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(dual_gaps/sum(dual_gaps)) .> rand())] for _=1:n] + return [sample_without_replacement(n, dual_gaps)] +end + +function select_update_indices(update::LazyUpdate, s::CallbackState, dual_gaps) + #Returns a sequence of randomized cheap indices by + #excluding update.lazy_block until "refresh_rate" updates + #occur, then adds an update of everything while mainting + #randomized order. 
+ l = length(s.lmo.lmos) + return push!([[rand(range(1,l)[1:l .!= update.lazy_block]) for _ in range(1,update.block_size)] for _ in 1:(update.refresh_rate -1)], range(1,l)) end """ @@ -200,19 +269,18 @@ mutable struct BPCGStep <: UpdateStep phi::Float64 end -function Base.copy(::FrankWolfeStep) - return FrankWolfeStep() -end +Base.copy(::FrankWolfeStep) = FrankWolfeStep() function Base.copy(obj::BPCGStep) if obj.active_set === nothing - return BPCGStep(false, nothing, obj.renorm_interval, obj.lazy_tolerance, Inf) + return BPCGStep(obj.lazy, nothing, obj.renorm_interval, obj.lazy_tolerance, obj.phi) else return BPCGStep(obj.lazy, copy(obj.active_set), obj.renorm_interval, obj.lazy_tolerance, obj.phi) end end BPCGStep() = BPCGStep(false, nothing, 1000, 2.0, Inf) +BPCGStep(lazy::Bool) = BPCGStep(lazy, nothing, 1000, 2.0, Inf) function update_iterate( ::FrankWolfeStep, @@ -287,7 +355,7 @@ function update_iterate( # minor modification from original paper for improved sparsity # (proof follows with minor modification when estimating the step) - if local_gap > s.phi / s.lazy_tolerance + if local_gap > max(s.phi / s.lazy_tolerance, 0) # Robust condition to not drop the zero-vector if the dual_gap is negative by inaccuracy d = muladd_memory_mode(memory_mode, d, a, v_local) vertex_taken = v_local gamma_max = a_lambda diff --git a/src/linesearch.jl b/src/linesearch.jl index dd27e44d1..f974b9c38 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -369,28 +369,31 @@ Convergence is not guaranteed in general. # References - [Secant Method](https://en.wikipedia.org/wiki/Secant_method) """ -struct Secant{F} <: LineSearchMethod +struct Secant{F,LSM<:LineSearchMethod} <: LineSearchMethod + inner_ls::LSM limit_num_steps::Int tol::Float64 domain_oracle::F end function Secant(limit_num_steps, tol) - return Secant(limit_num_steps, tol, x -> true) + return Secant(Backtracking(), limit_num_steps, tol, x -> true) end -function Secant(; limit_num_steps=40, tol=1e-8) - return Secant(limit_num_steps, tol) +function Secant(;inner_ls=Backtracking(), limit_num_steps=40, tol=1e-8, domain_oracle=(x -> true)) + return Secant(inner_ls, limit_num_steps, tol, domain_oracle) end -mutable struct SecantWorkspace{XT,GT} +mutable struct SecantWorkspace{XT,GT, IWS} + inner_ws::IWS x::XT gradient::GT last_gamma::Float64 end -function build_linesearch_workspace(::Secant, x, gradient) - return SecantWorkspace(similar(x), similar(gradient), 1.0) # Initialize last_gamma to 1.0 +function build_linesearch_workspace(ls::Secant, x, gradient) + inner_ws = build_linesearch_workspace(ls.inner_ls, x, gradient) + return SecantWorkspace(inner_ws, similar(x), similar(gradient), 1.0) # Initialize last_gamma to 1.0 end function perform_line_search( @@ -422,7 +425,7 @@ function perform_line_search( while abs(dot_gdir) > line_search.tol if i > line_search.limit_num_steps workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end grad!(grad_storage, storage) @@ -430,7 +433,7 @@ function perform_line_search( if dot_gdir_new ≈ dot_gdir workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end gamma_new = gamma - dot_gdir_new * (gamma - gamma_prev) / (dot_gdir_new - dot_gdir) @@ -448,6 +451,33 @@ function perform_line_search( dot_gdir = dot_gdir_new i += 1 end + if abs(dot_gdir) > line_search.tol + # Choose gamma_max to be domain feasible + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + while !line_search.domain_oracle(storage) + gamma_max 
/= 2 + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + end + gamma = perform_line_search( + line_search.inner_ls, + 0, + f, + grad!, + gradient, + x, + d, + gamma_max, + workspace.inner_ws, + memory_mode, + ) + + storage = muladd_memory_mode(memory_mode, storage, x, gamma, d) + new_val = f(storage) + + if new_val <= best_val + best_gamma = gamma + end + end workspace.last_gamma = best_gamma # Update last_gamma before returning return best_gamma end diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index a0c1b3d01..ba37356b0 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. diff --git a/src/pairwise.jl b/src/pairwise.jl index a06ca2b19..b6e8973a4 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -258,28 +258,28 @@ function pairwise_frank_wolfe( gamma = min(gamma_max, gamma) - # cleanup and renormalize every x iterations. Only for the fw steps. - renorm = mod(t, renorm_interval) == 0 # away update active_set_update!( active_set, -gamma, away_vertex, - true, + false, away_index, add_dropped_vertices=use_extra_vertex_storage, vertex_storage=extra_vertex_storage, ) - if add_dropped_vertices && gamma == gamma_max + if add_dropped_vertices && gamma ≈ gamma_max for vtx in active_set.atoms if vtx != v push!(extra_vertex_storage, vtx) end end end - # fw update - active_set_update!(active_set, gamma, fw_vertex, renorm, fw_index) + # fw update + active_set_update!(active_set, gamma, fw_vertex, true, fw_index) end + # println(active_set.weights) + # println([atom[1] for atom in active_set.atoms]) if callback !== nothing state = CallbackState( @@ -328,6 +328,7 @@ function pairwise_frank_wolfe( v = compute_extreme_point(lmo, gradient) primal = f(x) dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient) + dual_gap = min(phi_value, dual_gap) step_type = ST_LAST tot_time = (time_ns() - time_start) / 1e9 if callback !== nothing diff --git a/src/types.jl b/src/types.jl index 0a00c2d16..e5bc9b266 100644 --- a/src/types.jl +++ b/src/types.jl @@ -256,55 +256,60 @@ Base.@propagate_inbounds function muladd_memory_mode(::InplaceEmphasis, x::Matri end """ - SymmetricArray{T, DT} + SubspaceVector{HasMultiplicities, T} +Companion structure of `SubspaceLMO` containing three fields: +- `data` is the full structure to be deflated, +- `vec` is a vector in the reduced subspace in which computations are performed, +- `mul` is only used to compute scalar products when `HasMultiplicities = true`, +which should be avoided (when possible) for performance reasons. 
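+
+For instance, a real symmetric 2×2 matrix is determined by 3 entries, the
+off-diagonal one counting twice in scalar products, hence the multiplicity
+vector below:
+
+```julia
+using LinearAlgebra
+A = [1.0 2.0; 2.0 3.0]
+v = FrankWolfe.SubspaceVector(A, [1.0, 2.0, 3.0], [1.0, 2.0, 1.0])
+dot(v, v) ≈ dot(A, A)  # both equal 18.0
+```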
""" -struct SymmetricArray{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} +struct SubspaceVector{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} data::DT # full array to be symmetrised, will generally fall out of sync wrt vec vec::V # vector representing the array mul::Vector{T} # only used for scalar products end -function SymmetricArray(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{false,T,DT,V}(data, vec, T[]) +function SubspaceVector(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{false,T,DT,V}(data, vec, T[]) end -function SymmetricArray(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) +function SubspaceVector(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) end -Base.@propagate_inbounds function Base.getindex(A::SymmetricArray, i) +Base.@propagate_inbounds function Base.getindex(A::SubspaceVector, i) @boundscheck checkbounds(A.vec, i) return @inbounds getindex(A.vec, i) end -Base.@propagate_inbounds function Base.setindex!(A::SymmetricArray, x, i) +Base.@propagate_inbounds function Base.setindex!(A::SubspaceVector, x, i) @boundscheck checkbounds(A.vec, i) return @inbounds setindex!(A.vec, x, i) end -Base.size(A::SymmetricArray) = size(A.vec) -Base.eltype(A::SymmetricArray) = eltype(A.vec) -Base.similar(A::SymmetricArray{true}) = SymmetricArray(similar(A.data), similar(A.vec), A.mul) -Base.similar(A::SymmetricArray{false}) = SymmetricArray(similar(A.data), similar(A.vec)) -Base.similar(A::SymmetricArray{true}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) -Base.similar(A::SymmetricArray{false}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T)) -Base.collect(A::SymmetricArray{true}) = SymmetricArray(collect(A.data), collect(A.vec), A.mul) -Base.collect(A::SymmetricArray{false}) = SymmetricArray(collect(A.data), collect(A.vec)) -Base.copyto!(dest::SymmetricArray, src::SymmetricArray) = copyto!(dest.vec, src.vec) -Base.:*(scalar::Real, A::SymmetricArray{true}) = SymmetricArray(A.data, scalar * A.vec, A.mul) -Base.:*(scalar::Real, A::SymmetricArray{false}) = SymmetricArray(A.data, scalar * A.vec) -Base.:*(A::SymmetricArray, scalar::Real) = scalar * A -Base.:/(A::SymmetricArray, scalar::Real) = inv(scalar) * A -Base.:+(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec, A1.mul) -Base.:+(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec) -Base.:-(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec, A1.mul) -Base.:-(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec) -Base.:-(A::SymmetricArray{true,T}) where {T} = SymmetricArray(A.data, -A.vec, A.mul) -Base.:-(A::SymmetricArray{false,T}) where {T} = SymmetricArray(A.data, -A.vec) - -LinearAlgebra.dot(A1::SymmetricArray{true}, A2::SymmetricArray{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) -LinearAlgebra.dot(A1::SymmetricArray{false}, A2::SymmetricArray{false}) = dot(A1.vec, A2.vec) -LinearAlgebra.norm(A::SymmetricArray) = sqrt(dot(A, A)) - -Base.@propagate_inbounds Base.isequal(A1::SymmetricArray, A2::SymmetricArray) = 
isequal(A1.vec, A2.vec) +Base.size(A::SubspaceVector) = size(A.vec) +Base.eltype(A::SubspaceVector) = eltype(A.vec) +Base.similar(A::SubspaceVector{true}) = SubspaceVector(similar(A.data), similar(A.vec), A.mul) +Base.similar(A::SubspaceVector{false}) = SubspaceVector(similar(A.data), similar(A.vec)) +Base.similar(A::SubspaceVector{true}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) +Base.similar(A::SubspaceVector{false}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T)) +Base.collect(A::SubspaceVector{true}) = SubspaceVector(collect(A.data), collect(A.vec), A.mul) +Base.collect(A::SubspaceVector{false}) = SubspaceVector(collect(A.data), collect(A.vec)) +Base.copyto!(dest::SubspaceVector, src::SubspaceVector) = copyto!(dest.vec, src.vec) +Base.:*(scalar::Real, A::SubspaceVector{true}) = SubspaceVector(A.data, scalar * A.vec, A.mul) +Base.:*(scalar::Real, A::SubspaceVector{false}) = SubspaceVector(A.data, scalar * A.vec) +Base.:*(A::SubspaceVector, scalar::Real) = scalar * A +Base.:/(A::SubspaceVector, scalar::Real) = inv(scalar) * A +Base.:+(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec, A1.mul) +Base.:+(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec) +Base.:-(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec, A1.mul) +Base.:-(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec) +Base.:-(A::SubspaceVector{true,T}) where {T} = SubspaceVector(A.data, -A.vec, A.mul) +Base.:-(A::SubspaceVector{false,T}) where {T} = SubspaceVector(A.data, -A.vec) + +LinearAlgebra.dot(A1::SubspaceVector{true}, A2::SubspaceVector{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) +LinearAlgebra.dot(A1::SubspaceVector{false}, A2::SubspaceVector{false}) = dot(A1.vec, A2.vec) +LinearAlgebra.norm(A::SubspaceVector) = sqrt(dot(A, A)) + +Base.@propagate_inbounds Base.isequal(A1::SubspaceVector, A2::SubspaceVector) = isequal(A1.vec, A2.vec) diff --git a/src/utils.jl b/src/utils.jl index 02f14eb71..f7f071a35 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1, return s end +fast_dot(a, Q, b) = dot(a, Q, b) + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + d = Q.diag + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx + nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]] + end +end + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + n = length(a) + if length(b) != n + throw( + DimensionMismatch("Vector a has a length $n but b has a length $(length(b))") + ) + end + anzind = SparseArrays.nonzeroinds(a) + bnzind = SparseArrays.nonzeroinds(b) + anzval = SparseArrays.nonzeros(a) + bnzval = SparseArrays.nonzeros(b) + s = zero(Base.promote_eltype(a, Q, b)) + + if isempty(anzind) || isempty(bnzind) + return s + end + + a_idx = 1 + b_idx = 1 + a_idx_last = length(anzind) + b_idx_last = length(bnzind) + + # go through the nonzero indices of a and b 
simultaneously + @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last + ia = anzind[a_idx] + ib = bnzind[b_idx] + if ia == ib + s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx]) + a_idx += 1 + b_idx += 1 + elseif ia < ib + a_idx += 1 + else + b_idx += 1 + end + end + return s +end + + +function _fast_quadratic_form_symmetric(a, Q) + d = Q.diag + if length(d) != length(a) + throw(DimensionMismatch()) + end + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + s = zero(Base.promote_eltype(a, Q)) + @inbounds for nzidx in eachindex(nzvals) + s += nzvals[nzidx]^2 * d[nzinds[nzidx]] + end + return s +end + """ trajectory_callback(storage) diff --git a/test/active_set.jl b/test/active_set.jl index 101b17d07..29c98f62a 100644 --- a/test/active_set.jl +++ b/test/active_set.jl @@ -4,7 +4,7 @@ using FrankWolfe import FrankWolfe: ActiveSet using LinearAlgebra: norm -@testset "Active sets" begin +@testset "Active sets " begin @testset "Constructors and eltypes" begin active_set = ActiveSet([(0.1, [1, 2, 3]), (0.9, [2, 3, 4]), (0.0, [5, 6, 7])]) @@ -100,7 +100,7 @@ using LinearAlgebra: norm end end -@testset "Simplex gradient descent" begin +@testset "Simplex gradient descent " begin # Gradient descent over a 2-D unit simplex # each atom is a vertex, direction points to [1,1] # note: integers for atom element types @@ -175,7 +175,7 @@ end end end -@testset "LP separation oracle" begin +@testset "LP separation oracle " begin # Gradient descent over a L-inf ball of radius one # current active set contains 3 vertices # direction points to [1,1] @@ -212,7 +212,7 @@ end ) end -@testset "Argminmax" begin +@testset "Argminmax " begin active_set = FrankWolfe.ActiveSet([(0.6, [-1, -1]), (0.2, [0, 1]), (0.2, [1, 0])]) (λ_min, a_min, i_min, val, λ_max, a_max, i_max, valM, progress) = FrankWolfe.active_set_argminmax(active_set, [1, 1.5]) @@ -221,7 +221,7 @@ end @test_throws ErrorException FrankWolfe.active_set_argminmax(active_set, [NaN, NaN]) end -@testset "LPseparationWithScaledHotVector" begin +@testset "LPseparationWithScaledHotVector " begin v1 = FrankWolfe.ScaledHotVector(1, 1, 2) v2 = FrankWolfe.ScaledHotVector(1, 2, 2) v3 = FrankWolfe.ScaledHotVector(0, 2, 2) @@ -240,7 +240,7 @@ end ) end -@testset "ActiveSet for BigFloat" begin +@testset "ActiveSet for BigFloat " begin n = Int(1e2) lmo = FrankWolfe.LpNormLMO{BigFloat,1}(rand()) x0 = Vector(FrankWolfe.compute_extreme_point(lmo, zeros(n))) diff --git a/test/active_set_variants.jl b/test/active_set_variants.jl index 59b4a1cad..c191389f7 100644 --- a/test/active_set_variants.jl +++ b/test/active_set_variants.jl @@ -9,7 +9,7 @@ function test_callback(state, active_set, args...) @test grad0 ≈ state.gradient end -@testset "Testing active set Frank-Wolfe variants, BPFW, AFW, and PFW, including their lazy versions" begin +@testset "Testing active set based Frank-Wolfe variants " begin f(x) = norm(x)^2 function grad!(storage, x) @. 
storage = 2x @@ -134,7 +134,7 @@ end ) end -@testset "recompute or not last vertex" begin +@testset "Recompute or not last vertex " begin n = 10 f(x) = norm(x)^2 function grad!(storage, x) @@ -171,7 +171,7 @@ end @test lmo.counter == prev_counter - 1 end -@testset "Quadratic active set" begin +@testset "Quadratic active set " begin n = 2 # number of dimensions p = 10 # number of points function simple_reg_loss(θ, data_point) @@ -218,7 +218,7 @@ end end lmo = FrankWolfe.LpNormLMO{Float64, 2}(1.05 * norm(params_perfect)) x0 = FrankWolfe.compute_extreme_point(lmo, zeros(Float64, n+1)) - active_set = FrankWolfe.ActiveSetQuadratic([(1.0, x0)], gradf) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], gradf) res = FrankWolfe.blended_pairwise_conditional_gradient( f, gradf, diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 904de49d8..ddbd2bb8a 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -19,7 +19,7 @@ lmo1 = FrankWolfe.ScaledBoundLInfNormBall(-ones(n), zeros(n)) lmo2 = FrankWolfe.ScaledBoundLInfNormBall(zeros(n), ones(n)) lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) -@testset "Testing alternating linear minimization with block coordinate FW for different LMO-pairs " begin +@testset "Testing ALM with block-coordinate FW " begin x, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -27,7 +27,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1.0, + lambda=0.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -40,7 +40,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/3, + lambda=1/6, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -53,7 +53,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/9, + lambda=1/18, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -66,7 +66,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, + lambda=1.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -102,7 +102,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (storage, x) -> storage .= zero(x), (lmo1, lmo3), ones(n), - verbose=true, + verbose=false, line_search=FrankWolfe.Shortstep(2), ) @@ -116,7 +116,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (lmo1, lmo_prob), ones(n), trajectory=true, - verbose=true, + verbose=false, ) @test abs(x.blocks[1][1]) < 1e-6 @@ -134,6 +134,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) ones(n), line_search=FrankWolfe.Agnostic(), momentum=0.9, + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-3 @@ -141,10 +142,10 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) end -@testset "Testing different update orders for block coordinate FW in within alternating linear minimization" begin +@testset "Testing update orders for block-coordinate ALM-FW " begin orders = [ - FrankWolfe.FullUpdate(), + FrankWolfe.FullUpdate(), [FrankWolfe.CyclicUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.StochasticUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.DualGapOrder(i) for i in [-1, 1, 2]]..., @@ -160,13 +161,14 @@ end ones(n), line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), update_order=order, + lambda=0.5, ) @test abs(x.blocks[1][1] - 
0.5 / n) < 1e-5 @test abs(x.blocks[2][1] - 1 / n) < 1e-5 end end -@testset "Testing alternating linear minimization with different FW methods" begin +@testset "Testing ALM with different FW methods " begin methods = [ FrankWolfe.frank_wolfe, @@ -181,6 +183,7 @@ end grad!, (lmo2, lmo_prob), ones(n), + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-6 @@ -188,7 +191,7 @@ end end end -@testset "Testing block-coordinate FW with different update steps and linesearch strategies" begin +@testset "Testing stepsize/linesearch in block-coordinate FW" begin x, _, _, _, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -220,7 +223,7 @@ end grad!, (lmo1, lmo2), ones(n), - update_step=FrankWolfe.BPCGStep(true, nothing, 1000, false, Inf), + update_step=FrankWolfe.BPCGStep(true), ) @test abs(x.blocks[1][1]) < 1e-6 @@ -244,8 +247,8 @@ end grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, - line_search=(FrankWolfe.Shortstep(8.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 2+2*λ + lambda=1.5, + line_search=(FrankWolfe.Shortstep(4.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 1+2*λ update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), ) @@ -253,35 +256,35 @@ end @test abs(x.blocks[2][1] - 1 / n) < 1e-6 end -@testset "Testing alternating projections for different LMO-pairs " begin +@testset "Testing alternating projections " begin - x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) + x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=false) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, _, _ = FrankWolfe.alternating_projections((lmo3, lmo_prob), rand(n)) - @test abs(x[1][1] - 1) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1] - 1) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo1, lmo2), rand(n)) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1]) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1]) < 1e-6 @test infeas < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo2, lmo3), rand(n)) - @test abs(x[1][1] - 1) < 1e-4 - @test abs(x[2][1] - 1) < 1e-4 + @test abs(x.blocks[1][1] - 1) < 1e-4 + @test abs(x.blocks[2][1] - 1) < 1e-4 @test infeas < 1e-6 x, _, _, infeas, traj_data = FrankWolfe.alternating_projections((lmo1, lmo3), rand(n), trajectory=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1) < 1e-6 @test traj_data != [] @test length(traj_data[1]) == 5 @test length(traj_data) >= 2 diff --git a/test/as_quadratic_projection.jl b/test/as_quadratic_projection.jl new file mode 100644 index 000000000..66f76e488 --- /dev/null +++ b/test/as_quadratic_projection.jl @@ -0,0 +1,116 @@ +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI +using Test + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +# @info "Seed $s" +Random.seed!(s) + + +xpi = rand(n); +total = sum(xpi); + +xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. 
storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + callback=build_callback(trajectoryBPCG_standard), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_manual = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + 2.0 * I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_manual = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_manual, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_manual), +); + +traj_data = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_manual, +] + +# all should have converged +for traj in traj_data + @test traj[end][2] ≤ 1e-8 + @test traj[end][4] ≤ 1e-7 +end diff --git a/test/bcg_direction_error.jl b/test/bcg_direction_error.jl index 6c1ab0dba..b0fa28b89 100644 --- a/test/bcg_direction_error.jl +++ b/test/bcg_direction_error.jl @@ -33,7 +33,7 @@ x, v, primal, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-9, @@ -55,7 +55,7 @@ x, v, primal_cut, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-10, diff --git a/test/benchmark.jl 
b/test/benchmark.jl index 38cc72253..0b6f13063 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -1,7 +1,6 @@ import FrankWolfe import LinearAlgebra - n = Int(1e6); xpi = rand(n); diff --git a/test/blocks.jl b/test/blocks.jl index 485716bde..ab2909ecb 100644 --- a/test/blocks.jl +++ b/test/blocks.jl @@ -4,7 +4,7 @@ using LinearAlgebra using FrankWolfe using SparseArrays -@testset "Block array behaviour" begin +@testset "Block array behavior " begin arr = FrankWolfe.BlockVector([ [ 1 3 @@ -53,7 +53,7 @@ using SparseArrays end -@testset "FrankWolfe array methods" begin +@testset "Frank-Wolfe array methods " begin @testset "Block arrays" begin mem = FrankWolfe.InplaceEmphasis() arr0 = FrankWolfe.BlockVector([ diff --git a/test/extra_storage.jl b/test/extra_storage.jl index 26b90055a..1465bceb6 100644 --- a/test/extra_storage.jl +++ b/test/extra_storage.jl @@ -11,7 +11,7 @@ function grad!(storage, x) return storage .= x .- center0 end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -59,7 +59,7 @@ end end end -@testset "Away-Frank-Wolfe" begin +@testset "Away-Frank-Wolfe " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -107,7 +107,7 @@ end end end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) diff --git a/test/function_gradient.jl b/test/function_gradient.jl index 4bfd1a734..01b10a7b0 100644 --- a/test/function_gradient.jl +++ b/test/function_gradient.jl @@ -7,7 +7,7 @@ import FrankWolfe: compute_gradient, compute_value, compute_value_gradient Random.seed!(123) -@testset "Simple and stochastic function gradient interface" begin +@testset "Simple and stochastic function gradient interface " begin for n in (1, 5, 20) A = rand(Float16, n, n) A .+= A' diff --git a/test/generic-arrays.jl b/test/generic-arrays.jl index eab5107a9..53e0a8a17 100644 --- a/test/generic-arrays.jl +++ b/test/generic-arrays.jl @@ -8,7 +8,7 @@ function Base.convert(::Type{GenericArray{Float64,1}}, v::FrankWolfe.ScaledHotVe return GenericArray(collect(v)) end -@testset "GenericArray is maintained" begin +@testset "GenericArray is maintained " begin n = Int(1e3) k = n @@ -29,7 +29,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -44,7 +44,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), VType=FrankWolfe.ScaledHotVector{Float64}, ) @@ -60,12 +60,12 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) end -@testset "fast_equal special types" begin +@testset "Testing fast_equal for special types " begin @testset "ScaledHotVector" begin a = FrankWolfe.ScaledHotVector(3.5, 3, 10) b = FrankWolfe.ScaledHotVector(3.5, 3, 11) diff --git a/test/lmo.jl b/test/lmo.jl index 030b76fe0..b30f01cd8 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -14,7 +14,7 @@ import Clp import Hypatia using JuMP -@testset "Simplex LMOs" begin +@testset "Simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -60,7 +60,7 @@ using JuMP end end -@testset "Hypersimplex" begin +@testset "Hypersimplex " begin @testset 
"Hypersimplex $n $K" for n in (2, 5, 10), K in (1, min(n, 4)) K = 1 n = 5 @@ -191,7 +191,7 @@ end end end -@testset "Lp-norm epigraph LMO" begin +@testset "Lp-norm epigraph LMO " begin for n in (1, 2, 5, 10) τ = 5 + 3 * rand() # tests that the "special" p behaves like the "any" p, i.e. 2.0 and 2 @@ -234,7 +234,7 @@ end end end -@testset "K-sparse polytope LMO" begin +@testset "K-sparse polytope LMO " begin @testset "$n-dimension" for n in (1, 2, 10) τ = 5 + 3 * rand() for K in 1:n @@ -264,7 +264,7 @@ end @test norm(v) > 0 end -@testset "Caching on simplex LMOs" begin +@testset "Caching on simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -314,7 +314,7 @@ function _is_doubly_stochastic(m) end end -@testset "Birkhoff polytope" begin +@testset "Birkhoff polytope " begin Random.seed!(42) lmo = FrankWolfe.BirkhoffPolytopeLMO() for n in (1, 2, 10) @@ -358,7 +358,7 @@ end ] end -@testset "Matrix completion and nuclear norm" begin +@testset "Matrix completion and nuclear norm " begin nfeat = 50 nobs = 100 r = 5 @@ -425,7 +425,7 @@ end ) end -@testset "Spectral norms" begin +@testset "Spectral norms " begin Random.seed!(42) o = Hypatia.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -514,7 +514,7 @@ end end end -@testset "MOI oracle consistency" begin +@testset "MOI oracle consistency " begin Random.seed!(42) o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -649,7 +649,7 @@ end end end -@testset "MOI oracle on Birkhoff polytope" begin +@testset "MOI oracle on Birkhoff polytope " begin o = GLPK.Optimizer() o_ref = GLPK.Optimizer() for n in (1, 2, 10) @@ -687,7 +687,7 @@ end end end -@testset "MOI oracle and KSparseLMO" begin +@testset "MOI oracle and KSparseLMO " begin o_base = GLPK.Optimizer() cached = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), @@ -751,7 +751,7 @@ end end end -@testset "Product LMO" begin +@testset "Product LMO " begin lmo = FrankWolfe.ProductLMO(FrankWolfe.LpNormLMO{Inf}(3.0), FrankWolfe.LpNormLMO{1}(2.0)) dinf = randn(10) d1 = randn(5) @@ -770,7 +770,7 @@ end @test FrankWolfe.BlockVector([vinf, v1]) == v_block end -@testset "Scaled L-1 norm polytopes" begin +@testset "Scaled L-1 norm polytopes " begin lmo = FrankWolfe.ScaledBoundL1NormBall(-ones(10), ones(10)) # equivalent to LMO lmo_ref = FrankWolfe.LpNormLMO{1}(1) @@ -802,7 +802,7 @@ end @test v ≈ [-1, 0] end -@testset "Scaled L-inf norm polytopes" begin +@testset "Scaled L-inf norm polytopes " begin # tests ScaledBoundLInfNormBall for the standard hypercube, a shifted one, and a scaled one lmo = FrankWolfe.ScaledBoundLInfNormBall(-ones(10), ones(10)) lmo_ref = FrankWolfe.LpNormLMO{Inf}(1) @@ -847,7 +847,7 @@ end end end -@testset "Copy MathOpt LMO" begin +@testset "Copy MathOpt LMO " begin o_clp = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), Clp.Optimizer(), @@ -869,7 +869,7 @@ end end end -@testset "MathOpt LMO with BlockVector" begin +@testset "MathOpt LMO with BlockVector " begin o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) x = MOI.add_variables(o, 10) @@ -888,7 +888,7 @@ end end -@testset "Inplace LMO correctness" begin +@testset "Inplace LMO correctness " begin V = [-6.0, -6.15703, -5.55986] M = [3.0 2.8464949 2.4178848; 2.8464949 3.0 2.84649498; 2.4178848 2.84649498 3.0] @@ -912,7 +912,7 @@ end @test x_dense == x_standard end -@testset "Ellipsoid LMO $n" for n in (2, 5, 10) +@testset "Ellipsoid LMO $n " for n in (2, 5, 9) A = zeros(n, n) A[1, 1] = 3 @test_throws PosDefException 
FrankWolfe.EllipsoidLMO(A) @@ -944,7 +944,7 @@ end @test dot(xv, d) ≈ dot(v, d) atol = 1e-5 * n end -@testset "Convex hull" begin +@testset "Convex hull " begin lmo = FrankWolfe.ConvexHullOracle([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) for _ in 1:100 d = randn(3) @@ -957,7 +957,7 @@ end @test v == lmo.vertices[1] end -@testset "Symmetric LMO" begin +@testset "Symmetric LMO " begin # See examples/reynolds.jl struct BellCorrelationsLMO{T} <: FrankWolfe.LinearMinimizationOracle m::Int # number of inputs @@ -1023,7 +1023,7 @@ end end reynolds_adjoint(gradient, lmo) = gradient lmo = BellCorrelationsLMO{Float64}(size(p, 1), zeros(size(p, 1))) - sym = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + sym = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) x0 = FrankWolfe.compute_extreme_point(sym, -p) active_set = FrankWolfe.ActiveSet([(1.0, x0)]) res = FrankWolfe.blended_pairwise_conditional_gradient( @@ -1038,7 +1038,7 @@ end @test length(res[6]) < 25 end -@testset "Ordered Weighted Norm LMO" begin +@testset "Ordered Weighted Norm LMO " begin Random.seed!(4321) N = Int(1e3) for _ in 1:10 diff --git a/test/momentum_memory.jl b/test/momentum_memory.jl index 45fa98001..5ec6aea50 100644 --- a/test/momentum_memory.jl +++ b/test/momentum_memory.jl @@ -8,7 +8,7 @@ n = Int(1e3) k = 10000 s = rand(1:100) -@info "Seed: $s" +# @info "Seed: $s" Random.seed!(s) @@ -47,7 +47,7 @@ xmem, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=true, momentum=0.9, ) diff --git a/test/oddities.jl b/test/oddities.jl index 466a5019f..b64ecb901 100644 --- a/test/oddities.jl +++ b/test/oddities.jl @@ -2,7 +2,7 @@ import FrankWolfe using LinearAlgebra using Test -@testset "Testing adaptive LS when already optimal and gradient is 0" begin +@testset "Testing adaptive LS at optimality " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -19,7 +19,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -32,7 +32,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -43,7 +43,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -58,7 +58,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -70,7 +70,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -83,7 +83,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl new file mode 100644 index 000000000..50bcdb6e6 --- /dev/null +++ b/test/quadratic_lp_active_set.jl @@ -0,0 +1,113 @@ +using FrankWolfe +using LinearAlgebra +using Random +using Test +import HiGHS +import MathOptInterface as MOI + +n = Int(1e4) +k = 5000 + +s = 10 +# @info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. 
storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_standard), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_direct_specialized = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_direct_specialized), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_direct_generic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_noqas = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4) +dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4) +dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4) +dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4) + + +@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end] +@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end] +@test norm(dual_gaps_quadratic_noqas - dual_gaps_quadratic_noqas) ≤ k * 1e-5 diff --git a/test/rational_test.jl b/test/rational_test.jl index e9725157a..415b33291 100644 --- a/test/rational_test.jl +++ b/test/rational_test.jl @@ -37,7 +37,7 @@ xmem, vmem, primal, dual_gap, trajectory = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -49,7 +49,7 @@ xstep, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, - verbose=true, + verbose=false, ) @test eltype(xmem) == eltype(xstep) == Rational{BigInt} diff --git a/test/runtests.jl b/test/runtests.jl index d56d47a75..c99097de4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,7 +12,7 @@ include("utils.jl") 
include("active_set_variants.jl") include("alternating_methods_tests.jl") -@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing vanilla Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -27,7 +27,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 g(x) = 2 + log(1 + log(x + 1)) @@ -39,7 +39,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(g), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -50,7 +50,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -73,7 +73,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -157,12 +157,12 @@ include("alternating_methods_tests.jl") ) < 1.0e-3 end -@testset "Gradient with momentum correctly updated" begin +@testset "Gradient with momentum correctly updated " begin # fixing https://github.com/ZIB-IOL/FrankWolfe.jl/issues/47 include("momentum_memory.jl") end -@testset "Testing Lazified Conditional Gradients with various step size strategies" begin +@testset "Testing Lazified Conditional Gradients " begin f(x) = norm(x)^2 function grad!(storage, x) @. storage = 2x @@ -178,7 +178,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -189,7 +189,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -200,7 +200,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -211,12 +211,12 @@ end x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 end -@testset "Testing Lazified Conditional Gradients with cache strategies" begin +@testset "Testing caching in Lazified Conditional Gradients " begin n = Int(1e5) L = 2 k = 1000 @@ -237,7 +237,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, ) @test primal - 1 / n <= bound @@ -270,7 +270,7 @@ end @test primal - 1 / n <= bound end -@testset "Testing memory_mode blas vs memory" begin +@testset "Testing memory_mode blas vs memory " begin n = Int(1e5) k = 100 xpi = rand(n) @@ -337,7 +337,7 @@ end @test primal < f(x0) end end -@testset "Testing rational variant" begin +@testset "Testing rational variant " begin rhs = 1 n = 40 k = 1000 @@ -379,7 +379,7 @@ end line_search=FrankWolfe.Agnostic(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == eltype(x) == Rational{BigInt} @test f(x) <= 1e-4 @@ -395,7 +395,7 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) x0 = FrankWolfe.compute_extreme_point(lmo, direction) @@ -408,11 +408,11 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x) == Rational{BigInt} end -@testset "Multi-precision tests" begin 
+@testset "Multi-precision tests " begin rhs = 1 n = 100 k = 1000 @@ -472,7 +472,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -487,7 +487,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -496,7 +496,7 @@ end end end -@testset "Stochastic FW linear regression" begin +@testset "Stochastic FW linear regression " begin function simple_reg_loss(θ, data_point) (xi, yi) = data_point (a, b) = (θ[1:end-1], θ[end]) @@ -583,7 +583,7 @@ end ) end -@testset "Away-step FW" begin +@testset "Away-step FW " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, rand(n)) @@ -614,7 +614,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -629,7 +629,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -645,7 +645,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -660,7 +660,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -675,7 +675,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -691,7 +691,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -706,7 +706,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -722,12 +722,12 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) end -@testset "Blended conditional gradient" begin +@testset "Testing Blended Conditional Gradient " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, randn(n)) @@ -745,7 +745,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -767,44 +767,64 @@ end end - include("oddities.jl") include("tracking.jl") # in separate module for name space issues module BCGDirectionError using Test -@testset "BCG direction accuracy" begin +@testset "BCG direction accuracy " begin include("bcg_direction_error.jl") end end module RationalTest using Test -@testset "Rational test and shortstep" begin +@testset "Rational test and shortstep " begin include("rational_test.jl") end end module BCGAccel using Test -@testset "BCG acceleration with different types" begin +@testset "BCG acceleration with different types " begin include("blended_accelerated.jl") end end module VertexStorageTest using Test -@testset "Vertex storage" begin +@testset "Vertex storage " begin include("extra_storage.jl") end end +module LpDirectSolveTest +using Test +@testset "LP solving for quadratic functions and active set " begin + 
include("quadratic_lp_active_set.jl") +end +end + +module LpDirectSolveTestProjection +using Test +@testset "LP solving for quadratic functions and active set " begin + include("as_quadratic_projection.jl") +end +end + +module SparsifyingActiveSetTest +using Test +@testset "Sparsifying active set " begin + include("sparsifying_activeset.jl") +end +end + include("generic-arrays.jl") include("blocks.jl") -@testset "End-to-end trajectory tests" begin +@testset "End-to-end trajectory tests " begin trajectory_testfiles = readdir(joinpath(@__DIR__, "trajectory_tests"), join=true) for file in trajectory_testfiles @eval Module() begin diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl new file mode 100644 index 000000000..1cf8d8299 --- /dev/null +++ b/test/sparsifying_activeset.jl @@ -0,0 +1,85 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using LinearAlgebra +using Test +using Random +using FrankWolfe +import HiGHS + +n = Int(1e4) +k = 10000 + +# s = rand(1:100) +s = 10 +# @info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +# here the optimal solution lies in the interior if you want an optimal solution on a face and not the interior use: +# const xp = xpi; + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    trajectory=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+# wrap a plain active set in a sparsifier: HiGHS solves the auxiliary LPs
+# that drop redundant atoms from the active set
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+trajectoryBPCG_as_sparse = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,  # pass the sparsifying active set, not a plain starting point
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+# the sparsified active set should never be larger than the standard one
+as_cardinality_bpcg = getindex.(trajectoryBPCG_standard, 6)
+as_cardinality_sparse = getindex.(trajectoryBPCG_as_sparse, 6)
+@test maximum(as_cardinality_sparse - as_cardinality_bpcg) <= 0
+
+dual_gap_bpcg = getindex.(trajectoryBPCG_standard, 4)
+dual_gap_sparse = getindex.(trajectoryBPCG_as_sparse, 4)
+@test maximum(dual_gap_sparse - dual_gap_bpcg) <= 1e-7
diff --git a/test/timeout.jl b/test/timeout.jl
index 7094ef5b3..9ba8aa47e 100644
--- a/test/timeout.jl
+++ b/test/timeout.jl
@@ -2,7 +2,7 @@ using FrankWolfe
 using LinearAlgebra
 using SparseArrays
 
-@testset "Timing out" begin
+@testset "Timing out " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
         return storage .= 2x
     end
@@ -17,7 +17,7 @@ using SparseArrays
             x0,
             max_iteration=1000,
             line_search=FrankWolfe.Agnostic(),
-            verbose=true,
+            verbose=false,
         )[3] - 0.2,
     ) < 1.0e-5
     @test abs(
@@ -40,7 +40,7 @@ using SparseArrays
             x0,
             max_iteration=1000,
             line_search=FrankWolfe.Goldenratio(),
-            verbose=true,
+            verbose=false,
         )[3] - 0.2,
     ) < 1.0e-5
     @test abs(
diff --git a/test/tracking.jl b/test/tracking.jl
index ecd7d1823..dac8cb5f7 100644
--- a/test/tracking.jl
+++ b/test/tracking.jl
@@ -3,7 +3,7 @@ using LinearAlgebra
 using SparseArrays
 using FrankWolfe
 
-@testset "Tracking Testset" begin
+@testset "Tracking Testset " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
         return storage .= 2x
@@ -41,7 +41,7 @@ using FrankWolfe
     end
 end
 
-@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin
+@testset "Testing vanilla Frank-Wolfe " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
@@ -68,7 +68,7 @@ end
         trajectory=true,
         callback=nothing,
         traj_data=storage,
-        verbose=true,
+        verbose=false,
     )
 
     @test length(storage[1]) == 5
@@ -79,7 +79,7 @@ end
     @test tlmo.counter == niters + 1 # x0 computation and initialization
 end
 
-@testset "Testing lazified Frank-Wolfe with various step size and momentum strategies" begin
+@testset "Testing lazified Frank-Wolfe " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
diff --git a/test/trajectory_tests/open_loop_parametric.jl b/test/trajectory_tests/open_loop_parametric.jl
index 474d98ecb..38b0e5fce 100644
--- a/test/trajectory_tests/open_loop_parametric.jl
+++ b/test/trajectory_tests/open_loop_parametric.jl
@@ -26,7 +26,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(2),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
 
@@ -39,7 +39,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(10),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
 
@@ -68,7 +68,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(2),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
 
@@ -81,7 +81,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(10),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
diff --git a/test/utils.jl b/test/utils.jl
index b535a3424..4d7cc4539 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -3,7 +3,7 @@ using LinearAlgebra
 using Test
 using SparseArrays
 
-@testset "Simple benchmark_oracles function" begin
+@testset "Simple benchmark_oracles function " begin
     n = Int(1e3)
 
     xpi = rand(n)
@@ -21,7 +21,7 @@ using SparseArrays
     FrankWolfe.benchmark_oracles(f, grad!, () -> rand(n), lmo_prob; k=100)
 end
 
-@testset "RankOneMatrix" begin
+@testset "RankOneMatrix " begin
     for n in (1, 2, 5)
         for _ in 1:5
             v = rand(n)
@@ -64,7 +64,7 @@ end
     end
 end
 
-@testset "RankOne muladd_memory_mode $n" for n in (1, 2, 5)
+@testset "RankOne muladd_memory_mode $n " for n in (1, 2, 5)
     for _ in 1:5
         n = 5
         v = rand(n)
@@ -82,7 +82,7 @@ end
     end
 end
 
-@testset "Line Search methods" begin
+@testset "Line Search methods " begin
     a = [-1.0, -1.0, -1.0]
     b = [1.0, 1.0, 1.0]
     function grad!(storage, x)
@@ -214,14 +214,14 @@ end
     )
 end
 
-@testset "Momentum tests" begin
+@testset "Momentum tests " begin
     it = FrankWolfe.ExpMomentumIterator()
     it.num = 0
     # no momentum -> 1
     @test FrankWolfe.momentum_iterate(it) == 1
 end
 
-@testset "Fast dot complex & norm" begin
+@testset "Fast dot complex & norm " begin
     s = sparse(I, 3, 3)
     m = randn(Complex{Float64}, 3, 3)
     @test dot(s, m) ≈ FrankWolfe.fast_dot(s, m)
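# ---------------------------------------------------------------------------
# Editor's note (not part of the diff): the usage sketch referenced from the
# header of test/sparsifying_activeset.jl above. A minimal sketch, assuming
# FrankWolfe ≥ 0.4.2 with HiGHS available; the problem size, LMO, and
# iteration budget are illustrative.
using LinearAlgebra
import FrankWolfe, HiGHS

n = 1000
xp = rand(n)
xp ./= sum(xp)
f(x) = norm(x - xp)^2
grad!(storage, x) = (storage .= 2 .* (x .- xp))
lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
x0 = FrankWolfe.compute_extreme_point(lmo, rand(n))
# ActiveSetSparsifier wraps any active set; it uses the supplied optimizer to
# solve auxiliary LPs that re-express the current iterate with fewer atoms
# (the exact sparsification trigger is internal to FrankWolfe)
active_set = FrankWolfe.ActiveSetSparsifier(
    FrankWolfe.ActiveSet([1.0], [x0], similar(x0)),
    HiGHS.Optimizer(),
)
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
    f, grad!, lmo, active_set;
    max_iteration=5000, line_search=FrankWolfe.Shortstep(2.0),
)
# ---------------------------------------------------------------------------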