diff --git a/Project.toml b/Project.toml index 51bcf8b94..eb2561631 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "FrankWolfe" uuid = "f55ce6ea-fdc5-4628-88c5-0087fe54bd30" authors = ["ZIB-IOL"] -version = "0.4.0" +version = "0.4.2" [deps] Arpack = "7d9fca2a-8960-54d3-9f78-7d1dccf2cb97" @@ -44,6 +44,7 @@ DynamicPolynomials = "7c1d4256-1411-5781-91ec-d7bc3513ac07" FiniteDifferences = "26cc04aa-876d-5657-8c51-4c34ba976000" ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" GLPK = "60bf3e95-4087-53dc-ae20-288a0d20c6a6" +HiGHS = "87dc4568-4c63-4d18-b0c0-bb2238e4078b" Hypatia = "b99e6be6-89ff-11e8-14f8-45c827f4f8f2" JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" JuMP = "4076af6c-e467-56ae-b986-b466b2749572" @@ -59,4 +60,4 @@ Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" ZipFile = "a5390f91-8eb1-5f08-bee0-b1d1ffed6cea" [targets] -test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] +test = ["CSV", "Combinatorics", "DataFrames", "Distributions", "DoubleFloats", "DynamicPolynomials", "FiniteDifferences", "ForwardDiff", "GLPK", "HiGHS", "JSON", "JuMP", "LaTeXStrings", "MAT", "MultivariatePolynomials", "Plots", "PlotThemes", "Polyhedra", "ReverseDiff", "ZipFile", "Test", "Tullio", "Clp", "Hypatia"] diff --git a/docs/src/advanced.md b/docs/src/advanced.md index a404fad85..ea438ca74 100644 --- a/docs/src/advanced.md +++ b/docs/src/advanced.md @@ -53,7 +53,7 @@ See [Extra-lazification](@ref) for a complete example. ## Specialized active set for quadratic functions -If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadratic`. +If the objective function is quadratic, a considerable speedup can be obtained by using the structure `ActiveSetQuadraticProductCaching`. It relies on the storage of various scalar products to efficiently determine the best (and worst for `blended_pairwise_conditional_gradient`) atom in the active set without the need of computing many scalar products in each iteration. The user should provide the Hessian matrix `A` as well as the linear part `b` of the function, such that: ```math @@ -228,6 +228,6 @@ This subspace is the image of the Reynolds operator defined by \mathcal{R}(x)=\frac{1}{|G|}\sum_{g\in G}g\cdot x. ``` -In practice, the type `SymmetricLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. +In practice, the type `SubspaceLMO` allows the user to provide the Reynolds operator $\mathcal{R}$ as well as its adjoint $\mathcal{R}^\ast$. The gradient is symmetrised with $\mathcal{R}^\ast$, then passed to the non-symmetric LMO, and the resulting output is symmetrised with $\mathcal{R}$. In many cases, the gradient is already symmetric so that `reynolds_adjoint(gradient, lmo) = gradient` is a fast and valid choice. 
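+
+For instance, a minimal sketch for the mirror symmetry $x \mapsto x^\top$ on $n\times n$ matrices, assuming a base `lmo` over such matrices and a gradient that is already symmetric:
+
+```julia
+reynolds(x, lmo) = (x + x') / 2             # Reynolds operator for the group {id, transpose}
+reynolds_adjoint(gradient, lmo) = gradient  # valid here since the gradient is symmetric
+
+lmo_sym = FrankWolfe.SubspaceLMO(lmo, reynolds, reynolds_adjoint)
+v = FrankWolfe.compute_extreme_point(lmo_sym, gradient)
+```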
diff --git a/docs/src/reference/3_backend.md b/docs/src/reference/3_backend.md
index f0ef0b2ae..34e71f040 100644
--- a/docs/src/reference/3_backend.md
+++ b/docs/src/reference/3_backend.md
@@ -4,7 +4,7 @@
 
 ```@autodocs
 Modules = [FrankWolfe]
-Pages = ["active_set.jl"]
+Pages = ["active_set.jl", "active_set_quadratic.jl", "active_set_quadratic_direct_solve.jl", "active_set_sparsifier.jl"]
 ```
 
 ## Functions and gradients
@@ -67,6 +67,7 @@ FrankWolfe.select_update_indices
 FrankWolfe.FullUpdate
 FrankWolfe.CyclicUpdate
 FrankWolfe.StochasticUpdate
+FrankWolfe.LazyUpdate
 ```
 
 ## Update step for block-coordinate Frank-Wolfe
diff --git a/examples/alternating_projections.jl b/examples/alternating_projections.jl
new file mode 100644
index 000000000..159d204ff
--- /dev/null
+++ b/examples/alternating_projections.jl
@@ -0,0 +1,37 @@
+import FrankWolfe
+using LinearAlgebra
+using Random
+
+
+include("../examples/plot_utils.jl")
+
+Random.seed!(100)
+
+n = 500
+lmo = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .+ 1.0 for _ in 1:n])
+lmo2 = FrankWolfe.ConvexHullOracle([rand(Float64, (n,)) .- 1.0 for _ in 1:n])
+
+
+trajectories = []
+
+methods = [FrankWolfe.frank_wolfe, FrankWolfe.blended_pairwise_conditional_gradient, FrankWolfe.blended_pairwise_conditional_gradient]
+
+for (i, m) in enumerate(methods)
+    if i == 1
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m)
+    elseif i == 2
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=false, lazy=true)
+    else
+        x, _, _, _, traj_data = FrankWolfe.alternating_projections((lmo, lmo2), ones(n); verbose=true, print_iter=100, trajectory=true, proj_method=m, reuse_active_set=true, lazy=true)
+    end
+    push!(trajectories, traj_data)
+end
+
+
+
+labels = ["FW", "BPCG", "BPCG (reuse)"]
+
+plot_trajectories(trajectories, labels, xscalelog=true)
+
+
+
diff --git a/examples/birkhoff_polytope.jl b/examples/birkhoff_polytope.jl
index e181ba69b..c81771ca7 100644
--- a/examples/birkhoff_polytope.jl
+++ b/examples/birkhoff_polytope.jl
@@ -54,7 +54,7 @@ FrankWolfe.benchmark_oracles(
     x -> cf(x, xp, normxp2),
     (str, x) -> cgrad!(str, x, xp),
     lmo,
-    FrankWolfe.ActiveSetQuadratic([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, collect(x0))], 2I/n^2, -2xp/n^2); # surprisingly faster and more memory efficient with collect
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
diff --git a/examples/birkhoff_polytope_symmetric.jl b/examples/birkhoff_polytope_symmetric.jl
index db1c79a55..8e1c3933d 100644
--- a/examples/birkhoff_polytope_symmetric.jl
+++ b/examples/birkhoff_polytope_symmetric.jl
@@ -36,7 +36,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
     x -> cf(x, xp, normxp2),
     (str, x) -> cgrad!(str, x, xp),
     lmo_nat,
-    FrankWolfe.ActiveSetQuadratic([(1.0, x0)], 2I/n^2, -2xp/n^2);
+    FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], 2I/n^2, -2xp/n^2);
     max_iteration=k,
     line_search=FrankWolfe.Shortstep(2/n^2),
     lazy=true,
@@ -48,15 +48,15 @@ x0 = FrankWolfe.compute_extreme_point(lmo_nat, randn(n, n))
 # here the problem is invariant under mirror symmetry around the diagonal and the anti-diagonal
 # each solution of the LMO can then be added to the active set together with its orbit
 # on top of that, the effective dimension of the space is reduced
-# 
the following function constructs the functions `reduce` and `inflate` needed for SymmetricLMO -# `reduce` maps a matrix to the invariant vector space +# the following function constructs the functions `deflate` and `inflate` needed for SubspaceLMO +# `deflate` maps a matrix to the invariant vector space # `inflate` maps a vector in this space back to a matrix -# using `FrankWolfe.SymmetricArray` is a convenience to avoid reallocating the result of `inflate` -function build_reduce_inflate(p::Matrix{T}) where {T <: Number} +# using `FrankWolfe.SubspaceVector` is a convenience to avoid reallocating the result of `inflate` +function build_deflate_inflate(p::Matrix{T}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) # square matrix - dimension = floor(Int, (n+1)^2 / 4) # reduced dimension - function reduce(A::AbstractMatrix{T}, lmo) + dimension = floor(Int, (n+1)^2 / 4) # deflated dimension + function deflate(A::AbstractMatrix{T}, lmo) vec = Vector{T}(undef, dimension) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i @@ -75,9 +75,9 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end end end - return FrankWolfe.SymmetricArray(A, vec) + return FrankWolfe.SubspaceVector(A, vec) end - function inflate(x::FrankWolfe.SymmetricArray, lmo) + function inflate(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:(n+1)÷2, j in i:n+1-i cnt += 1 @@ -102,22 +102,22 @@ function build_reduce_inflate(p::Matrix{T}) where {T <: Number} end return x.data end - return reduce, inflate + return deflate, inflate end -reduce, inflate = build_reduce_inflate(xpi) -const rxp = reduce(xpi, nothing) -@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in reduce and inflate +deflate, inflate = build_deflate_inflate(xpi) +const rxp = deflate(xpi, nothing) +@assert dot(rxp, rxp) ≈ normxp2 # should be correct thanks to the factors sqrt(2) and 2 in deflate and inflate -lmo_sym = FrankWolfe.SymmetricLMO(lmo_nat, reduce, inflate) +lmo_sym = FrankWolfe.SubspaceLMO(lmo_nat, deflate, inflate) -rx0 = FrankWolfe.compute_extreme_point(lmo_sym, reduce(sparse(randn(n, n)), nothing)) +rx0 = FrankWolfe.compute_extreme_point(lmo_sym, deflate(sparse(randn(n, n)), nothing)) @time rx, rv, rprimal, rdual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( x -> cf(x, rxp, normxp2), (str, x) -> cgrad!(str, x, rxp), lmo_sym, - FrankWolfe.ActiveSetQuadratic([(1.0, rx0)], 2I/n^2, -2rxp/n^2); + FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, rx0)], 2I/n^2, -2rxp/n^2); max_iteration=k, line_search=FrankWolfe.Shortstep(2/n^2), lazy=true, diff --git a/examples/blended_pairwise_with_direct.jl b/examples/blended_pairwise_with_direct.jl new file mode 100644 index 000000000..828436201 --- /dev/null +++ b/examples/blended_pairwise_with_direct.jl @@ -0,0 +1,174 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +Note the special structure of f(x) = norm(x - x0)^2 that we assume here + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
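+
+A quick note on that structure (expanding the squared norm):
+    f(x) = ‖x - xp‖² = ⟨x, x⟩ - 2⟨xp, x⟩ + ‖xp‖²,
+so in the form f(x) = ½⟨x, Ax⟩ + ⟨b, x⟩ + c one has A = 2I and b = -2xp,
+which is exactly what is passed to the quadratic active sets constructed below.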
+=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10_000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_standard), +); + +# Just projection quadratic +trajectoryBPCG_quadratic = [] +as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic), +); + +as_quad = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, copy(x00))], 2 * LinearAlgebra.I, -2xp) + +# with quadratic active set +trajectoryBPCG_quadratic_as = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_as), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_direct_generic = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +# with LP acceleration +trajectoryBPCG_quadratic_noqas = [] + +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +# Update the data and labels for plotting +data_trajectories = 
[ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic, + trajectoryBPCG_quadratic_as, + trajectoryBPCG_quadratic_direct, + trajectoryBPCG_quadratic_direct_generic, + trajectoryBPCG_quadratic_noqas, +] +labels_trajectories = [ + "BPCG (Standard)", + "BPCG (Specific Direct)", + "AS_Quad", + "Reloaded", + "Reloaded_generic", + "Reloaded_noqas", +] + +# Plot trajectories +plot_trajectories(data_trajectories, labels_trajectories, xscalelog=false) diff --git a/examples/blended_pairwise_with_direct_non-standard-quadratic.jl b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl new file mode 100644 index 000000000..ab934edd2 --- /dev/null +++ b/examples/blended_pairwise_with_direct_non-standard-quadratic.jl @@ -0,0 +1,146 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope which is not standard quadratic. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI + +include("../examples/plot_utils.jl") + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +@info "Seed $s" +Random.seed!(s) + +A = let + A = randn(n, n) + A' * A +end +@assert isposdef(A) == true + +const y = Random.rand(Bool, n) * 0.6 .+ 0.3 + +function f(x) + d = x - y + return dot(d, A, d) +end + +function grad!(storage, x) + mul!(storage, A, x) + return mul!(storage, A, y, -2, 2) +end + + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
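+        # record FrankWolfe's standard callback statistics (iteration, primal and dual
+        # values, time) together with the current number of atoms in the active set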
+ return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +callback = build_callback(trajectoryBPCG_standard) + +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Adaptive(), + print_iter=k / 10, + memory_mode=FrankWolfe.InplaceEmphasis(), + verbose=true, + trajectory=true, + callback=callback, +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=100, scaling_factor=1.2, max_interval=100), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_automatic2 = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic2 = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic2, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic2), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=2), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + verbose=true, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + + +dataSparsity = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_automatic_standard, +] +labelSparsity = ["BPCG (Standard)", "AS_Quad", "AS_Standard"] + + +# Plot trajectories +plot_trajectories(dataSparsity, labelSparsity, xscalelog=false) diff --git a/examples/blended_pairwise_with_sparsify.jl b/examples/blended_pairwise_with_sparsify.jl new file mode 100644 index 000000000..7700268bb --- /dev/null +++ b/examples/blended_pairwise_with_sparsify.jl @@ -0,0 +1,94 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. 
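+
+Under the hood, `FrankWolfe.ActiveSetSparsifier` wraps an inner active set; once more
+than `minimum_vertices` atoms are stored, every `solve_frequency` updates it solves a
+small feasibility LP expressing the current iterate as a convex combination of the
+stored atoms and drops the atoms whose weight is (near) zero.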
+=# + +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS + +include("../examples/plot_utils.jl") + +n = Int(1e4) +k = 10000 + +s = 10 +@info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +# here the optimal solution lies in the interior if you want an optimal solution on a face and not the interior use: +# const xp = xpi; + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(2, 1.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(1.0); +# lmo = FrankWolfe.UnitSimplexOracle(1.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +FrankWolfe.benchmark_oracles(f, grad!, () -> randn(n), lmo; k=100) + +trajectoryBPCG_standard = [] +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + trajectory=true, + callback=build_callback(trajectoryBPCG_standard), +); + +trajectoryBPCG_as_sparse = [] +active_set_sparse = FrankWolfe.ActiveSetSparsifier( + FrankWolfe.ActiveSet([1.0], [x00], similar(x00)), + HiGHS.Optimizer(), +) +@time x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_sparse, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=true, + callback=build_callback(trajectoryBPCG_as_sparse), +); + +# Reduction primal/dual error vs. sparsity of solution + +plot_data = [trajectoryBPCG_standard, trajectoryBPCG_as_sparse] +plot_labels = ["BPCG (Standard)", "BPCG (Sparsify)"] + +# Plot sparsity +plot_sparsity(plot_data, plot_labels, legend_position=:topright) diff --git a/examples/docs_12_quadratic_symmetric.jl b/examples/docs_12_quadratic_symmetric.jl index 82ca5bd12..160c7e86e 100644 --- a/examples/docs_12_quadratic_symmetric.jl +++ b/examples/docs_12_quadratic_symmetric.jl @@ -1,7 +1,7 @@ # # Accelerations for quadratic functions and symmetric problems -# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SymmetricLMO`. -# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadratic`. +# This example illustrates how to exploit symmetry to reduce the dimension of the problem via `SubspaceLMO`. +# Moreover, active set based algorithms can be accelerated by using the specialized structure `ActiveSetQuadraticProductCaching`. # The specific problem we consider here comes from quantum information and some context can be found [here](https://arxiv.org/abs/2302.04721). # Formally, we want to find the distance between a tensor of size `m^N` and the `N`-partite local polytope which is defined by its vertices @@ -124,11 +124,11 @@ println() #hide # A first acceleration can be obtained by using the active set specialized for the quadratic objective function, # whose gradient is here ``x-p``, explaining the hessian and linear part provided as arguments. -# The speedup is obtained by pre-computing some scalar products to quickly obtained, in each iteration, the best and worst -# atoms currently in the active set. 
+# The speedup is obtained by pre-computing some scalar products to quickly obtain, in each iteration,
+# the best and worst atoms currently in the active set.
 
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
-asq_naive = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p)
+FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
+asq_naive = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p)
 @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_naive, asq_naive; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration)
 println() #hide
 
@@ -165,9 +165,9 @@ println() #hide
 # a very small speedup by precomputing and storing `Combinatorics.permutations(1:N)`
 # in a dedicated field of our custom LMO.
 
-lmo_permutedims = FrankWolfe.SymmetricLMO(lmo_naive, reynolds_permutedims)
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
-asq_permutedims = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p)
+lmo_permutedims = FrankWolfe.SubspaceLMO(lmo_naive, reynolds_permutedims)
+FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
+asq_permutedims = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p)
 @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_permutedims, asq_permutedims; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration)
 println() #hide
 
@@ -197,9 +197,9 @@ function build_reynolds_unique(p::Array{T, N}) where {T <: Number, N}
     end
 end
 
-lmo_unique = FrankWolfe.SymmetricLMO(lmo_naive, build_reynolds_unique(p))
-FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
-asq_unique = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], LinearAlgebra.I, -p)
+lmo_unique = FrankWolfe.SubspaceLMO(lmo_naive, build_reynolds_unique(p))
+FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide
+asq_unique = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], LinearAlgebra.I, -p)
 @time FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo_unique, asq_unique; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration)
 println() #hide
 
@@ -214,7 +214,7 @@ println() #hide
 # accelerations, especially for active set based algorithms in the regime where many lazy iterations are performed.
 # We refer to the example `symmetric.jl` for a small benchmark with symmetric matrices.
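+
+# Note that `deflate` has to preserve scalar products, ⟨deflate(A), deflate(B)⟩ = ⟨A, B⟩,
+# which is the reason for the square-root scaling factors applied to grouped entries below;
+# wrapping the result in a `SubspaceVector` then lets the rest of the code treat the
+# deflated iterate as a plain vector while keeping the full array available.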
-function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} +function build_deflate_inflate(p::Array{T, N}) where {T <: Number, N} ptol = round.(p; digits=8) ptol[ptol .== zero(T)] .= zero(T) # transform -0.0 into 0.0 as isequal(0.0, -0.0) is false uniquetol = unique(ptol[:]) @@ -227,8 +227,8 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} for (i, ind) in enumerate(indices) vec[i] = sum(A[ind]) / sqmul[i] end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) for (i, ind) in enumerate(indices) @view(x.data[ind]) .= x.vec[i] / sqmul[i] end @@ -236,16 +236,16 @@ function build_reduce_inflate(p::Array{T, N}) where {T <: Number, N} end end -reduce, inflate = build_reduce_inflate(p) -p_reduce = reduce(p, nothing) -x0_reduce = reduce(x0, nothing) -f_reduce = let p_reduce = p_reduce, normp2 = normp2 - x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_reduce, x) + normp2 +deflate, inflate = build_deflate_inflate(p) +p_deflate = deflate(p, nothing) +x0_deflate = deflate(x0, nothing) +f_deflate = let p_deflate = p_deflate, normp2 = normp2 + x -> LinearAlgebra.dot(x, x) / 2 - LinearAlgebra.dot(p_deflate, x) + normp2 end -grad_reduce! = let p_reduce = p_reduce +grad_deflate! = let p_deflate = p_deflate (storage, x) -> begin @inbounds for i in eachindex(x) - storage[i] = x[i] - p_reduce[i] + storage[i] = x[i] - p_deflate[i] end end end @@ -255,9 +255,9 @@ println() #hide # In this simple example, their shape remains unchanged, but in general this may need some # reformulation, which falls to the user. -lmo_reduce = FrankWolfe.SymmetricLMO(lmo_naive, reduce, inflate) -FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide -asq_reduce = FrankWolfe.ActiveSetQuadratic([(one(T), x0_reduce)], LinearAlgebra.I, -p_reduce) -@time FrankWolfe.blended_pairwise_conditional_gradient(f_reduce, grad_reduce!, lmo_reduce, asq_reduce; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) +lmo_deflate = FrankWolfe.SubspaceLMO(lmo_naive, deflate, inflate) +FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate); verbose=false, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration=10) #hide +asq_deflate = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0_deflate)], LinearAlgebra.I, -p_deflate) +@time FrankWolfe.blended_pairwise_conditional_gradient(f_deflate, grad_deflate!, lmo_deflate, asq_deflate; verbose, lazy=true, line_search=FrankWolfe.Shortstep(one(T)), max_iteration) println() #hide diff --git a/examples/quadratic.jl b/examples/quadratic.jl index 923fbc4f3..450ea0901 100644 --- a/examples/quadratic.jl +++ b/examples/quadratic.jl @@ -74,7 +74,7 @@ function benchmark_Bell(p::Array{T, 2}, quadratic::Bool; fw_method=FrankWolfe.bl lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) x0 = FrankWolfe.compute_extreme_point(lmo, -p) if quadratic - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) else active_set = FrankWolfe.ActiveSet([(one(T), x0)]) end diff --git 
a/examples/quadratic_A.jl b/examples/quadratic_A.jl index 5ac879c47..c65643a82 100644 --- a/examples/quadratic_A.jl +++ b/examples/quadratic_A.jl @@ -48,7 +48,7 @@ x0 = FrankWolfe.compute_extreme_point(lmo, zeros(T, n+1)) # active_set = FrankWolfe.ActiveSet([(1.0, x0)]) # specialized active set, automatically detecting the parameters A and b of the quadratic function f -active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], gradf) +active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], gradf) @time res = FrankWolfe.blended_pairwise_conditional_gradient( # @time res = FrankWolfe.away_frank_wolfe( diff --git a/examples/reynolds.jl b/examples/reynolds.jl index fe28bca53..0cb6f3c32 100644 --- a/examples/reynolds.jl +++ b/examples/reynolds.jl @@ -86,12 +86,12 @@ function benchmark_Bell(p::Array{T, 3}, sym::Bool; kwargs...) where {T <: Number end lmo = BellCorrelationsLMO{T}(size(p, 1), zeros(T, size(p, 1))) if sym - lmo = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + lmo = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) end x0 = FrankWolfe.compute_extreme_point(lmo, -p) println("Output type of the LMO: ", typeof(x0)) active_set = FrankWolfe.ActiveSet([(one(T), x0)]) - # active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + # active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) return FrankWolfe.blended_pairwise_conditional_gradient(f, grad!, lmo, active_set; lazy=true, line_search=FrankWolfe.Shortstep(one(T)), kwargs...) end diff --git a/examples/simple_stateless.jl b/examples/simple_stateless.jl index 6a550a963..6bf230e26 100644 --- a/examples/simple_stateless.jl +++ b/examples/simple_stateless.jl @@ -71,7 +71,7 @@ plot_trajectories([trajectory_simple[1:end], trajectory_restart[1:end]], ["simpl # simple step iterations about 33% faster -@test line_search.factor <= 44 +@test line_search.factor <= 51 x, v, primal, dual_gap, trajectory_restart_highpres = FrankWolfe.frank_wolfe( f, diff --git a/examples/symmetric.jl b/examples/symmetric.jl index 000ca53bc..1d1018cf9 100644 --- a/examples/symmetric.jl +++ b/examples/symmetric.jl @@ -57,7 +57,7 @@ function correlation_tensor_GHZ_polygon(N::Int, m::Int; type=Float64) return res end -function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} +function build_deflate_inflate_permutedims(p::Array{T, 2}) where {T <: Number} n = size(p, 1) @assert n == size(p, 2) dimension = (n * (n + 1)) ÷ 2 @@ -72,8 +72,8 @@ function build_reduce_inflate_permutedims(p::Array{T, 2}) where {T <: Number} vec[cnt+j] = (A[i, j] + A[j, i]) / sqrt2 end end - return FrankWolfe.SymmetricArray(A, vec) - end, function(x::FrankWolfe.SymmetricArray, lmo) + return FrankWolfe.SubspaceVector(A, vec) + end, function(x::FrankWolfe.SubspaceVector, lmo) cnt = 0 @inbounds for i in 1:n x.data[i, i] = x.vec[i] @@ -90,9 +90,9 @@ end function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_pairwise_conditional_gradient, kwargs...) 
where {T <: Number} Random.seed!(0) if sym - reduce, inflate = build_reduce_inflate_permutedims(p) - lmo = FrankWolfe.SymmetricLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), reduce, inflate) - p = reduce(p, lmo) + deflate, inflate = build_deflate_inflate_permutedims(p) + lmo = FrankWolfe.SubspaceLMO(BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))), deflate, inflate) + p = deflate(p, lmo) else lmo = BellCorrelationsLMOHeuristic{T}(size(p, 1), zeros(T, size(p, 1))) end @@ -109,7 +109,7 @@ function benchmark_Bell(p::Array{T, 2}, sym::Bool; fw_method=FrankWolfe.blended_ end end x0 = FrankWolfe.compute_extreme_point(lmo, -p) - active_set = FrankWolfe.ActiveSetQuadratic([(one(T), x0)], I, -p) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(one(T), x0)], I, -p) res = fw_method(f, grad!, lmo, active_set; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, verbose=false, max_iteration=10^2) return fw_method(f, grad!, lmo, res[6]; line_search=FrankWolfe.Shortstep(one(T)), lazy=true, lazy_tolerance=10^6, kwargs...) end diff --git a/src/FrankWolfe.jl b/src/FrankWolfe.jl index e39503847..253863085 100644 --- a/src/FrankWolfe.jl +++ b/src/FrankWolfe.jl @@ -36,6 +36,8 @@ include("moi_oracle.jl") include("function_gradient.jl") include("active_set.jl") include("active_set_quadratic.jl") +include("active_set_quadratic_direct_solve.jl") +include("active_set_sparsifier.jl") include("blended_cg.jl") include("afw.jl") diff --git a/src/abstract_oracles.jl b/src/abstract_oracles.jl index 5b61960f0..c1c1154e4 100644 --- a/src/abstract_oracles.jl +++ b/src/abstract_oracles.jl @@ -258,23 +258,28 @@ function compute_extreme_point( end """ - SymmetricLMO{LMO, TR, TI} + SubspaceLMO{LMO, TD, TI} -Symmetric LMO for the reduction operator defined by `TR` +Subspace LMO for the deflation operator defined by `TD` and the inflation operator defined by `TI`. -Computations are performed in the reduced subspace, and the +Computations are performed in the deflated subspace, and the effective call of the LMO first inflates the gradient, then -use the non-symmetric LMO, and finally reduces the output. +uses the full LMO, and finally deflates the output. + +Deflation operators typically project onto symmetric subspaces +or select relevant elements of a full tensor. +Scalar products should be treated carefully to ensure correctness +of the results; see also the companion `SubspaceVector` structure. """ -struct SymmetricLMO{LMO<:LinearMinimizationOracle,TR,TI} <: LinearMinimizationOracle +struct SubspaceLMO{LMO<:LinearMinimizationOracle,TD,TI} <: LinearMinimizationOracle lmo::LMO - reduce::TR + deflate::TD inflate::TI - function SymmetricLMO(lmo::LMO, reduce, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} - return new{typeof(lmo),typeof(reduce),typeof(inflate)}( lmo, reduce, inflate) + function SubspaceLMO(lmo::LMO, deflate, inflate=(x, lmo) -> x) where {LMO<:LinearMinimizationOracle} + return new{typeof(lmo),typeof(deflate),typeof(inflate)}( lmo, deflate, inflate) end end -function compute_extreme_point(sym::SymmetricLMO, direction; kwargs...) - return sym.reduce(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) +function compute_extreme_point(sym::SubspaceLMO, direction; kwargs...) 
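+    # inflate the direction to the full space, call the inner LMO there,
+    # and deflate the resulting vertex back to the subspace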
+ return sym.deflate(compute_extreme_point(sym.lmo, sym.inflate(direction, sym.lmo)), sym.lmo) end diff --git a/src/active_set.jl b/src/active_set.jl index 62393b47f..f02177933 100644 --- a/src/active_set.jl +++ b/src/active_set.jl @@ -27,17 +27,7 @@ ActiveSet{AT,R}() where {AT,R} = ActiveSet{AT,R,Vector{float(eltype(AT))}}([], [ ActiveSet{AT}() where {AT} = ActiveSet{AT,Float64,Vector{float(eltype(AT))}}() function ActiveSet(tuple_values::AbstractVector{Tuple{R,AT}}) where {AT,R} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - end - x = similar(atoms[1], float(eltype(atoms[1]))) - as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) - compute_active_set_iterate!(as) - return as + return ActiveSet{AT,R}(tuple_values) end function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) where {AT,R} @@ -48,7 +38,7 @@ function ActiveSet{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}) weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] end - x = similar(tuple_values[1][2], float(eltype(tuple_values[1][2]))) + x = similar(atoms[1], float(eltype(atoms[1]))) as = ActiveSet{AT,R,typeof(x)}(weights, atoms, x) compute_active_set_iterate!(as) return as @@ -326,3 +316,7 @@ function compute_active_set_iterate!(active_set::AbstractActiveSet{<:ScaledHotVe end return active_set.x end + +function update_weights!(as::AbstractActiveSet, new_weights) + as.weights .= new_weights +end diff --git a/src/active_set_quadratic.jl b/src/active_set_quadratic.jl index b200a8912..0b19d7dba 100644 --- a/src/active_set_quadratic.jl +++ b/src/active_set_quadratic.jl @@ -1,6 +1,6 @@ """ - ActiveSetQuadratic{AT, R, IT} + ActiveSetQuadraticProductCaching{AT, R, IT} Represents an active set of extreme vertices collected in a FW algorithm, along with their coefficients `(λ_i, a_i)`. @@ -9,7 +9,7 @@ The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` so that the gradient is simply `∇f(x)=Ax+b`. """ -struct ActiveSetQuadratic{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} +struct ActiveSetQuadraticProductCaching{AT, R <: Real, IT, H} <: AbstractActiveSet{AT,R,IT} weights::Vector{R} atoms::Vector{AT} x::IT @@ -48,11 +48,21 @@ function detect_quadratic_function(grad!, x0; test=true) return A, b end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} - return ActiveSetQuadratic(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) 
+function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticProductCaching(tuple_values, A, b) end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) where {AT,R,H} + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) +end + +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!::Function) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) +end + +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} n = length(tuple_values) weights = Vector{R}(undef, n) atoms = Vector{AT}(undef, n) @@ -64,43 +74,24 @@ function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::H, b) @inbounds for idx in 1:n weights[idx] = tuple_values[idx][1] atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) - for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) - end - dots_b[idx] = fast_dot(b, atoms[idx]) end x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + as = ActiveSetQuadraticProductCaching{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) + reset_quadratic_dots!(as) compute_active_set_iterate!(as) return as end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, grad!) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, detect_quadratic_function(grad!, tuple_values[1][2])...) 
-end - -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::H, b) where {AT,R,H} - n = length(tuple_values) - weights = Vector{R}(undef, n) - atoms = Vector{AT}(undef, n) - dots_x = zeros(R, n) - dots_A = Vector{Vector{R}}(undef, n) - dots_b = Vector{R}(undef, n) - weights_prev = zeros(R, n) - modified = trues(n) - @inbounds for idx in 1:n - weights[idx] = tuple_values[idx][1] - atoms[idx] = tuple_values[idx][2] - dots_A[idx] = Vector{R}(undef, idx) +# should only be called upon construction +# for active sets with a large number of atoms, this function becomes very costly +function reset_quadratic_dots!(as::ActiveSetQuadraticProductCaching{AT,R}) where {AT,R} + @inbounds for idx in 1:length(as) + as.dots_A[idx] = Vector{R}(undef, idx) for idy in 1:idx - dots_A[idx][idy] = fast_dot(A * atoms[idx], atoms[idy]) + as.dots_A[idx][idy] = fast_dot(as.A * as.atoms[idx], as.atoms[idy]) end - dots_b[idx] = fast_dot(b, atoms[idx]) + as.dots_b[idx] = fast_dot(as.b, as.atoms[idx]) end - x = similar(b) - as = ActiveSetQuadratic{AT,R,typeof(x),H}(weights, atoms, x, A, b, dots_x, dots_A, dots_b, weights_prev, modified) - compute_active_set_iterate!(as) return as end @@ -116,16 +107,17 @@ function Base.:*(a::Identity, b) return a.λ * b end end -function ActiveSetQuadratic(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic(tuple_values, Identity(A.λ), b) + +function ActiveSetQuadraticProductCaching(tuple_values::AbstractVector{Tuple{R,AT}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadraticProductCaching(tuple_values, Identity(A.λ), b) end -function ActiveSetQuadratic{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} - return ActiveSetQuadratic{AT,R}(tuple_values, Identity(A.λ), b) +function ActiveSetQuadraticProductCaching{AT,R}(tuple_values::AbstractVector{<:Tuple{<:Number,<:Any}}, A::UniformScaling, b) where {AT,R} + return ActiveSetQuadraticProductCaching{AT,R}(tuple_values, Identity(A.λ), b) end # these three functions do not update the active set iterate -function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} +function Base.push!(as::ActiveSetQuadraticProductCaching{AT,R}, (λ, a)) where {AT,R} dot_x = zero(R) dot_A = Vector{R}(undef, length(as)) dot_b = fast_dot(as.b, a) @@ -147,7 +139,8 @@ function Base.push!(as::ActiveSetQuadratic{AT,R}, (λ, a)) where {AT,R} return as end -function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) +# TODO multi-indices version +function Base.deleteat!(as::ActiveSetQuadraticProductCaching, idx::Int) @inbounds for i in 1:idx-1 as.dots_x[i] -= as.weights_prev[idx] * as.dots_A[idx][i] end @@ -165,7 +158,7 @@ function Base.deleteat!(as::ActiveSetQuadratic, idx::Int) return as end -function Base.empty!(as::ActiveSetQuadratic) +function Base.empty!(as::ActiveSetQuadraticProductCaching) empty!(as.atoms) empty!(as.weights) as.x .= 0 @@ -178,7 +171,7 @@ function Base.empty!(as::ActiveSetQuadratic) end function active_set_update!( - active_set::ActiveSetQuadratic{AT,R}, + active_set::ActiveSetQuadraticProductCaching{AT,R}, lambda, atom, renorm=true, idx=nothing; weight_purge_threshold=weight_purge_threshold_default(R), add_dropped_vertices=false, @@ -207,7 +200,7 @@ function active_set_update!( return active_set end -function active_set_renormalize!(active_set::ActiveSetQuadratic) +function active_set_renormalize!(active_set::ActiveSetQuadraticProductCaching) renorm = sum(active_set.weights) active_set.weights 
./= renorm active_set.weights_prev ./= renorm @@ -216,7 +209,7 @@ function active_set_renormalize!(active_set::ActiveSetQuadratic) return active_set end -function active_set_argmin(active_set::ActiveSetQuadratic, direction) +function active_set_argmin(active_set::ActiveSetQuadraticProductCaching, direction) valm = typemax(eltype(direction)) idxm = -1 idx_modified = findall(active_set.modified) @@ -247,7 +240,7 @@ function active_set_argmin(active_set::ActiveSetQuadratic, direction) return (active_set[idxm]..., idxm) end -function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) +function active_set_argminmax(active_set::ActiveSetQuadraticProductCaching, direction; Φ=0.5) valm = typemax(eltype(direction)) valM = typemin(eltype(direction)) idxm = -1 @@ -288,7 +281,7 @@ function active_set_argminmax(active_set::ActiveSetQuadratic, direction; Φ=0.5) end # in-place warm-start of a quadratic active set for A and b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b) where {AT,R,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R}, A::H, b) where {AT,R,H} @inbounds for idx in eachindex(warm_as) for idy in 1:idx warm_as.dots_A[idx][idy] = fast_dot(A * warm_as.atoms[idx], warm_as.atoms[idy]) @@ -299,7 +292,7 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R}, A::H, b end # in-place warm-start of a quadratic active set for b -function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) where {AT,R,IT,H} +function update_active_set_quadratic!(warm_as::ActiveSetQuadraticProductCaching{AT,R,IT,H}, b) where {AT,R,IT,H} warm_as.dots_x .= 0 warm_as.weights_prev .= 0 warm_as.modified .= true @@ -311,3 +304,8 @@ function update_active_set_quadratic!(warm_as::ActiveSetQuadratic{AT,R,IT,H}, b) return warm_as end +function update_weights!(as::ActiveSetQuadraticProductCaching, new_weights) + as.weights_prev .= as.weights + as.weights .= new_weights + as.modified .= true +end diff --git a/src/active_set_quadratic_direct_solve.jl b/src/active_set_quadratic_direct_solve.jl new file mode 100644 index 000000000..4a560377d --- /dev/null +++ b/src/active_set_quadratic_direct_solve.jl @@ -0,0 +1,343 @@ + +""" + ActiveSetQuadraticLinearSolve{AT, R, IT} + +Represents an active set of extreme vertices collected in a FW algorithm, +along with their coefficients `(λ_i, a_i)`. +`R` is the type of the `λ_i`, `AT` is the type of the atoms `a_i`. +The iterate `x = ∑λ_i a_i` is stored in x with type `IT`. +The objective function is assumed to be of the form `f(x)=½⟨x,Ax⟩+⟨b,x⟩+c` +so that the gradient is `∇f(x)=Ax+b`. + +This active set stores an inner `active_set` that keeps track of the current set of vertices and convex decomposition. +It therefore delegates all update, deletion, and addition operations to this inner active set. +The `weight`, `atoms`, and `x` fields should only be accessed to read and are effectively the same objects as those in the inner active set. + +The structure also contains a scheduler struct which is called with the `should_solve_lp` function. +To define a new frequency at which the LP should be solved, one can define another scheduler struct and implement the corresponding method. 
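+
+A hedged usage sketch, mirroring `examples/blended_pairwise_with_direct.jl` and assuming
+a feasible vertex `x0`, a target `xp`, and the objective `f(x) = ‖x - xp‖²` (so `A = 2I`
+and `b = -2xp`), with HiGHS as the LP solver:
+
+```julia
+import HiGHS
+import MathOptInterface as MOI
+using LinearAlgebra
+
+as = FrankWolfe.ActiveSetQuadraticLinearSolve(
+    [(1.0, copy(x0))],
+    2 * LinearAlgebra.I,
+    -2xp,
+    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
+)
+```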
+""" +struct ActiveSetQuadraticLinearSolve{ + AT, + R<:Real, + IT, + H, + BT, + OT<:MOI.AbstractOptimizer, + AS<:AbstractActiveSet, + SF, +} <: AbstractActiveSet{AT,R,IT} + weights::Vector{R} + atoms::Vector{AT} + x::IT + A::H # Hessian matrix + b::BT # linear term + active_set::AS + lp_optimizer::OT + scheduler::SF + counter::Base.RefValue{Int} +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, grad!::Function, lp_optimizer) + +Creates an ActiveSetQuadraticLinearSolve by computing the Hessian and linear term from `grad!`. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve(tuple_values, A, b, lp_optimizer, scheduler=scheduler) +end + +""" + ActiveSetQuadraticLinearSolve(tuple_values::Vector{Tuple{R,AT}}, A, b, lp_optimizer) + +Creates an `ActiveSetQuadraticLinearSolve` from the given Hessian `A`, linear term `b` and `lp_optimizer` by creating an inner `ActiveSetQuadraticProductCaching` active set. +""" +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadraticProductCaching(tuple_values, A, b) + return ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + inner_as.A, + inner_as.b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + A::LinearAlgebra.UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) + as = ActiveSetQuadraticLinearSolve( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + inner_as::AbstractActiveSet, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) + A, b = detect_quadratic_function(grad!, inner_as.atoms[1]) + return ActiveSetQuadraticLinearSolve(inner_as, A, b, lp_optimizer; scheduler=scheduler) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + grad!::Function, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + A, b = detect_quadratic_function(grad!, tuple_values[1][2]) + return ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values, + A, + b, + lp_optimizer; + scheduler=scheduler, + ) +end + +function ActiveSetQuadraticLinearSolve{AT,R}( + tuple_values::Vector{<:Tuple{<:Number,<:Any}}, + A::H, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R,H} + inner_as = ActiveSetQuadraticProductCaching{AT,R}(tuple_values, A, b) + as = ActiveSetQuadraticLinearSolve{AT,R,typeof(x),H}( + inner_as.weights, + inner_as.atoms, + inner_as.x, + A, + b, + inner_as, + lp_optimizer, + scheduler, + Ref(0), + ) + compute_active_set_iterate!(as) + return as +end + +function ActiveSetQuadraticLinearSolve( + tuple_values::Vector{Tuple{R,AT}}, + A::UniformScaling, + b, + lp_optimizer; + scheduler=LogScheduler(), +) where {AT,R} + 
return ActiveSetQuadraticLinearSolve(
+        tuple_values,
+        Identity(A.λ),
+        b,
+        lp_optimizer;
+        scheduler=scheduler,
+    )
+end
+function ActiveSetQuadraticLinearSolve{AT,R}(
+    tuple_values::Vector{<:Tuple{<:Number,<:Any}},
+    A::UniformScaling,
+    b,
+    lp_optimizer;
+    scheduler=LogScheduler(),
+) where {AT,R}
+    return ActiveSetQuadraticLinearSolve{AT,R}(
+        tuple_values,
+        Identity(A.λ),
+        b,
+        lp_optimizer;
+        scheduler=scheduler,
+    )
+end
+
+# all mutating functions are delegated to the inner active set
+
+Base.push!(as::ActiveSetQuadraticLinearSolve, tuple) = push!(as.active_set, tuple)
+
+Base.deleteat!(as::ActiveSetQuadraticLinearSolve, idx) = deleteat!(as.active_set, idx)
+
+Base.empty!(as::ActiveSetQuadraticLinearSolve) = empty!(as.active_set)
+
+function active_set_update!(
+    as::ActiveSetQuadraticLinearSolve{AT,R},
+    lambda,
+    atom,
+    renorm=true,
+    idx=nothing;
+    weight_purge_threshold=weight_purge_threshold_default(R),
+    add_dropped_vertices=false,
+    vertex_storage=nothing,
+) where {AT,R}
+    if idx === nothing
+        idx = find_atom(as, atom)
+    end
+    active_set_update!(
+        as.active_set,
+        lambda,
+        atom,
+        renorm,
+        idx;
+        weight_purge_threshold=weight_purge_threshold,
+        add_dropped_vertices=add_dropped_vertices,
+        vertex_storage=vertex_storage,
+    )
+    # new atom introduced, we can solve the auxiliary LP
+    if idx < 0
+        as.counter[] += 1
+        if should_solve_lp(as, as.scheduler)
+            solve_quadratic_activeset_lp!(as)
+        end
+    end
+    return as
+end
+
+active_set_renormalize!(as::ActiveSetQuadraticLinearSolve) = active_set_renormalize!(as.active_set)
+
+function active_set_argmin(as::ActiveSetQuadraticLinearSolve, direction)
+    return active_set_argmin(as.active_set, direction)
+end
+
+function active_set_argminmax(as::ActiveSetQuadraticLinearSolve, direction; Φ=0.5)
+    return active_set_argminmax(as.active_set, direction; Φ=Φ)
+end
+
+# generic quadratic with quadratic information provided
+
+"""
+    solve_quadratic_activeset_lp!(as::ActiveSetQuadraticLinearSolve{AT, R, IT, H})
+
+Solves the auxiliary LP over the current active set.
+The method is specialized by type `H` of the Hessian matrix `A`.
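+
+With `V` denoting the matrix whose columns are the current atoms, the LP solved here reads
+
+    min ∑ᵢ λᵢ   s.t.   λ ≥ 0,   ∑ᵢ λᵢ = 1,   Vᵀ A V λ = -Vᵀ b,
+
+i.e., a feasibility problem over the simplex searching for convex weights `λ` such that
+`⟨A V λ + b, vᵢ⟩ = 0` for every atom `vᵢ` (the gradient at `x = V λ` is orthogonal to all
+atoms); near-zero weights are subsequently purged from the active set.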
+""" +function solve_quadratic_activeset_lp!( + as::ActiveSetQuadraticLinearSolve{AT,R,IT,H}, +) where {AT,R,IT,H} + nv = length(as) + o = as.lp_optimizer + MOI.empty!(o) + λ = MOI.add_variables(o, nv) + # λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(o, λ, MOI.GreaterThan(0.0)) + sum_of_variables = MOI.ScalarAffineFunction(MOI.ScalarAffineTerm.(1.0, λ), 0.0) + MOI.add_constraint(o, sum_of_variables, MOI.EqualTo(1.0)) + # Vᵗ A V λ == -Vᵗ b + for atom in as.atoms + lhs = MOI.ScalarAffineFunction{Float64}([], 0.0) + Base.sizehint!(lhs.terms, nv) + # replaces direct sum because of MOI and MutableArithmetic slow sums + for j in 1:nv + push!( + lhs.terms, + _compute_quadratic_constraint_term(atom, as.A, as.atoms[j], λ[j]), + ) + end + rhs = -dot(atom, as.b) + MOI.add_constraint(o, lhs, MOI.EqualTo(rhs)) + end + MOI.set(o, MOI.ObjectiveFunction{typeof(sum_of_variables)}(), sum_of_variables) + MOI.set(o, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(o) + if MOI.get(o, MOI.TerminationStatus()) ∉ (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + return as + end + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(o, MOI.VariablePrimal(), λ[idx]) + if weight_value <= 2 * weight_purge_threshold_default(typeof(weight_value)) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set, indices_to_remove) + @assert length(as) == length(new_weights) + update_weights!(as.active_set, new_weights) + active_set_cleanup!(as) + active_set_renormalize!(as) + compute_active_set_iterate!(as) + return as +end + +function _compute_quadratic_constraint_term(atom1, A::AbstractMatrix, atom2, λ) + return MOI.ScalarAffineTerm(fast_dot(atom1, A, atom2), λ) +end + +function _compute_quadratic_constraint_term(atom1, A::Union{Identity,LinearAlgebra.UniformScaling}, atom2, λ) + return MOI.ScalarAffineTerm(A.λ * fast_dot(atom1, atom2), λ) +end + +struct LogScheduler{T} + start_time::Int + scaling_factor::T + max_interval::Int + current_interval::Base.RefValue{Int} + last_solve_counter::Base.RefValue{Int} +end + +LogScheduler(; start_time=20, scaling_factor=1.5, max_interval=1000) = + LogScheduler(start_time, scaling_factor, max_interval, Ref(start_time), Ref(0)) + +function should_solve_lp(as::ActiveSetQuadraticLinearSolve, scheduler::LogScheduler) + if as.counter[] - scheduler.last_solve_counter[] >= scheduler.current_interval[] + scheduler.last_solve_counter[] = as.counter[] + scheduler.current_interval[] = min( + round(Int, scheduler.scaling_factor * scheduler.current_interval[]), + scheduler.max_interval, + ) + return true + end + return false +end diff --git a/src/active_set_sparsifier.jl b/src/active_set_sparsifier.jl new file mode 100644 index 000000000..6a408a0ac --- /dev/null +++ b/src/active_set_sparsifier.jl @@ -0,0 +1,96 @@ +struct ActiveSetSparsifier{AT,R,IT,AS<:AbstractActiveSet{AT,R,IT},OT<:MOI.AbstractOptimizer} <: + AbstractActiveSet{AT,R,IT} + active_set::AS + weights::Vector{R} + atoms::Vector{AT} + x::IT + optimizer::OT + minimum_vertices::Int + solve_frequency::Int + counter::Base.RefValue{Int} +end + +function ActiveSetSparsifier( + active_set::AbstractActiveSet, + optimizer::MOI.AbstractOptimizer; + minimum_vertices=50, + solve_frequency=50, +) + return ActiveSetSparsifier( + active_set, + active_set.weights, + active_set.atoms, + active_set.x, + optimizer, + minimum_vertices, + solve_frequency, + Ref(0), + ) +end + +function Base.push!(as::ActiveSetSparsifier, (λ, a)) + return push!(as.active_set, (λ, 
a)) +end + +Base.deleteat!(as::ActiveSetSparsifier, idx::Int) = deleteat!(as.active_set, idx) + +Base.empty!(as::ActiveSetSparsifier) = empty!(as.active_set) + +function active_set_update!( + as::ActiveSetSparsifier{AT,R,IT,AS}, + lambda, + atom, + renorm=true, + idx=nothing; + kwargs..., +) where {AS,AT,R,IT} + active_set_update!(as.active_set, lambda, atom, renorm, idx; kwargs...) + n = length(as) + as.counter[] += 1 + if n > as.minimum_vertices && mod(as.counter[], as.solve_frequency) == 0 + # sparsifying active set + MOI.empty!(as.optimizer) + x0 = as.active_set.x + λ = MOI.add_variables(as.optimizer, n) + # λ ∈ Δ_n ⇔ λ ≥ 0, ∑ λ == 1 + MOI.add_constraint.(as.optimizer, λ, MOI.GreaterThan(0.0)) + MOI.add_constraint(as.optimizer, sum(λ; init=0.0), MOI.EqualTo(1.0)) + x_sum = 0 * as.active_set.atoms[1] + for (idx, atom) in enumerate(as.active_set.atoms) + x_sum += λ[idx] * atom + end + for idx in eachindex(x_sum) + MOI.add_constraint(as.optimizer, x_sum[idx], MOI.EqualTo(x0[idx])) + end + # Set a dummy objective (minimize ∑λ) + dummy_objective = sum(λ; init=0.0) + MOI.set(as.optimizer, MOI.ObjectiveFunction{typeof(dummy_objective)}(), dummy_objective) + MOI.set(as.optimizer, MOI.ObjectiveSense(), MOI.MIN_SENSE) + MOI.optimize!(as.optimizer) + if MOI.get(as.optimizer, MOI.TerminationStatus()) in + (MOI.OPTIMAL, MOI.FEASIBLE_POINT, MOI.ALMOST_OPTIMAL) + indices_to_remove = Int[] + new_weights = R[] + for idx in eachindex(λ) + weight_value = MOI.get(as.optimizer, MOI.VariablePrimal(), λ[idx]) + if weight_value <= sqrt(weight_purge_threshold_default(typeof(weight_value))) + push!(indices_to_remove, idx) + else + push!(new_weights, weight_value) + end + end + deleteat!(as.active_set.atoms, indices_to_remove) + deleteat!(as.active_set.weights, indices_to_remove) + @assert length(as) == length(new_weights) + as.active_set.weights .= new_weights + active_set_renormalize!(as) + end + end + return as +end + +active_set_renormalize!(as::ActiveSetSparsifier) = active_set_renormalize!(as.active_set) + +active_set_argmin(as::ActiveSetSparsifier, direction) = active_set_argmin(as.active_set, direction) +active_set_argminmax(as::ActiveSetSparsifier, direction; Φ=0.5) = + active_set_argminmax(as.active_set, direction; Φ=Φ) diff --git a/src/alternating_methods.jl b/src/alternating_methods.jl index ec1c5b2f5..d07a73649 100644 --- a/src/alternating_methods.jl +++ b/src/alternating_methods.jl @@ -35,12 +35,12 @@ end Alternating Linear Minimization minimizes the objective `f` over the intersections of the feasible domains specified by `lmos`. The tuple `x0` defines the initial points for each domain. -Returns a tuple `(x, v, primal, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, primal, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `primal` primal value `f(x)` - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 of the sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. 
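+
+A hedged sketch of a call, assuming two LMOs `lmo1, lmo2` with feasible starting points
+`x1, x2` and using the block-coordinate Frank-Wolfe algorithm as inner method:
+
+```julia
+x, v, primal, dual_gap, dist2, traj_data = FrankWolfe.alternating_linear_minimization(
+    FrankWolfe.block_coordinate_frank_wolfe,
+    f,
+    grad!,
+    (lmo1, lmo2),
+    (x1, x2);
+    lambda=1.0,
+    verbose=true,
+)
+```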
""" function alternating_linear_minimization( @@ -49,17 +49,17 @@ function alternating_linear_minimization( grad!, lmos::NTuple{N,LinearMinimizationOracle}, x0::Tuple{Vararg{Any,N}}; - lambda::Union{Float64, Function}=1.0, + lambda::Union{Float64,Function}=1.0, verbose=false, trajectory=false, callback=nothing, max_iteration=10000, - print_iter = max_iteration / 10, + print_iter=max_iteration / 10, memory_mode=InplaceEmphasis(), line_search::LS=Adaptive(), epsilon=1e-7, kwargs..., -) where {N, LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} +) where {N,LS<:Union{LineSearchMethod,NTuple{N,LineSearchMethod}}} x0_bc = BlockVector([x0[i] for i in 1:N], [size(x0[i]) for i in 1:N], sum(length, x0)) gradf = similar(x0_bc) @@ -74,21 +74,14 @@ function alternating_linear_minimization( for i in 1:N grad!(gradf.blocks[i], x.blocks[i]) end - t = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] + t = [N * b - sum(x.blocks) for b in x.blocks] return storage.blocks = λ[] * gradf.blocks + t end end - function dist2(x::BlockVector) - s = 0 - for i=1:N - for j=1:i-1 - diff = x.blocks[i] - x.blocks[j] - s += fast_dot(diff, diff) - end - end - return s - end + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function build_objective() λ = Ref(λ0) @@ -122,8 +115,11 @@ function alternating_linear_minimization( num_type = eltype(x0[1]) grad_type = eltype(gradf.blocks[1]) - line_search_type = line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) - println("MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration") + line_search_type = + line_search isa Tuple ? [typeof(a) for a in line_search] : typeof(line_search) + println( + "MEMORY_MODE: $memory_mode STEPSIZE: $line_search_type EPSILON: $epsilon MAXITERATION: $max_iteration", + ) println("TYPE: $num_type GRADIENTTYPE: $grad_type") println("LAMBDA: $lambda") @@ -177,7 +173,7 @@ function alternating_linear_minimization( end if lambda isa Function - callback = function (state,args...) + callback = function (state, args...) state.f.λ[] = lambda(state) state.grad!.λ[] = state.f.λ[] @@ -211,33 +207,15 @@ function alternating_linear_minimization( end -function ProjectionFW(y, lmo; max_iter=10000, eps=1e-3) - f(x) = sum(abs2, x - y) - grad!(storage, x) = storage .= 2 * (x - y) - - x0 = FrankWolfe.compute_extreme_point(lmo, y) - x_opt, _ = FrankWolfe.frank_wolfe( - f, - grad!, - lmo, - x0, - epsilon=eps, - max_iteration=max_iter, - trajectory=true, - line_search=FrankWolfe.Adaptive(verbose=false, relaxed_smoothness=false), - ) - return x_opt -end - """ alternating_projections(lmos::NTuple{N,LinearMinimizationOracle}, x0; ...) where {N} Computes a point in the intersection of feasible domains specified by `lmos`. -Returns a tuple `(x, v, dual_gap, infeas, traj_data)` with: +Returns a tuple `(x, v, dual_gap, dist2, traj_data)` with: - `x` cartesian product of final iterates - `v` cartesian product of last vertices of the LMOs - `dual_gap` final Frank-Wolfe gap -- `infeas` sum of squared, pairwise distances between iterates +- `dist2` is 1/2 * sum of squared, pairwise distances between iterates - `traj_data` vector of trajectory information. 
""" function alternating_projections( @@ -252,6 +230,10 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} return alternating_projections( ProductLMO(lmos), @@ -265,6 +247,10 @@ function alternating_projections( callback, traj_data, timeout, + proj_method, + inner_epsilon, + reuse_active_set, + kwargs..., ) end @@ -281,17 +267,21 @@ function alternating_projections( callback=nothing, traj_data=[], timeout=Inf, + proj_method=frank_wolfe, + inner_epsilon::Function=t -> 1 / (t^2 + 1), + reuse_active_set=false, + kwargs..., ) where {N} # header and format string for output of the algorithm - headers = ["Type", "Iteration", "Dual Gap", "Infeas", "Time", "It/sec"] + headers = ["Type", "Iteration", "Dual Gap", "dist2", "Time", "It/sec"] format_string = "%6s %13s %14e %14e %14e %14e\n" - function format_state(state, infeas) + function format_state(state, primal) rep = ( steptype_string[Symbol(state.step_type)], string(state.t), Float64(state.dual_gap), - Float64(infeas), + Float64(primal), state.time, state.t / state.time, ) @@ -300,27 +290,61 @@ function alternating_projections( t = 0 dual_gap = Inf - x = fill(x0, N) - v = similar(x) + dual_gaps = fill(Inf, N) + x = BlockVector(compute_extreme_point.(lmo.lmos, fill(x0, N))) step_type = ST_REGULAR gradient = similar(x) - ndim = ndims(x) - infeasibility(x) = sum( - fast_dot( - selectdim(x, ndim, i) - selectdim(x, ndim, j), - selectdim(x, ndim, i) - selectdim(x, ndim, j), - ) for i in 1:N for j in 1:i-1 - ) + if reuse_active_set + if proj_method ∉ + [away_frank_wolfe, blended_pairwise_conditional_gradient, pairwise_frank_wolfe] + error("The selected FW method does not support active sets reuse.") + end + active_sets = [ActiveSet([(1.0, x.blocks[i])]) for i in 1:N] + end - partial_infeasibility(x) = - sum(fast_dot(x[mod(i - 2, N)+1] - x[i], x[mod(i - 2, N)+1] - x[i]) for i in 1:N) + dist2(x::BlockVector) = + 0.5 * (N - 1) * sum(fast_dot(x.blocks[i], x.blocks[i]) for i in 1:N) - + sum(fast_dot(x.blocks[i], x.blocks[j]) for i in 1:N for j in 1:i-1) function grad!(storage, x) - @. 
storage = [2 * (x[i] - x[mod(i - 2, N)+1]) for i in 1:N] + return storage.blocks = [2.0 * (N * b - sum(x.blocks)) for b in x.blocks] end - projection_step(x, i, t) = ProjectionFW(x, lmo.lmos[i]; eps=1 / (t^2 + 1)) + function projection_step(i, t) + xii = x.blocks[mod(i - 2, N)+1] # iterate in previous block + f(y) = sum(abs2, y - xii) + function grad_proj!(storage, y) + return storage .= 2 * (y - xii) + end + + if reuse_active_set + + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + active_sets[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + active_sets[i] = results[:active_set] + else + results = proj_method( + f, + grad_proj!, + lmo.lmos[i], + x.blocks[i]; + epsilon=inner_epsilon(t), + max_iteration=10000, + line_search=Adaptive(), + kwargs..., + ) + end + return results[:x], results[:dual_gap] + end if trajectory @@ -372,19 +396,18 @@ function alternating_projections( # Projection step: for i in 1:N # project the previous iterate on the i-th feasible region - x[i] = projection_step(x[mod(i - 2, N)+1], i, t) + x.blocks[i], dual_gaps[i] = projection_step(i, t) end + dual_gap = sum(dual_gaps) + # Update gradients grad!(gradient, x) - # Update dual gaps - v = compute_extreme_point.(lmo.lmos, gradient) - dual_gap = fast_dot(x - v, gradient) # go easy on the memory - only compute if really needed if ((mod(t, print_iter) == 0 && verbose) || callback !== nothing) - infeas = infeasibility(x) + primal = dist2(x) end first_iter = false @@ -393,12 +416,12 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, - v, + nothing, nothing, nothing, nothing, @@ -408,7 +431,7 @@ function alternating_projections( step_type, ) # @show state - if callback(state, infeas) === false + if callback(state, primal) === false break end end @@ -419,9 +442,9 @@ function alternating_projections( # this is important as some variants do not recompute f(x) and the dual_gap regularly but only when reporting # hence the final computation. step_type = ST_LAST - infeas = infeasibility(x) + primal = dist2(x) grad!(gradient, x) - v = compute_extreme_point.(lmo.lmos, gradient) + v = compute_extreme_point(lmo, gradient) dual_gap = fast_dot(x - v, gradient) tot_time = (time_ns() - time_start) / 1.0e9 @@ -429,8 +452,8 @@ function alternating_projections( if callback !== nothing state = CallbackState( t, - infeas, - infeas - dual_gap, + primal, + primal - dual_gap, dual_gap, tot_time, x, @@ -443,9 +466,9 @@ function alternating_projections( gradient, step_type, ) - callback(state, infeas) + callback(state, primal) end - return x, v, dual_gap, infeas, traj_data + return x, v, dual_gap, primal, traj_data end diff --git a/src/blended_pairwise.jl b/src/blended_pairwise.jl index e2efec4d2..a42862063 100644 --- a/src/blended_pairwise.jl +++ b/src/blended_pairwise.jl @@ -1,4 +1,3 @@ - """ blended_pairwise_conditional_gradient(f, grad!, lmo, x0; kwargs...) 
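
The sparsifying active set introduced earlier in this patch can be passed to
this method in place of the starting point; a minimal sketch (`f`, `grad!`,
`lmo`, and `x0` are assumed from the surrounding problem):

```julia
import HiGHS
import MathOptInterface as MOI

as = FrankWolfe.ActiveSetSparsifier(
    FrankWolfe.ActiveSet([(1.0, x0)]),
    MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)),
)
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
    f,
    grad!,
    lmo,
    as;
    max_iteration=1000,
)
```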
@@ -325,7 +324,6 @@ function blended_pairwise_conditional_gradient(
             linesearch_workspace,
             memory_mode,
         )
-
         if callback !== nothing
             state = CallbackState(
                 t,
diff --git a/src/block_coordinate_algorithms.jl b/src/block_coordinate_algorithms.jl
index deb8a5a25..0d034a7d3 100644
--- a/src/block_coordinate_algorithms.jl
+++ b/src/block_coordinate_algorithms.jl
@@ -41,20 +41,57 @@ end
 
 StochasticUpdate() = StochasticUpdate(-1)
 
-mutable struct DualGapOrder <: BlockCoordinateUpdateOrder
+"""
+The dual gap order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual gaps.
+"""
+struct DualGapOrder <: BlockCoordinateUpdateOrder
     limit::Int
 end
 
 DualGapOrder() = DualGapOrder(-1)
 
+"""
+The dual progress order initiates one round of `limit` many updates.
+The corresponding blocks are sampled with probabilities proportional to their respective dual progress.
+"""
 mutable struct DualProgressOrder <: BlockCoordinateUpdateOrder
     limit::Int
     previous_dual_gaps::Vector{Float64}
     dual_progress::Vector{Float64}
+    last_indices::Vector{Int}
 end
 
-DualProgressOrder() = DualProgressOrder(-1, [], [])
-DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [])
+DualProgressOrder() = DualProgressOrder(-1, [], [], [])
+DualProgressOrder(i::Int64) = DualProgressOrder(i, [], [], [])
+
+"""
+The lazy update order is discussed in "Flexible block-iterative
+analysis for the Frank-Wolfe algorithm" by Braun, Pokutta, &
+Woodstock (2024).
+`lazy_block` is the index of a computationally expensive block to update;
+`refresh_rate` describes the frequency at which we perform a full activation;
+and `block_size` describes the number of "faster" blocks (i.e., those
+excluding `lazy_block`) activated, chosen uniformly at random, during each
+of the "faster" iterations; for more detail, see the article.
+If `block_size` is unspecified, it defaults to 1.
+Note: This methodology is currently only proven to work
+with `FrankWolfe.Shortstep` line searches and a (not-yet
+implemented) adaptive method; see the article for details.
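+
+For illustration, a hypothetical three-block invocation where the third block
+is the expensive one (`f`, `grad!`, the three LMOs, and `x0` are assumed from
+the surrounding problem setup):
+
+```julia
+x, _ = FrankWolfe.block_coordinate_frank_wolfe(
+    f,
+    grad!,
+    FrankWolfe.ProductLMO(lmo1, lmo2, lmo_expensive),
+    x0;
+    update_order=FrankWolfe.LazyUpdate(3, 10),
+    line_search=FrankWolfe.Shortstep(2.0),
+)
+```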
+""" +struct LazyUpdate <: BlockCoordinateUpdateOrder + lazy_block::Int + refresh_rate::Int + block_size::Int +end + +function LazyUpdate(lazy_block::Int,refresh_rate::Int) + return LazyUpdate(lazy_block, refresh_rate, 1) +end function select_update_indices(::FullUpdate, s::CallbackState, _) return [1:length(s.lmo.lmos)] @@ -94,6 +131,19 @@ function select_update_indices(u::StochasticUpdate, s::CallbackState, _) return [[rand(1:l)] for i in 1:u.limit] end +function sample_without_replacement(n::Int, weights::Vector{Float64}) + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + indices = zeros(Int, n) + for i in 1:n + indices[i] = findfirst(cumsum_weights .> rand()) + weights[indices[i]] = 0 + weights = weights ./ sum(weights) + cumsum_weights = cumsum(weights) + end + return indices +end + function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps) l = length(s.lmo.lmos) @@ -101,22 +151,32 @@ function select_update_indices(u::DualProgressOrder, s::CallbackState, dual_gaps @assert u.limit <= l @assert u.limit > 0 || u.limit == -1 - # In the first iteration update every block so we get finite dual progress - if s.t < 2 - u.previous_dual_gaps = copy(dual_gaps) - return [[i] for i=1:l] - end - - if s.t == 1 - u.dual_progress = dual_gaps - u.previous_dual_gaps + # In the first two iterations update every block so we get finite dual progress + if s.t < 2 + u.dual_progress = ones(l) + indices = [[i for i=1:l]] else - diff = dual_gaps - u.previous_dual_gaps - u.dual_progress = [d == 0 ? u.dual_progress[i] : d for (i, d) in enumerate(diff)] + # Update dual progress only on updated blocks + for i in u.last_indices + d = u.previous_dual_gaps[i] - dual_gaps[i] + if d < 0 + u.dual_progress[i] = 0 + else + u.dual_progress[i] = d + end + end + n = u.limit == -1 ? l : u.limit + + # If less than n blocks have non-zero dual progress, update all of them + if sum(u.dual_progress .!= 0) < n + indices = [[i for i=1:l]] + else + indices = [sample_without_replacement(n, u.dual_progress)] + end end u.previous_dual_gaps = copy(dual_gaps) - - n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(u.dual_progress/sum(u.dual_progress)) .> rand())] for _=1:n] + u.last_indices = vcat(indices...) + return indices end function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) @@ -128,11 +188,20 @@ function select_update_indices(u::DualGapOrder, s::CallbackState, dual_gaps) # In the first iteration update every block so we get finite dual gaps if s.t < 1 - return [[i] for i=1:l] + return [[i for i=1:l]] end n = u.limit == -1 ? l : u.limit - return [[findfirst(cumsum(dual_gaps/sum(dual_gaps)) .> rand())] for _=1:n] + return [sample_without_replacement(n, dual_gaps)] +end + +function select_update_indices(update::LazyUpdate, s::CallbackState, dual_gaps) + #Returns a sequence of randomized cheap indices by + #excluding update.lazy_block until "refresh_rate" updates + #occur, then adds an update of everything while mainting + #randomized order. 
+ l = length(s.lmo.lmos) + return push!([[rand(range(1,l)[1:l .!= update.lazy_block]) for _ in range(1,update.block_size)] for _ in 1:(update.refresh_rate -1)], range(1,l)) end """ @@ -200,19 +269,18 @@ mutable struct BPCGStep <: UpdateStep phi::Float64 end -function Base.copy(::FrankWolfeStep) - return FrankWolfeStep() -end +Base.copy(::FrankWolfeStep) = FrankWolfeStep() function Base.copy(obj::BPCGStep) if obj.active_set === nothing - return BPCGStep(false, nothing, obj.renorm_interval, obj.lazy_tolerance, Inf) + return BPCGStep(obj.lazy, nothing, obj.renorm_interval, obj.lazy_tolerance, obj.phi) else return BPCGStep(obj.lazy, copy(obj.active_set), obj.renorm_interval, obj.lazy_tolerance, obj.phi) end end BPCGStep() = BPCGStep(false, nothing, 1000, 2.0, Inf) +BPCGStep(lazy::Bool) = BPCGStep(lazy, nothing, 1000, 2.0, Inf) function update_iterate( ::FrankWolfeStep, @@ -287,7 +355,7 @@ function update_iterate( # minor modification from original paper for improved sparsity # (proof follows with minor modification when estimating the step) - if local_gap > s.phi / s.lazy_tolerance + if local_gap > max(s.phi / s.lazy_tolerance, 0) # Robust condition to not drop the zero-vector if the dual_gap is negative by inaccuracy d = muladd_memory_mode(memory_mode, d, a, v_local) vertex_taken = v_local gamma_max = a_lambda diff --git a/src/linesearch.jl b/src/linesearch.jl index dd27e44d1..f974b9c38 100644 --- a/src/linesearch.jl +++ b/src/linesearch.jl @@ -369,28 +369,31 @@ Convergence is not guaranteed in general. # References - [Secant Method](https://en.wikipedia.org/wiki/Secant_method) """ -struct Secant{F} <: LineSearchMethod +struct Secant{F,LSM<:LineSearchMethod} <: LineSearchMethod + inner_ls::LSM limit_num_steps::Int tol::Float64 domain_oracle::F end function Secant(limit_num_steps, tol) - return Secant(limit_num_steps, tol, x -> true) + return Secant(Backtracking(), limit_num_steps, tol, x -> true) end -function Secant(; limit_num_steps=40, tol=1e-8) - return Secant(limit_num_steps, tol) +function Secant(;inner_ls=Backtracking(), limit_num_steps=40, tol=1e-8, domain_oracle=(x -> true)) + return Secant(inner_ls, limit_num_steps, tol, domain_oracle) end -mutable struct SecantWorkspace{XT,GT} +mutable struct SecantWorkspace{XT,GT, IWS} + inner_ws::IWS x::XT gradient::GT last_gamma::Float64 end -function build_linesearch_workspace(::Secant, x, gradient) - return SecantWorkspace(similar(x), similar(gradient), 1.0) # Initialize last_gamma to 1.0 +function build_linesearch_workspace(ls::Secant, x, gradient) + inner_ws = build_linesearch_workspace(ls.inner_ls, x, gradient) + return SecantWorkspace(inner_ws, similar(x), similar(gradient), 1.0) # Initialize last_gamma to 1.0 end function perform_line_search( @@ -422,7 +425,7 @@ function perform_line_search( while abs(dot_gdir) > line_search.tol if i > line_search.limit_num_steps workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end grad!(grad_storage, storage) @@ -430,7 +433,7 @@ function perform_line_search( if dot_gdir_new ≈ dot_gdir workspace.last_gamma = best_gamma # Update last_gamma before returning - return best_gamma + break end gamma_new = gamma - dot_gdir_new * (gamma - gamma_prev) / (dot_gdir_new - dot_gdir) @@ -448,6 +451,33 @@ function perform_line_search( dot_gdir = dot_gdir_new i += 1 end + if abs(dot_gdir) > line_search.tol + # Choose gamma_max to be domain feasible + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + while !line_search.domain_oracle(storage) + gamma_max 
/= 2 + storage = muladd_memory_mode(memory_mode, storage, x, gamma_max, d) + end + gamma = perform_line_search( + line_search.inner_ls, + 0, + f, + grad!, + gradient, + x, + d, + gamma_max, + workspace.inner_ws, + memory_mode, + ) + + storage = muladd_memory_mode(memory_mode, storage, x, gamma, d) + new_val = f(storage) + + if new_val <= best_val + best_gamma = gamma + end + end workspace.last_gamma = best_gamma # Update last_gamma before returning return best_gamma end diff --git a/src/moi_oracle.jl b/src/moi_oracle.jl index a0c1b3d01..ba37356b0 100644 --- a/src/moi_oracle.jl +++ b/src/moi_oracle.jl @@ -1,6 +1,6 @@ """ - MathOptLMO{OT <: MOI.Optimizer} <: LinearMinimizationOracle + MathOptLMO{OT <: MOI.AbstractOptimizer} <: LinearMinimizationOracle Linear minimization oracle with feasible space defined through a MathOptInterface.Optimizer. The oracle call sets the direction and reruns the optimizer. diff --git a/src/pairwise.jl b/src/pairwise.jl index a06ca2b19..b6e8973a4 100644 --- a/src/pairwise.jl +++ b/src/pairwise.jl @@ -258,28 +258,28 @@ function pairwise_frank_wolfe( gamma = min(gamma_max, gamma) - # cleanup and renormalize every x iterations. Only for the fw steps. - renorm = mod(t, renorm_interval) == 0 # away update active_set_update!( active_set, -gamma, away_vertex, - true, + false, away_index, add_dropped_vertices=use_extra_vertex_storage, vertex_storage=extra_vertex_storage, ) - if add_dropped_vertices && gamma == gamma_max + if add_dropped_vertices && gamma ≈ gamma_max for vtx in active_set.atoms if vtx != v push!(extra_vertex_storage, vtx) end end end - # fw update - active_set_update!(active_set, gamma, fw_vertex, renorm, fw_index) + # fw update + active_set_update!(active_set, gamma, fw_vertex, true, fw_index) end + # println(active_set.weights) + # println([atom[1] for atom in active_set.atoms]) if callback !== nothing state = CallbackState( @@ -328,6 +328,7 @@ function pairwise_frank_wolfe( v = compute_extreme_point(lmo, gradient) primal = f(x) dual_gap = fast_dot(x, gradient) - fast_dot(v, gradient) + dual_gap = min(phi_value, dual_gap) step_type = ST_LAST tot_time = (time_ns() - time_start) / 1e9 if callback !== nothing diff --git a/src/types.jl b/src/types.jl index 0a00c2d16..e5bc9b266 100644 --- a/src/types.jl +++ b/src/types.jl @@ -256,55 +256,60 @@ Base.@propagate_inbounds function muladd_memory_mode(::InplaceEmphasis, x::Matri end """ - SymmetricArray{T, DT} + SubspaceVector{HasMultiplicities, T} +Companion structure of `SubspaceLMO` containing three fields: +- `data` is the full structure to be deflated, +- `vec` is a vector in the reduced subspace in which computations are performed, +- `mul` is only used to compute scalar products when `HasMultiplicities = true`, +which should be avoided (when possible) for performance reasons. 
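+
+For instance, a real symmetric 2×2 matrix is determined by 3 entries, the
+off-diagonal one counting twice in scalar products, hence the multiplicity
+vector below:
+
+```julia
+using LinearAlgebra
+A = [1.0 2.0; 2.0 3.0]
+v = FrankWolfe.SubspaceVector(A, [1.0, 2.0, 3.0], [1.0, 2.0, 1.0])
+dot(v, v) ≈ dot(A, A)  # both equal 18.0
+```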
""" -struct SymmetricArray{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} +struct SubspaceVector{HasMultiplicities,T,DT,V<:AbstractVector{T}} <: AbstractVector{T} data::DT # full array to be symmetrised, will generally fall out of sync wrt vec vec::V # vector representing the array mul::Vector{T} # only used for scalar products end -function SymmetricArray(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{false,T,DT,V}(data, vec, T[]) +function SubspaceVector(data::DT, vec::V) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{false,T,DT,V}(data, vec, T[]) end -function SymmetricArray(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} - return SymmetricArray{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) +function SubspaceVector(data::DT, vec::V, mul::Vector) where {DT,V<:AbstractVector{T}} where {T} + return SubspaceVector{true,T,DT,V}(data, vec, convert(Vector{T}, mul)) end -Base.@propagate_inbounds function Base.getindex(A::SymmetricArray, i) +Base.@propagate_inbounds function Base.getindex(A::SubspaceVector, i) @boundscheck checkbounds(A.vec, i) return @inbounds getindex(A.vec, i) end -Base.@propagate_inbounds function Base.setindex!(A::SymmetricArray, x, i) +Base.@propagate_inbounds function Base.setindex!(A::SubspaceVector, x, i) @boundscheck checkbounds(A.vec, i) return @inbounds setindex!(A.vec, x, i) end -Base.size(A::SymmetricArray) = size(A.vec) -Base.eltype(A::SymmetricArray) = eltype(A.vec) -Base.similar(A::SymmetricArray{true}) = SymmetricArray(similar(A.data), similar(A.vec), A.mul) -Base.similar(A::SymmetricArray{false}) = SymmetricArray(similar(A.data), similar(A.vec)) -Base.similar(A::SymmetricArray{true}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) -Base.similar(A::SymmetricArray{false}, ::Type{T}) where {T} = SymmetricArray(similar(A.data, T), similar(A.vec, T)) -Base.collect(A::SymmetricArray{true}) = SymmetricArray(collect(A.data), collect(A.vec), A.mul) -Base.collect(A::SymmetricArray{false}) = SymmetricArray(collect(A.data), collect(A.vec)) -Base.copyto!(dest::SymmetricArray, src::SymmetricArray) = copyto!(dest.vec, src.vec) -Base.:*(scalar::Real, A::SymmetricArray{true}) = SymmetricArray(A.data, scalar * A.vec, A.mul) -Base.:*(scalar::Real, A::SymmetricArray{false}) = SymmetricArray(A.data, scalar * A.vec) -Base.:*(A::SymmetricArray, scalar::Real) = scalar * A -Base.:/(A::SymmetricArray, scalar::Real) = inv(scalar) * A -Base.:+(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec, A1.mul) -Base.:+(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec + A2.vec) -Base.:-(A1::SymmetricArray{true,T}, A2::SymmetricArray{true,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec, A1.mul) -Base.:-(A1::SymmetricArray{false,T}, A2::SymmetricArray{false,T}) where {T} = SymmetricArray(A1.data, A1.vec - A2.vec) -Base.:-(A::SymmetricArray{true,T}) where {T} = SymmetricArray(A.data, -A.vec, A.mul) -Base.:-(A::SymmetricArray{false,T}) where {T} = SymmetricArray(A.data, -A.vec) - -LinearAlgebra.dot(A1::SymmetricArray{true}, A2::SymmetricArray{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) -LinearAlgebra.dot(A1::SymmetricArray{false}, A2::SymmetricArray{false}) = dot(A1.vec, A2.vec) -LinearAlgebra.norm(A::SymmetricArray) = sqrt(dot(A, A)) - -Base.@propagate_inbounds Base.isequal(A1::SymmetricArray, A2::SymmetricArray) = 
isequal(A1.vec, A2.vec) +Base.size(A::SubspaceVector) = size(A.vec) +Base.eltype(A::SubspaceVector) = eltype(A.vec) +Base.similar(A::SubspaceVector{true}) = SubspaceVector(similar(A.data), similar(A.vec), A.mul) +Base.similar(A::SubspaceVector{false}) = SubspaceVector(similar(A.data), similar(A.vec)) +Base.similar(A::SubspaceVector{true}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T), convert(Vector{T}, A.mul)) +Base.similar(A::SubspaceVector{false}, ::Type{T}) where {T} = SubspaceVector(similar(A.data, T), similar(A.vec, T)) +Base.collect(A::SubspaceVector{true}) = SubspaceVector(collect(A.data), collect(A.vec), A.mul) +Base.collect(A::SubspaceVector{false}) = SubspaceVector(collect(A.data), collect(A.vec)) +Base.copyto!(dest::SubspaceVector, src::SubspaceVector) = copyto!(dest.vec, src.vec) +Base.:*(scalar::Real, A::SubspaceVector{true}) = SubspaceVector(A.data, scalar * A.vec, A.mul) +Base.:*(scalar::Real, A::SubspaceVector{false}) = SubspaceVector(A.data, scalar * A.vec) +Base.:*(A::SubspaceVector, scalar::Real) = scalar * A +Base.:/(A::SubspaceVector, scalar::Real) = inv(scalar) * A +Base.:+(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec, A1.mul) +Base.:+(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec + A2.vec) +Base.:-(A1::SubspaceVector{true,T}, A2::SubspaceVector{true,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec, A1.mul) +Base.:-(A1::SubspaceVector{false,T}, A2::SubspaceVector{false,T}) where {T} = SubspaceVector(A1.data, A1.vec - A2.vec) +Base.:-(A::SubspaceVector{true,T}) where {T} = SubspaceVector(A.data, -A.vec, A.mul) +Base.:-(A::SubspaceVector{false,T}) where {T} = SubspaceVector(A.data, -A.vec) + +LinearAlgebra.dot(A1::SubspaceVector{true}, A2::SubspaceVector{true}) = dot(A1.vec, Diagonal(A1.mul), A2.vec) +LinearAlgebra.dot(A1::SubspaceVector{false}, A2::SubspaceVector{false}) = dot(A1.vec, A2.vec) +LinearAlgebra.norm(A::SubspaceVector) = sqrt(dot(A, A)) + +Base.@propagate_inbounds Base.isequal(A1::SubspaceVector, A2::SubspaceVector) = isequal(A1.vec, A2.vec) diff --git a/src/utils.jl b/src/utils.jl index 02f14eb71..f7f071a35 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -163,6 +163,77 @@ function fast_dot(A::Matrix{T1}, B::SparseArrays.SparseMatrixCSC{T2}) where {T1, return s end +fast_dot(a, Q, b) = dot(a, Q, b) + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::AbstractVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + d = Q.diag + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + return sum(eachindex(nzvals); init=zero(eltype(a))) do nzidx + nzvals[nzidx] * d[nzinds[nzidx]] * b[nzinds[nzidx]] + end +end + +function fast_dot(a::SparseArrays.AbstractSparseVector{<:Real}, Q::Diagonal{<:Real}, b::SparseArrays.AbstractSparseVector{<:Real}) + if a === b + return _fast_quadratic_form_symmetric(a, Q) + end + n = length(a) + if length(b) != n + throw( + DimensionMismatch("Vector a has a length $n but b has a length $(length(b))") + ) + end + anzind = SparseArrays.nonzeroinds(a) + bnzind = SparseArrays.nonzeroinds(b) + anzval = SparseArrays.nonzeros(a) + bnzval = SparseArrays.nonzeros(b) + s = zero(Base.promote_eltype(a, Q, b)) + + if isempty(anzind) || isempty(bnzind) + return s + end + + a_idx = 1 + b_idx = 1 + a_idx_last = length(anzind) + b_idx_last = length(bnzind) + + # go through the nonzero indices of a and b 
simultaneously + @inbounds while a_idx <= a_idx_last && b_idx <= b_idx_last + ia = anzind[a_idx] + ib = bnzind[b_idx] + if ia == ib + s += dot(anzval[a_idx], Q.diag[ia], bnzval[b_idx]) + a_idx += 1 + b_idx += 1 + elseif ia < ib + a_idx += 1 + else + b_idx += 1 + end + end + return s +end + + +function _fast_quadratic_form_symmetric(a, Q) + d = Q.diag + if length(d) != length(a) + throw(DimensionMismatch()) + end + nzvals = SparseArrays.nonzeros(a) + nzinds = SparseArrays.nonzeroinds(a) + s = zero(Base.promote_eltype(a, Q)) + @inbounds for nzidx in eachindex(nzvals) + s += nzvals[nzidx]^2 * d[nzinds[nzidx]] + end + return s +end + """ trajectory_callback(storage) diff --git a/test/active_set.jl b/test/active_set.jl index 101b17d07..29c98f62a 100644 --- a/test/active_set.jl +++ b/test/active_set.jl @@ -4,7 +4,7 @@ using FrankWolfe import FrankWolfe: ActiveSet using LinearAlgebra: norm -@testset "Active sets" begin +@testset "Active sets " begin @testset "Constructors and eltypes" begin active_set = ActiveSet([(0.1, [1, 2, 3]), (0.9, [2, 3, 4]), (0.0, [5, 6, 7])]) @@ -100,7 +100,7 @@ using LinearAlgebra: norm end end -@testset "Simplex gradient descent" begin +@testset "Simplex gradient descent " begin # Gradient descent over a 2-D unit simplex # each atom is a vertex, direction points to [1,1] # note: integers for atom element types @@ -175,7 +175,7 @@ end end end -@testset "LP separation oracle" begin +@testset "LP separation oracle " begin # Gradient descent over a L-inf ball of radius one # current active set contains 3 vertices # direction points to [1,1] @@ -212,7 +212,7 @@ end ) end -@testset "Argminmax" begin +@testset "Argminmax " begin active_set = FrankWolfe.ActiveSet([(0.6, [-1, -1]), (0.2, [0, 1]), (0.2, [1, 0])]) (λ_min, a_min, i_min, val, λ_max, a_max, i_max, valM, progress) = FrankWolfe.active_set_argminmax(active_set, [1, 1.5]) @@ -221,7 +221,7 @@ end @test_throws ErrorException FrankWolfe.active_set_argminmax(active_set, [NaN, NaN]) end -@testset "LPseparationWithScaledHotVector" begin +@testset "LPseparationWithScaledHotVector " begin v1 = FrankWolfe.ScaledHotVector(1, 1, 2) v2 = FrankWolfe.ScaledHotVector(1, 2, 2) v3 = FrankWolfe.ScaledHotVector(0, 2, 2) @@ -240,7 +240,7 @@ end ) end -@testset "ActiveSet for BigFloat" begin +@testset "ActiveSet for BigFloat " begin n = Int(1e2) lmo = FrankWolfe.LpNormLMO{BigFloat,1}(rand()) x0 = Vector(FrankWolfe.compute_extreme_point(lmo, zeros(n))) diff --git a/test/active_set_variants.jl b/test/active_set_variants.jl index 59b4a1cad..c191389f7 100644 --- a/test/active_set_variants.jl +++ b/test/active_set_variants.jl @@ -9,7 +9,7 @@ function test_callback(state, active_set, args...) @test grad0 ≈ state.gradient end -@testset "Testing active set Frank-Wolfe variants, BPFW, AFW, and PFW, including their lazy versions" begin +@testset "Testing active set based Frank-Wolfe variants " begin f(x) = norm(x)^2 function grad!(storage, x) @. 
storage = 2x @@ -134,7 +134,7 @@ end ) end -@testset "recompute or not last vertex" begin +@testset "Recompute or not last vertex " begin n = 10 f(x) = norm(x)^2 function grad!(storage, x) @@ -171,7 +171,7 @@ end @test lmo.counter == prev_counter - 1 end -@testset "Quadratic active set" begin +@testset "Quadratic active set " begin n = 2 # number of dimensions p = 10 # number of points function simple_reg_loss(θ, data_point) @@ -218,7 +218,7 @@ end end lmo = FrankWolfe.LpNormLMO{Float64, 2}(1.05 * norm(params_perfect)) x0 = FrankWolfe.compute_extreme_point(lmo, zeros(Float64, n+1)) - active_set = FrankWolfe.ActiveSetQuadratic([(1.0, x0)], gradf) + active_set = FrankWolfe.ActiveSetQuadraticProductCaching([(1.0, x0)], gradf) res = FrankWolfe.blended_pairwise_conditional_gradient( f, gradf, diff --git a/test/alternating_methods_tests.jl b/test/alternating_methods_tests.jl index 904de49d8..ddbd2bb8a 100644 --- a/test/alternating_methods_tests.jl +++ b/test/alternating_methods_tests.jl @@ -19,7 +19,7 @@ lmo1 = FrankWolfe.ScaledBoundLInfNormBall(-ones(n), zeros(n)) lmo2 = FrankWolfe.ScaledBoundLInfNormBall(zeros(n), ones(n)) lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) -@testset "Testing alternating linear minimization with block coordinate FW for different LMO-pairs " begin +@testset "Testing ALM with block-coordinate FW " begin x, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -27,7 +27,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1.0, + lambda=0.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -40,7 +40,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/3, + lambda=1/6, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -53,7 +53,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=1/9, + lambda=1/18, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -66,7 +66,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, + lambda=1.5, line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), ) @@ -102,7 +102,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (storage, x) -> storage .= zero(x), (lmo1, lmo3), ones(n), - verbose=true, + verbose=false, line_search=FrankWolfe.Shortstep(2), ) @@ -116,7 +116,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) (lmo1, lmo_prob), ones(n), trajectory=true, - verbose=true, + verbose=false, ) @test abs(x.blocks[1][1]) < 1e-6 @@ -134,6 +134,7 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) ones(n), line_search=FrankWolfe.Agnostic(), momentum=0.9, + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-3 @@ -141,10 +142,10 @@ lmo3 = FrankWolfe.ScaledBoundLInfNormBall(ones(n), 2 * ones(n)) end -@testset "Testing different update orders for block coordinate FW in within alternating linear minimization" begin +@testset "Testing update orders for block-coordinate ALM-FW " begin orders = [ - FrankWolfe.FullUpdate(), + FrankWolfe.FullUpdate(), [FrankWolfe.CyclicUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.StochasticUpdate(i) for i in [-1, 1, 2]]..., [FrankWolfe.DualGapOrder(i) for i in [-1, 1, 2]]..., @@ -160,13 +161,14 @@ end ones(n), line_search=FrankWolfe.Adaptive(relaxed_smoothness=true), update_order=order, + lambda=0.5, ) @test abs(x.blocks[1][1] - 
0.5 / n) < 1e-5 @test abs(x.blocks[2][1] - 1 / n) < 1e-5 end end -@testset "Testing alternating linear minimization with different FW methods" begin +@testset "Testing ALM with different FW methods " begin methods = [ FrankWolfe.frank_wolfe, @@ -181,6 +183,7 @@ end grad!, (lmo2, lmo_prob), ones(n), + lambda=0.5 ) @test abs(x.blocks[1][1] - 0.5 / n) < 1e-6 @@ -188,7 +191,7 @@ end end end -@testset "Testing block-coordinate FW with different update steps and linesearch strategies" begin +@testset "Testing stepsize/linesearch in block-coordinate FW" begin x, _, _, _, _ = FrankWolfe.alternating_linear_minimization( FrankWolfe.block_coordinate_frank_wolfe, @@ -220,7 +223,7 @@ end grad!, (lmo1, lmo2), ones(n), - update_step=FrankWolfe.BPCGStep(true, nothing, 1000, false, Inf), + update_step=FrankWolfe.BPCGStep(true), ) @test abs(x.blocks[1][1]) < 1e-6 @@ -244,8 +247,8 @@ end grad!, (lmo_nb, lmo_prob), ones(n), - lambda=3.0, - line_search=(FrankWolfe.Shortstep(8.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 2+2*λ + lambda=1.5, + line_search=(FrankWolfe.Shortstep(4.0), FrankWolfe.Adaptive()), # L-smooth in coordinates for L = 1+2*λ update_step=(FrankWolfe.BPCGStep(), FrankWolfe.FrankWolfeStep()), ) @@ -253,35 +256,35 @@ end @test abs(x.blocks[2][1] - 1 / n) < 1e-6 end -@testset "Testing alternating projections for different LMO-pairs " begin +@testset "Testing alternating projections " begin - x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=true) + x, _, _, _, _ = FrankWolfe.alternating_projections((lmo1, lmo_prob), rand(n), verbose=false) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, _, _ = FrankWolfe.alternating_projections((lmo3, lmo_prob), rand(n)) - @test abs(x[1][1] - 1) < 1e-6 - @test abs(x[2][1] - 1 / n) < 1e-6 + @test abs(x.blocks[1][1] - 1) < 1e-6 + @test abs(x.blocks[2][1] - 1 / n) < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo1, lmo2), rand(n)) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1]) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1]) < 1e-6 @test infeas < 1e-6 x, _, _, infeas, _ = FrankWolfe.alternating_projections((lmo2, lmo3), rand(n)) - @test abs(x[1][1] - 1) < 1e-4 - @test abs(x[2][1] - 1) < 1e-4 + @test abs(x.blocks[1][1] - 1) < 1e-4 + @test abs(x.blocks[2][1] - 1) < 1e-4 @test infeas < 1e-6 x, _, _, infeas, traj_data = FrankWolfe.alternating_projections((lmo1, lmo3), rand(n), trajectory=true) - @test abs(x[1][1]) < 1e-6 - @test abs(x[2][1] - 1) < 1e-6 + @test abs(x.blocks[1][1]) < 1e-6 + @test abs(x.blocks[2][1] - 1) < 1e-6 @test traj_data != [] @test length(traj_data[1]) == 5 @test length(traj_data) >= 2 diff --git a/test/as_quadratic_projection.jl b/test/as_quadratic_projection.jl new file mode 100644 index 000000000..66f76e488 --- /dev/null +++ b/test/as_quadratic_projection.jl @@ -0,0 +1,116 @@ +using FrankWolfe +using LinearAlgebra +using Random + +import HiGHS +import MathOptInterface as MOI +using Test + +n = Int(1e2) +k = 10000 + +# s = rand(1:100) +s = 10 +# @info "Seed $s" +Random.seed!(s) + + +xpi = rand(n); +total = sum(xpi); + +xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. 
storage = 2 * (x - xp) +end + +# lmo = FrankWolfe.KSparseLMO(5, 1000.0) + +## other LMOs to try +# lmo_big = FrankWolfe.KSparseLMO(100, big"1.0") +# lmo = FrankWolfe.LpNormLMO{Float64,5}(100.0) +# lmo = FrankWolfe.ProbabilitySimplexOracle(100.0); +lmo = FrankWolfe.UnitSimplexOracle(10000.0); + +x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + callback=build_callback(trajectoryBPCG_standard), +); + + +active_set_quadratic_automatic_standard = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic_standard, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic_standard), +); + +active_set_quadratic_automatic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + grad!, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_automatic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_automatic, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_automatic), +); + +active_set_quadratic_manual = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([(1.0, copy(x00))]), + 2.0 * I, -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), + scheduler=FrankWolfe.LogScheduler(start_time=10, scaling_factor=1), +) +trajectoryBPCG_quadratic_manual = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + active_set_quadratic_manual, + max_iteration=k, + callback=build_callback(trajectoryBPCG_quadratic_manual), +); + +traj_data = [ + trajectoryBPCG_standard, + trajectoryBPCG_quadratic_automatic_standard, + trajectoryBPCG_quadratic_automatic, + trajectoryBPCG_quadratic_manual, +] + +# all should have converged +for traj in traj_data + @test traj[end][2] ≤ 1e-8 + @test traj[end][4] ≤ 1e-7 +end diff --git a/test/bcg_direction_error.jl b/test/bcg_direction_error.jl index 6c1ab0dba..b0fa28b89 100644 --- a/test/bcg_direction_error.jl +++ b/test/bcg_direction_error.jl @@ -33,7 +33,7 @@ x, v, primal, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-9, @@ -55,7 +55,7 @@ x, v, primal_cut, dual_gap, _, _ = FrankWolfe.blended_conditional_gradient( line_search=FrankWolfe.AdaptiveZerothOrder(L_est=2.0), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=false, lazy_tolerance=1.0, weight_purge_threshold=1e-10, diff --git a/test/benchmark.jl 
b/test/benchmark.jl index 38cc72253..0b6f13063 100644 --- a/test/benchmark.jl +++ b/test/benchmark.jl @@ -1,7 +1,6 @@ import FrankWolfe import LinearAlgebra - n = Int(1e6); xpi = rand(n); diff --git a/test/blocks.jl b/test/blocks.jl index 485716bde..ab2909ecb 100644 --- a/test/blocks.jl +++ b/test/blocks.jl @@ -4,7 +4,7 @@ using LinearAlgebra using FrankWolfe using SparseArrays -@testset "Block array behaviour" begin +@testset "Block array behavior " begin arr = FrankWolfe.BlockVector([ [ 1 3 @@ -53,7 +53,7 @@ using SparseArrays end -@testset "FrankWolfe array methods" begin +@testset "Frank-Wolfe array methods " begin @testset "Block arrays" begin mem = FrankWolfe.InplaceEmphasis() arr0 = FrankWolfe.BlockVector([ diff --git a/test/extra_storage.jl b/test/extra_storage.jl index 26b90055a..1465bceb6 100644 --- a/test/extra_storage.jl +++ b/test/extra_storage.jl @@ -11,7 +11,7 @@ function grad!(storage, x) return storage .= x .- center0 end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -59,7 +59,7 @@ end end end -@testset "Away-Frank-Wolfe" begin +@testset "Away-Frank-Wolfe " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) @@ -107,7 +107,7 @@ end end end -@testset "Blended Pairwise Conditional Gradient" begin +@testset "Blended Pairwise Conditional Gradient " begin lmo = FrankWolfe.UnitSimplexOracle(4.3) tlmo = FrankWolfe.TrackingLMO(lmo) diff --git a/test/function_gradient.jl b/test/function_gradient.jl index 4bfd1a734..01b10a7b0 100644 --- a/test/function_gradient.jl +++ b/test/function_gradient.jl @@ -7,7 +7,7 @@ import FrankWolfe: compute_gradient, compute_value, compute_value_gradient Random.seed!(123) -@testset "Simple and stochastic function gradient interface" begin +@testset "Simple and stochastic function gradient interface " begin for n in (1, 5, 20) A = rand(Float16, n, n) A .+= A' diff --git a/test/generic-arrays.jl b/test/generic-arrays.jl index eab5107a9..53e0a8a17 100644 --- a/test/generic-arrays.jl +++ b/test/generic-arrays.jl @@ -8,7 +8,7 @@ function Base.convert(::Type{GenericArray{Float64,1}}, v::FrankWolfe.ScaledHotVe return GenericArray(collect(v)) end -@testset "GenericArray is maintained" begin +@testset "GenericArray is maintained " begin n = Int(1e3) k = n @@ -29,7 +29,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -44,7 +44,7 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), VType=FrankWolfe.ScaledHotVector{Float64}, ) @@ -60,12 +60,12 @@ end max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) end -@testset "fast_equal special types" begin +@testset "Testing fast_equal for special types " begin @testset "ScaledHotVector" begin a = FrankWolfe.ScaledHotVector(3.5, 3, 10) b = FrankWolfe.ScaledHotVector(3.5, 3, 11) diff --git a/test/lmo.jl b/test/lmo.jl index 030b76fe0..b30f01cd8 100644 --- a/test/lmo.jl +++ b/test/lmo.jl @@ -14,7 +14,7 @@ import Clp import Hypatia using JuMP -@testset "Simplex LMOs" begin +@testset "Simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -60,7 +60,7 @@ using JuMP end end -@testset "Hypersimplex" begin +@testset "Hypersimplex " begin @testset 
"Hypersimplex $n $K" for n in (2, 5, 10), K in (1, min(n, 4)) K = 1 n = 5 @@ -191,7 +191,7 @@ end end end -@testset "Lp-norm epigraph LMO" begin +@testset "Lp-norm epigraph LMO " begin for n in (1, 2, 5, 10) τ = 5 + 3 * rand() # tests that the "special" p behaves like the "any" p, i.e. 2.0 and 2 @@ -234,7 +234,7 @@ end end end -@testset "K-sparse polytope LMO" begin +@testset "K-sparse polytope LMO " begin @testset "$n-dimension" for n in (1, 2, 10) τ = 5 + 3 * rand() for K in 1:n @@ -264,7 +264,7 @@ end @test norm(v) > 0 end -@testset "Caching on simplex LMOs" begin +@testset "Caching on simplex LMOs " begin n = 6 direction = zeros(6) rhs = 10 * rand() @@ -314,7 +314,7 @@ function _is_doubly_stochastic(m) end end -@testset "Birkhoff polytope" begin +@testset "Birkhoff polytope " begin Random.seed!(42) lmo = FrankWolfe.BirkhoffPolytopeLMO() for n in (1, 2, 10) @@ -358,7 +358,7 @@ end ] end -@testset "Matrix completion and nuclear norm" begin +@testset "Matrix completion and nuclear norm " begin nfeat = 50 nobs = 100 r = 5 @@ -425,7 +425,7 @@ end ) end -@testset "Spectral norms" begin +@testset "Spectral norms " begin Random.seed!(42) o = Hypatia.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -514,7 +514,7 @@ end end end -@testset "MOI oracle consistency" begin +@testset "MOI oracle consistency " begin Random.seed!(42) o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) @@ -649,7 +649,7 @@ end end end -@testset "MOI oracle on Birkhoff polytope" begin +@testset "MOI oracle on Birkhoff polytope " begin o = GLPK.Optimizer() o_ref = GLPK.Optimizer() for n in (1, 2, 10) @@ -687,7 +687,7 @@ end end end -@testset "MOI oracle and KSparseLMO" begin +@testset "MOI oracle and KSparseLMO " begin o_base = GLPK.Optimizer() cached = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), @@ -751,7 +751,7 @@ end end end -@testset "Product LMO" begin +@testset "Product LMO " begin lmo = FrankWolfe.ProductLMO(FrankWolfe.LpNormLMO{Inf}(3.0), FrankWolfe.LpNormLMO{1}(2.0)) dinf = randn(10) d1 = randn(5) @@ -770,7 +770,7 @@ end @test FrankWolfe.BlockVector([vinf, v1]) == v_block end -@testset "Scaled L-1 norm polytopes" begin +@testset "Scaled L-1 norm polytopes " begin lmo = FrankWolfe.ScaledBoundL1NormBall(-ones(10), ones(10)) # equivalent to LMO lmo_ref = FrankWolfe.LpNormLMO{1}(1) @@ -802,7 +802,7 @@ end @test v ≈ [-1, 0] end -@testset "Scaled L-inf norm polytopes" begin +@testset "Scaled L-inf norm polytopes " begin # tests ScaledBoundLInfNormBall for the standard hypercube, a shifted one, and a scaled one lmo = FrankWolfe.ScaledBoundLInfNormBall(-ones(10), ones(10)) lmo_ref = FrankWolfe.LpNormLMO{Inf}(1) @@ -847,7 +847,7 @@ end end end -@testset "Copy MathOpt LMO" begin +@testset "Copy MathOpt LMO " begin o_clp = MOI.Utilities.CachingOptimizer( MOI.Utilities.UniversalFallback(MOI.Utilities.Model{Float64}()), Clp.Optimizer(), @@ -869,7 +869,7 @@ end end end -@testset "MathOpt LMO with BlockVector" begin +@testset "MathOpt LMO with BlockVector " begin o = GLPK.Optimizer() MOI.set(o, MOI.Silent(), true) x = MOI.add_variables(o, 10) @@ -888,7 +888,7 @@ end end -@testset "Inplace LMO correctness" begin +@testset "Inplace LMO correctness " begin V = [-6.0, -6.15703, -5.55986] M = [3.0 2.8464949 2.4178848; 2.8464949 3.0 2.84649498; 2.4178848 2.84649498 3.0] @@ -912,7 +912,7 @@ end @test x_dense == x_standard end -@testset "Ellipsoid LMO $n" for n in (2, 5, 10) +@testset "Ellipsoid LMO $n " for n in (2, 5, 9) A = zeros(n, n) A[1, 1] = 3 @test_throws PosDefException 
FrankWolfe.EllipsoidLMO(A) @@ -944,7 +944,7 @@ end @test dot(xv, d) ≈ dot(v, d) atol = 1e-5 * n end -@testset "Convex hull" begin +@testset "Convex hull " begin lmo = FrankWolfe.ConvexHullOracle([[1, 0, 0], [0, 1, 0], [0, 0, 1]]) for _ in 1:100 d = randn(3) @@ -957,7 +957,7 @@ end @test v == lmo.vertices[1] end -@testset "Symmetric LMO" begin +@testset "Symmetric LMO " begin # See examples/reynolds.jl struct BellCorrelationsLMO{T} <: FrankWolfe.LinearMinimizationOracle m::Int # number of inputs @@ -1023,7 +1023,7 @@ end end reynolds_adjoint(gradient, lmo) = gradient lmo = BellCorrelationsLMO{Float64}(size(p, 1), zeros(size(p, 1))) - sym = FrankWolfe.SymmetricLMO(lmo, reynolds_permutedims, reynolds_adjoint) + sym = FrankWolfe.SubspaceLMO(lmo, reynolds_permutedims, reynolds_adjoint) x0 = FrankWolfe.compute_extreme_point(sym, -p) active_set = FrankWolfe.ActiveSet([(1.0, x0)]) res = FrankWolfe.blended_pairwise_conditional_gradient( @@ -1038,7 +1038,7 @@ end @test length(res[6]) < 25 end -@testset "Ordered Weighted Norm LMO" begin +@testset "Ordered Weighted Norm LMO " begin Random.seed!(4321) N = Int(1e3) for _ in 1:10 diff --git a/test/momentum_memory.jl b/test/momentum_memory.jl index 45fa98001..5ec6aea50 100644 --- a/test/momentum_memory.jl +++ b/test/momentum_memory.jl @@ -8,7 +8,7 @@ n = Int(1e3) k = 10000 s = rand(1:100) -@info "Seed: $s" +# @info "Seed: $s" Random.seed!(s) @@ -47,7 +47,7 @@ xmem, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, trajectory=true, momentum=0.9, ) diff --git a/test/oddities.jl b/test/oddities.jl index 466a5019f..b64ecb901 100644 --- a/test/oddities.jl +++ b/test/oddities.jl @@ -2,7 +2,7 @@ import FrankWolfe using LinearAlgebra using Test -@testset "Testing adaptive LS when already optimal and gradient is 0" begin +@testset "Testing adaptive LS at optimality " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -19,7 +19,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -32,7 +32,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -43,7 +43,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -58,7 +58,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @test abs( @@ -70,7 +70,7 @@ using Test max_iteration=1000, lazy=true, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 @@ -83,7 +83,7 @@ using Test x0, max_iteration=1000, line_search=FrankWolfe.Adaptive(), - verbose=true, + verbose=false, )[3], ) < 1.0e-10 diff --git a/test/quadratic_lp_active_set.jl b/test/quadratic_lp_active_set.jl new file mode 100644 index 000000000..50bcdb6e6 --- /dev/null +++ b/test/quadratic_lp_active_set.jl @@ -0,0 +1,113 @@ +using FrankWolfe +using LinearAlgebra +using Random +using Test +import HiGHS +import MathOptInterface as MOI + +n = Int(1e4) +k = 5000 + +s = 10 +# @info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. 
storage = 2 * (x - xp) +end + +lmo = FrankWolfe.KSparseLMO(5, 1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) + return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set))) + end +end + +trajectoryBPCG_standard = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + copy(x00), + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_standard), +); + +as_quad_direct = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_direct_specialized = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_direct_specialized), +); + +as_quad_direct_generic = FrankWolfe.ActiveSetQuadraticLinearSolve( + [(1.0, copy(x00))], + 2 * Diagonal(ones(length(xp))), + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_direct_generic = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_generic, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_direct_generic), +); + +as_quad_direct_basic_as = FrankWolfe.ActiveSetQuadraticLinearSolve( + FrankWolfe.ActiveSet([1.0], [copy(x00)], collect(x00)), + 2 * LinearAlgebra.I, + -2xp, + MOI.instantiate(MOI.OptimizerWithAttributes(HiGHS.Optimizer, MOI.Silent() => true)), +) + +trajectoryBPCG_quadratic_noqas = [] +x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient( + f, + grad!, + lmo, + as_quad_direct_basic_as, + max_iteration=k, + line_search=FrankWolfe.Shortstep(2.0), + verbose=false, + callback=build_callback(trajectoryBPCG_quadratic_noqas), +); + + +dual_gaps_quadratic_specialized = getindex.(trajectoryBPCG_quadratic_direct_specialized, 4) +dual_gaps_quadratic_generic = getindex.(trajectoryBPCG_quadratic_direct_generic, 4) +dual_gaps_quadratic_noqas = getindex.(trajectoryBPCG_quadratic_noqas, 4) +dual_gaps_bpcg = getindex.(trajectoryBPCG_standard, 4) + + +@test dual_gaps_quadratic_specialized[end] < dual_gaps_bpcg[end] +@test dual_gaps_quadratic_noqas[end] < dual_gaps_bpcg[end] +@test norm(dual_gaps_quadratic_noqas - dual_gaps_quadratic_noqas) ≤ k * 1e-5 diff --git a/test/rational_test.jl b/test/rational_test.jl index e9725157a..415b33291 100644 --- a/test/rational_test.jl +++ b/test/rational_test.jl @@ -37,7 +37,7 @@ xmem, vmem, primal, dual_gap, trajectory = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Agnostic(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -49,7 +49,7 @@ xstep, _ = FrankWolfe.frank_wolfe( max_iteration=k, line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, - verbose=true, + verbose=false, ) @test eltype(xmem) == eltype(xstep) == Rational{BigInt} diff --git a/test/runtests.jl b/test/runtests.jl index d56d47a75..c99097de4 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -12,7 +12,7 @@ include("utils.jl") 
include("active_set_variants.jl") include("alternating_methods_tests.jl") -@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin +@testset "Testing vanilla Frank-Wolfe " begin f(x) = norm(x)^2 function grad!(storage, x) return storage .= 2x @@ -27,7 +27,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 g(x) = 2 + log(1 + log(x + 1)) @@ -39,7 +39,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=10000, line_search=FrankWolfe.GeneralizedAgnostic(g), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -50,7 +50,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Agnostic(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -73,7 +73,7 @@ include("alternating_methods_tests.jl") x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -157,12 +157,12 @@ include("alternating_methods_tests.jl") ) < 1.0e-3 end -@testset "Gradient with momentum correctly updated" begin +@testset "Gradient with momentum correctly updated " begin # fixing https://github.com/ZIB-IOL/FrankWolfe.jl/issues/47 include("momentum_memory.jl") end -@testset "Testing Lazified Conditional Gradients with various step size strategies" begin +@testset "Testing Lazified Conditional Gradients " begin f(x) = norm(x)^2 function grad!(storage, x) @. storage = 2x @@ -178,7 +178,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Goldenratio(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -189,7 +189,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -200,7 +200,7 @@ end x0, max_iteration=1000, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 @test abs( @@ -211,12 +211,12 @@ end x0, max_iteration=1000, line_search=FrankWolfe.AdaptiveZerothOrder(), - verbose=true, + verbose=false, )[3] - 0.2, ) < 1.0e-5 end -@testset "Testing Lazified Conditional Gradients with cache strategies" begin +@testset "Testing caching in Lazified Conditional Gradients " begin n = Int(1e5) L = 2 k = 1000 @@ -237,7 +237,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Shortstep(2.0), - verbose=true, + verbose=false, ) @test primal - 1 / n <= bound @@ -270,7 +270,7 @@ end @test primal - 1 / n <= bound end -@testset "Testing memory_mode blas vs memory" begin +@testset "Testing memory_mode blas vs memory " begin n = Int(1e5) k = 100 xpi = rand(n) @@ -337,7 +337,7 @@ end @test primal < f(x0) end end -@testset "Testing rational variant" begin +@testset "Testing rational variant " begin rhs = 1 n = 40 k = 1000 @@ -379,7 +379,7 @@ end line_search=FrankWolfe.Agnostic(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == eltype(x) == Rational{BigInt} @test f(x) <= 1e-4 @@ -395,7 +395,7 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 100, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) x0 = FrankWolfe.compute_extreme_point(lmo, direction) @@ -408,11 +408,11 @@ end line_search=FrankWolfe.Shortstep(2 // 1), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x) == Rational{BigInt} end -@testset "Multi-precision tests" begin 
+@testset "Multi-precision tests " begin rhs = 1 n = 100 k = 1000 @@ -472,7 +472,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -487,7 +487,7 @@ end line_search=FrankWolfe.AdaptiveZerothOrder(), print_iter=k / 10, memory_mode=FrankWolfe.InplaceEmphasis(), - verbose=true, + verbose=false, ) @test eltype(x0) == T @@ -496,7 +496,7 @@ end end end -@testset "Stochastic FW linear regression" begin +@testset "Stochastic FW linear regression " begin function simple_reg_loss(θ, data_point) (xi, yi) = data_point (a, b) = (θ[1:end-1], θ[end]) @@ -583,7 +583,7 @@ end ) end -@testset "Away-step FW" begin +@testset "Away-step FW " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, rand(n)) @@ -614,7 +614,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -629,7 +629,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -645,7 +645,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -660,7 +660,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -675,7 +675,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -691,7 +691,7 @@ end away_steps=false, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -706,7 +706,7 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.InplaceEmphasis(), ) @@ -722,12 +722,12 @@ end max_iteration=k, line_search=FrankWolfe.Backtracking(), print_iter=k / 10, - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) end -@testset "Blended conditional gradient" begin +@testset "Testing Blended Conditional Gradient " begin n = 50 lmo_prob = FrankWolfe.ProbabilitySimplexOracle(1.0) x0 = FrankWolfe.compute_extreme_point(lmo_prob, randn(n)) @@ -745,7 +745,7 @@ end x0, max_iteration=k, line_search=FrankWolfe.Backtracking(), - verbose=true, + verbose=false, memory_mode=FrankWolfe.OutplaceEmphasis(), ) @@ -767,44 +767,64 @@ end end - include("oddities.jl") include("tracking.jl") # in separate module for name space issues module BCGDirectionError using Test -@testset "BCG direction accuracy" begin +@testset "BCG direction accuracy " begin include("bcg_direction_error.jl") end end module RationalTest using Test -@testset "Rational test and shortstep" begin +@testset "Rational test and shortstep " begin include("rational_test.jl") end end module BCGAccel using Test -@testset "BCG acceleration with different types" begin +@testset "BCG acceleration with different types " begin include("blended_accelerated.jl") end end module VertexStorageTest using Test -@testset "Vertex storage" begin +@testset "Vertex storage " begin include("extra_storage.jl") end end +module LpDirectSolveTest +using Test +@testset "LP solving for quadratic functions and active set " begin + 
include("quadratic_lp_active_set.jl") +end +end + +module LpDirectSolveTestProjection +using Test +@testset "LP solving for quadratic functions and active set " begin + include("as_quadratic_projection.jl") +end +end + +module SparsifyingActiveSetTest +using Test +@testset "Sparsifying active set " begin + include("sparsifying_activeset.jl") +end +end + include("generic-arrays.jl") include("blocks.jl") -@testset "End-to-end trajectory tests" begin +@testset "End-to-end trajectory tests " begin trajectory_testfiles = readdir(joinpath(@__DIR__, "trajectory_tests"), join=true) for file in trajectory_testfiles @eval Module() begin diff --git a/test/sparsifying_activeset.jl b/test/sparsifying_activeset.jl new file mode 100644 index 000000000..1cf8d8299 --- /dev/null +++ b/test/sparsifying_activeset.jl @@ -0,0 +1,85 @@ +#= +This example demonstrates the use of the Blended Pairwise Conditional Gradient algorithm +with direct solve steps for a quadratic optimization problem over a sparse polytope. + +The example showcases how the algorithm balances between: +- Pairwise steps for efficient optimization +- Periodic direct solves for handling the quadratic objective +- Lazy (approximate) linear minimization steps for improved iteration complexity + +It also demonstrates how to set up custom callbacks for tracking algorithm progress. +=# + +using LinearAlgebra +using Test +using Random +using FrankWolfe +import HiGHS + +n = Int(1e4) +k = 10000 + +# s = rand(1:100) +s = 10 +# @info "Seed $s" +Random.seed!(s) + +xpi = rand(n); +total = sum(xpi); + +# here the optimal solution lies in the interior if you want an optimal solution on a face and not the interior use: +# const xp = xpi; + +const xp = xpi ./ total; + +f(x) = norm(x - xp)^2 +function grad!(storage, x) + @. storage = 2 * (x - xp) +end + +lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0) + +const x00 = FrankWolfe.compute_extreme_point(lmo, rand(n)) + +function build_callback(trajectory_arr) + return function callback(state, active_set, args...) 
+        return push!(trajectory_arr, (FrankWolfe.callback_state(state)..., length(active_set)))
+    end
+end
+
+trajectoryBPCG_standard = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    copy(x00),
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    trajectory=false,
+    callback=build_callback(trajectoryBPCG_standard),
+);
+
+# wrap a plain active set in a sparsifier: HiGHS solves the auxiliary LPs
+# that drop redundant atoms from the active set
+active_set_sparse = FrankWolfe.ActiveSetSparsifier(
+    FrankWolfe.ActiveSet([1.0], [x00], similar(x00)),
+    HiGHS.Optimizer(),
+)
+trajectoryBPCG_as_sparse = []
+x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
+    f,
+    grad!,
+    lmo,
+    active_set_sparse,  # pass the sparsifying active set, not a plain starting point
+    max_iteration=k,
+    line_search=FrankWolfe.Shortstep(2.0),
+    verbose=false,
+    callback=build_callback(trajectoryBPCG_as_sparse),
+);
+
+# the sparsified active set should never be larger than the standard one
+as_cardinality_bpcg = getindex.(trajectoryBPCG_standard, 6)
+as_cardinality_sparse = getindex.(trajectoryBPCG_as_sparse, 6)
+@test maximum(as_cardinality_sparse - as_cardinality_bpcg) <= 0
+
+dual_gap_bpcg = getindex.(trajectoryBPCG_standard, 4)
+dual_gap_sparse = getindex.(trajectoryBPCG_as_sparse, 4)
+@test maximum(dual_gap_sparse - dual_gap_bpcg) <= 1e-7
diff --git a/test/timeout.jl b/test/timeout.jl
index 7094ef5b3..9ba8aa47e 100644
--- a/test/timeout.jl
+++ b/test/timeout.jl
@@ -2,7 +2,7 @@ using FrankWolfe
 using LinearAlgebra
 using SparseArrays
 
-@testset "Timing out" begin
+@testset "Timing out " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
         return storage .= 2x
     end
@@ -17,7 +17,7 @@ using SparseArrays
             x0,
             max_iteration=1000,
             line_search=FrankWolfe.Agnostic(),
-            verbose=true,
+            verbose=false,
         )[3] - 0.2,
     ) < 1.0e-5
     @test abs(
@@ -40,7 +40,7 @@ using SparseArrays
             x0,
             max_iteration=1000,
             line_search=FrankWolfe.Goldenratio(),
-            verbose=true,
+            verbose=false,
         )[3] - 0.2,
     ) < 1.0e-5
     @test abs(
diff --git a/test/tracking.jl b/test/tracking.jl
index ecd7d1823..dac8cb5f7 100644
--- a/test/tracking.jl
+++ b/test/tracking.jl
@@ -3,7 +3,7 @@ using LinearAlgebra
 using SparseArrays
 using FrankWolfe
 
-@testset "Tracking Testset" begin
+@testset "Tracking Testset " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
         return storage .= 2x
@@ -41,7 +41,7 @@ using FrankWolfe
     end
 end
 
-@testset "Testing vanilla Frank-Wolfe with various step size and momentum strategies" begin
+@testset "Testing vanilla Frank-Wolfe " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
@@ -68,7 +68,7 @@ end
         trajectory=true,
         callback=nothing,
         traj_data=storage,
-        verbose=true,
+        verbose=false,
     )
 
     @test length(storage[1]) == 5
@@ -79,7 +79,7 @@ end
     @test tlmo.counter == niters + 1 # x0 computation and initialization
 end
 
-@testset "Testing lazified Frank-Wolfe with various step size and momentum strategies" begin
+@testset "Testing lazified Frank-Wolfe " begin
     f(x) = norm(x)^2
     function grad!(storage, x)
diff --git a/test/trajectory_tests/open_loop_parametric.jl b/test/trajectory_tests/open_loop_parametric.jl
index 474d98ecb..38b0e5fce 100644
--- a/test/trajectory_tests/open_loop_parametric.jl
+++ b/test/trajectory_tests/open_loop_parametric.jl
@@ -26,7 +26,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(2),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
 
@@ -39,7 +39,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(10),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
 
@@ -68,7 +68,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(2),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
 
@@ -81,7 +81,7 @@ using LinearAlgebra
         line_search=FrankWolfe.Agnostic(10),
         print_iter=k / 10,
         epsilon=1e-5,
-        verbose=true,
+        verbose=false,
         trajectory=true,
     )
diff --git a/test/utils.jl b/test/utils.jl
index b535a3424..4d7cc4539 100644
--- a/test/utils.jl
+++ b/test/utils.jl
@@ -3,7 +3,7 @@ using LinearAlgebra
 using Test
 using SparseArrays
 
-@testset "Simple benchmark_oracles function" begin
+@testset "Simple benchmark_oracles function " begin
     n = Int(1e3)
 
     xpi = rand(n)
@@ -21,7 +21,7 @@ using SparseArrays
     FrankWolfe.benchmark_oracles(f, grad!, () -> rand(n), lmo_prob; k=100)
 end
 
-@testset "RankOneMatrix" begin
+@testset "RankOneMatrix " begin
     for n in (1, 2, 5)
         for _ in 1:5
             v = rand(n)
@@ -64,7 +64,7 @@ end
     end
 end
 
-@testset "RankOne muladd_memory_mode $n" for n in (1, 2, 5)
+@testset "RankOne muladd_memory_mode $n " for n in (1, 2, 5)
     for _ in 1:5
         n = 5
         v = rand(n)
@@ -82,7 +82,7 @@ end
     end
 end
 
-@testset "Line Search methods" begin
+@testset "Line Search methods " begin
     a = [-1.0, -1.0, -1.0]
     b = [1.0, 1.0, 1.0]
     function grad!(storage, x)
@@ -214,14 +214,14 @@ end
     )
 end
 
-@testset "Momentum tests" begin
+@testset "Momentum tests " begin
     it = FrankWolfe.ExpMomentumIterator()
     it.num = 0
     # no momentum -> 1
     @test FrankWolfe.momentum_iterate(it) == 1
 end
 
-@testset "Fast dot complex & norm" begin
+@testset "Fast dot complex & norm " begin
     s = sparse(I, 3, 3)
     m = randn(Complex{Float64}, 3, 3)
     @test dot(s, m) ≈ FrankWolfe.fast_dot(s, m)
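# ---------------------------------------------------------------------------
# Editor's note (not part of the diff): the usage sketch referenced from the
# header of test/sparsifying_activeset.jl above. A minimal sketch, assuming
# FrankWolfe ≥ 0.4.2 with HiGHS available; the problem size, LMO, and
# iteration budget are illustrative.
using LinearAlgebra
import FrankWolfe, HiGHS

n = 1000
xp = rand(n)
xp ./= sum(xp)
f(x) = norm(x - xp)^2
grad!(storage, x) = (storage .= 2 .* (x .- xp))
lmo = FrankWolfe.LpNormLMO{Float64,5}(1.0)
x0 = FrankWolfe.compute_extreme_point(lmo, rand(n))
# ActiveSetSparsifier wraps any active set; it uses the supplied optimizer to
# solve auxiliary LPs that re-express the current iterate with fewer atoms
# (the exact sparsification trigger is internal to FrankWolfe)
active_set = FrankWolfe.ActiveSetSparsifier(
    FrankWolfe.ActiveSet([1.0], [x0], similar(x0)),
    HiGHS.Optimizer(),
)
x, v, primal, dual_gap, _ = FrankWolfe.blended_pairwise_conditional_gradient(
    f, grad!, lmo, active_set;
    max_iteration=5000, line_search=FrankWolfe.Shortstep(2.0),
)
# ---------------------------------------------------------------------------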