diff --git a/include/ensmallen_bits/sdp/lin_alg.hpp b/include/ensmallen_bits/sdp/lin_alg.hpp index bfd70c739..97ee42f04 100644 --- a/include/ensmallen_bits/sdp/lin_alg.hpp +++ b/include/ensmallen_bits/sdp/lin_alg.hpp @@ -15,6 +15,38 @@ namespace ens { namespace math { +/** +* Matrix representation of a general Linear operator representated +* by a function/method. +* +* @param input Input function. +* @param output The matrix representation. +*/ +template +inline void convertToMatrix(std::function)> input, + MatBType& output) +{ + const size_t n = output.n_rows; + + arma::Mat tempInputCoordinates; + tempInputCoordinates.zeros(n, n); + + size_t idx = 0; + for (size_t i = 0; i < n; i++) + { + if (i > 0) tempInputCoordinates(i - 1, n - 1) = 0; + for (size_t j = i; j < n; j++) + { + if (j > i) tempInputCoordinates(i, j - 1) = 0; + tempInputCoordinates(i, j) = 1; + if (i == j) + output(i, j) = input(tempInputCoordinates); + else + output(i, j) = output(j, i) = 0.5 * arma::datum::sqrt2 * input(tempInputCoordinates); + } + } +} + inline size_t SvecIndex(size_t i, size_t j, size_t n) { if (i > j) @@ -109,6 +141,8 @@ inline void Smat(const MatAType& input, MatBType& output) } } + + /** * If A is a symmetric matrix, then SymKronId returns an operator Op such that * diff --git a/include/ensmallen_bits/sdp/lrsdp.hpp b/include/ensmallen_bits/sdp/lrsdp.hpp index c918163b1..411efce85 100644 --- a/include/ensmallen_bits/sdp/lrsdp.hpp +++ b/include/ensmallen_bits/sdp/lrsdp.hpp @@ -32,6 +32,21 @@ template class LRSDP { public: +/** +* Create an LRSDP to be optimized. The solution will end up being a matrix +* of size (rows) x (rank). To construct each constraint and the objective +* function, use the function SDP() in order to access the SDPType object +* associated with this optimizer. +* +* @param numSparseConstraints Number of sparse constraints in the problem. +* @param numDenseConstraints Number of dense constraints in the problem. +* @param initialPoint Initial point of the optimization. +* @param maxIterations Maximum number of iterations. +*/ +LRSDP(const size_t numSparseConstraints, + const size_t numDenseConstraints, + const arma::Mat& initialPoint, + const size_t maxIterations = 1000); /** * Create an LRSDP to be optimized. The solution will end up being a matrix * of size (rows) x (rank). To construct each constraint and the objective @@ -40,11 +55,14 @@ class LRSDP * * @param numSparseConstraints Number of sparse constraints in the problem. * @param numDenseConstraints Number of dense constraints in the problem. + * @param numLinearOperatorConstraints Number of general linear operator + constraints in the problem. * @param initialPoint Initial point of the optimization. * @param maxIterations Maximum number of iterations. */ LRSDP(const size_t numSparseConstraints, const size_t numDenseConstraints, + const size_t numLinearOperatorConstraints, const arma::Mat& initialPoint, const size_t maxIterations = 1000); @@ -53,16 +71,15 @@ class LRSDP * given initial point. Note that the SDP may be modified later by calling * SDP() to access the object. * - * TODO: this is currently not implemented. * * @param sdp SDP to be solved. * @param initialPoint Initial point of the optimization. * @param maxIterations Maximum number of iterations. * + */ LRSDP(const SDPType& sdp, const arma::mat& initialPoint, const size_t maxIterations = 1000); - */ /** * Optimize the LRSDP and return the final objective value. The given diff --git a/include/ensmallen_bits/sdp/lrsdp_function.hpp b/include/ensmallen_bits/sdp/lrsdp_function.hpp index a0ac114ed..1b34c6f75 100644 --- a/include/ensmallen_bits/sdp/lrsdp_function.hpp +++ b/include/ensmallen_bits/sdp/lrsdp_function.hpp @@ -48,14 +48,16 @@ class LRSDPFunction * constraints. Note n_cols of the initialPoint specifies the rank. * * Set the A_x, B_x, and C_x matrices for each constraint using the A_x(), - * B_x(), and C_x() functions, for x in {sparse, dense}. + * B_x(), and C_x() functions, for x in {sparse, dense, linearOperator}. * * @param numSparseConstraints * @param numDenseConstraints + * @param numLinearOperatorConstraints * @param initialPoint */ LRSDPFunction(const size_t numSparseConstraints, const size_t numDenseConstraints, + const size_t numLinearOperatorConstraints, const arma::Mat& initialPoint); /** @@ -118,6 +120,12 @@ class LRSDPFunction return rrt.As::type>(); } + // Cache constraint matrices of the general linear operators + // stored as functions. + const std::vector> LoA() const { + return loA; + } + //! Modify R*R^T matrix. template MatType& RRT() @@ -137,6 +145,9 @@ class LRSDPFunction //! Cache R*R^T matrix. Any rrt; + + //! Cache linear operators in matrix form + std::vector> loA; }; // Declare specializations in lrsdp_function.cpp. diff --git a/include/ensmallen_bits/sdp/lrsdp_function_impl.hpp b/include/ensmallen_bits/sdp/lrsdp_function_impl.hpp index 6cf2230f1..2cdf78149 100644 --- a/include/ensmallen_bits/sdp/lrsdp_function_impl.hpp +++ b/include/ensmallen_bits/sdp/lrsdp_function_impl.hpp @@ -23,7 +23,8 @@ LRSDPFunction::LRSDPFunction( const SDPType& sdp, const arma::Mat& initialPoint): sdp(sdp), - initialPoint(initialPoint) + initialPoint(initialPoint), + loA(sdp.NumLinearOperatorConstraints()) { if (initialPoint.n_rows < initialPoint.n_cols) { @@ -31,14 +32,24 @@ LRSDPFunction::LRSDPFunction( << "more columns than rows. It may be more efficient to find the " << "transposed solution." << std::endl; } + + for (size_t i = 0; i < sdp.NumLinearOperatorConstraints(); i++) + { + loA[i].zeros(sdp.N(), sdp.N()); + math::convertToMatrix(sdp.LinearOperators()[i], + loA[i]); + } } template LRSDPFunction::LRSDPFunction( const size_t numSparseConstraints, const size_t numDenseConstraints, + const size_t numLinearOperatorConstraints, const arma::Mat& initialPoint): - sdp(initialPoint.n_rows, numSparseConstraints, numDenseConstraints), + sdp(initialPoint.n_rows, numSparseConstraints, numDenseConstraints, + numLinearOperatorConstraints), initialPoint(initialPoint) { if (initialPoint.n_rows < initialPoint.n_cols) @@ -91,11 +102,18 @@ typename MatType::elem_type LRSDPFunction::EvaluateConstraint( return accu(SDP().SparseA()[index] % rrt.As()) - SDP().SparseB()[index]; } - const size_t index1 = index - SDP().NumSparseConstraints(); + else if (index < SDP().NumSparseConstraints() + SDP().NumDenseConstraints()) { + const size_t index1 = index - SDP().NumSparseConstraints(); + + // For computation optimization we will be taking R^T * A first. + return trace((trans(coordinates) * SDP().DenseA()[index1]) * coordinates) + - SDP().DenseB()[index1]; + } - // For computation optimization we will be taking R^T * A first. - return trace((trans(coordinates) * SDP().DenseA()[index1]) * coordinates) - - SDP().DenseB()[index1]; + const size_t index2 = index - SDP().NumSparseConstraints() + - SDP().NumDenseConstraints(); + return SDP().LinearOperators()[index2](RRT>()) + - SDP().LinearOperatorsB()[index2]; } template @@ -124,7 +142,7 @@ void UpdateRRT(LRSDPFunction& function, //! used with an LRSDPFunction. template static inline void -UpdateObjective(typename MatType::elem_type& objective, +UpdateObjectiveFromMatrixConstraints(typename MatType::elem_type& objective, const MatType& rrt, const std::vector& ais, const VecType& bis, @@ -143,12 +161,35 @@ UpdateObjective(typename MatType::elem_type& objective, objective += (sigma / 2.) * constraint * constraint; } } +template +static inline void +UpdateObjectiveFromLinearOperatorConstraints( + typename MatType::elem_type& objective, + const MatType& rrt, + const std::vector)>>& ais, + const VecType& bis, + const arma::vec& lambda, + const size_t lambdaOffset, + const double sigma) +{ + for (size_t i = 0; i < ais.size(); ++i) + { + // Take the trace subtracted by the b_i. + // Here taking R^T * A first is not recommended as we are already + // using pre-computed R * R^T. Taking R^T * A first will result in increase + // in number of computations. + const double constraint = ais[i](rrt) - bis[i]; + objective -= (lambda[lambdaOffset + i] * constraint); + objective += (sigma / 2.) * constraint * constraint; + } +} //! Utility function for calculating part of the gradient when AugLagrangian is //! used with an LRSDPFunction. template static inline void -UpdateGradient(MatType& s, +UpdateGradientFromMatrixConstraints(MatType& s, const MatType& rrt, const std::vector& ais, const VecType& bis, @@ -166,6 +207,28 @@ UpdateGradient(MatType& s, s -= y * ais[i]; } } +template +static inline void +UpdateGradientFromLinearOperatorConstraints(MatType& s, + const MatType& rrt, + const std::vector)>>&ais, + const VecType& bis, + const arma::vec& lambda, + const size_t lambdaOffset, + const double sigma, + const std::vector> loA) +{ + for (size_t i = 0; i < ais.size(); ++i) + { + // Here taking R^T * A first is not recommended as we are already + // using pre-computed R * R^T. Taking R^T * A first will result in increase + // in number of computations. + const double constraint = ais[i](rrt) - bis[i]; + const double y = lambda[lambdaOffset + i] - sigma * constraint; + s -= y * loA[i]; + } +} template static inline double @@ -211,11 +274,18 @@ EvaluateImpl(LRSDPFunction& function, trace((trans(coordinates) * function.SDP().C()) * coordinates); // Now each constraint. - UpdateObjective(objective, function.template RRT(), + UpdateObjectiveFromMatrixConstraints( + objective, function.template RRT(), function.SDP().SparseA(), function.SDP().SparseB(), lambda, 0, sigma); - UpdateObjective(objective, function.template RRT(), + UpdateObjectiveFromMatrixConstraints( + objective, function.template RRT(), function.SDP().DenseA(), function.SDP().DenseB(), lambda, function.SDP().NumSparseConstraints(), sigma); + UpdateObjectiveFromLinearOperatorConstraints( + objective, function.template RRT>(), + function.SDP().LinearOperators(), function.SDP().LinearOperatorsB(), + lambda, function.SDP().NumSparseConstraints() + + function.SDP().NumDenseConstraints(), sigma); return objective; } @@ -238,12 +308,17 @@ GradientImpl(const LRSDPFunction& function, const MatType& rrt = function.template RRT(); MatType s(function.SDP().C()); - UpdateGradient( + UpdateGradientFromMatrixConstraints( s, rrt, function.SDP().SparseA(), function.SDP().SparseB(), lambda, 0, sigma); - UpdateGradient( + UpdateGradientFromMatrixConstraints( s, rrt, function.SDP().DenseA(), function.SDP().DenseB(), lambda, function.SDP().NumSparseConstraints(), sigma); + UpdateGradientFromLinearOperatorConstraints( + s, function.template RRT>(), + function.SDP().LinearOperators(), + function.SDP().LinearOperatorsB(), lambda, + function.SDP().NumDenseConstraints(), sigma, function.LoA()); gradient = 2 * s * coordinates; } diff --git a/include/ensmallen_bits/sdp/lrsdp_impl.hpp b/include/ensmallen_bits/sdp/lrsdp_impl.hpp index 85c547942..23f217e7a 100644 --- a/include/ensmallen_bits/sdp/lrsdp_impl.hpp +++ b/include/ensmallen_bits/sdp/lrsdp_impl.hpp @@ -17,12 +17,32 @@ namespace ens { +template +LRSDP::LRSDP(const size_t numSparseConstraints, + const size_t numDenseConstraints, + const arma::Mat& initialPoint, + const size_t maxIterations) : + function(numSparseConstraints, numDenseConstraints, + 0, initialPoint), + maxIterations(maxIterations) +{ } + template LRSDP::LRSDP(const size_t numSparseConstraints, const size_t numDenseConstraints, + const size_t numLinearOperatorConstraints, const arma::Mat& initialPoint, const size_t maxIterations) : - function(numSparseConstraints, numDenseConstraints, initialPoint), + function(numSparseConstraints, numDenseConstraints, + numLinearOperatorConstraints, initialPoint), + maxIterations(maxIterations) +{ } + +template +LRSDP::LRSDP(const SDPType& sdp, + const arma::mat& initialPoint, + const size_t maxIterations): + function(sdp, initialPoint), maxIterations(maxIterations) { } diff --git a/include/ensmallen_bits/sdp/primal_dual.hpp b/include/ensmallen_bits/sdp/primal_dual.hpp index ddc7b8327..ceff75f0d 100644 --- a/include/ensmallen_bits/sdp/primal_dual.hpp +++ b/include/ensmallen_bits/sdp/primal_dual.hpp @@ -74,6 +74,7 @@ class PrimalDualSolver * @param coordinates The initial primal SDP coordinates to optimize. * @param ySparse The initial ySparse to optimize. * @param yDense The initial yDense to optimize. + * @param yDense The initial yLinearOperators to optimize. * @param dualCoordinates The initial dual SDP coordinates to optimize. * @param callbacks Callback functions. */ @@ -82,6 +83,7 @@ class PrimalDualSolver MatType& coordinates, MatType& ySparse, MatType& yDense, + MatType& yLinearOperators, MatType& dualCoordinates, CallbackTypes&&... callbacks); diff --git a/include/ensmallen_bits/sdp/primal_dual_impl.hpp b/include/ensmallen_bits/sdp/primal_dual_impl.hpp index 6e027ff47..282db72e1 100644 --- a/include/ensmallen_bits/sdp/primal_dual_impl.hpp +++ b/include/ensmallen_bits/sdp/primal_dual_impl.hpp @@ -33,6 +33,8 @@ #include "lin_alg.hpp" namespace ens { + + inline PrimalDualSolver::PrimalDualSolver(const size_t maxIterations, const double tau, const double normXzTol, @@ -116,7 +118,8 @@ SolveLyapunov(MatType& x, const AType& a, const BType& h) * * A = [ Asparse ] * [ Adense ] - * dy = [ dysparse dydense ] + * [ AlinearOperator ] + * dy = [ dysparse dydense dylinearOperator ] * E = Z sym I * F = X sym I * @@ -127,6 +130,7 @@ template(sdp.NumSparseConstraints(), 1)); MatType yDense(arma::ones(sdp.NumDenseConstraints(), 1)); + MatType yLinearOperators(arma::ones( + sdp.NumLinearOperatorConstraints(), 1)); MatType z(arma::eye(sdp.N(), sdp.N())); - return Optimize(sdp, coordinates, ySparse, yDense, z, callbacks...); + return Optimize(sdp, coordinates, ySparse, yDense, + yLinearOperators, z, callbacks...); } template @@ -206,6 +228,7 @@ typename MatType::elem_type PrimalDualSolver::Optimize( MatType& coordinates, MatType& ySparse, MatType& yDense, + MatType& yLinearOperators, MatType& dualCoordinates, CallbackTypes&&... callbacks) { @@ -243,10 +266,23 @@ typename MatType::elem_type PrimalDualSolver::Optimize( " one column."); } + if (yLinearOperators.n_rows != sdp.NumLinearOperatorConstraints()) + { + throw std::logic_error("PrimalDualSolver::Optimize(): yLinearOperators" + " needs to have the same length as the number of linear operator" + " constraints."); + } + + if (yLinearOperators.n_cols != 1) + { + throw std::logic_error("PrimalDualSolver::Optimize(): yLinearOperators" + " must have only one column."); + } + if (yDense.n_rows != sdp.NumDenseConstraints()) { - throw std::logic_error("PrimalDualSolver::Optimize(): yDense needs to have " - "the same length as the number of dense constraints."); + throw std::logic_error("PrimalDualSolver::Optimize(): yDense needs to have " + "the same length as the number of dense constraints."); } if (dualCoordinates.n_rows != sdp.N() || dualCoordinates.n_cols != sdp.N()) @@ -264,6 +300,18 @@ typename MatType::elem_type PrimalDualSolver::Optimize( const size_t n = sdp.N(); const size_t n2bar = sdp.N2bar(); + // Cache constarint dense matrices of the general linear operators + // stored as functions. + std::vector + loA(sdp.NumLinearOperatorConstraints()); + for (size_t i = 0; i < sdp.NumLinearOperatorConstraints(); i++) + { + loA[i].zeros(n, n); + math::convertToMatrix(sdp.LinearOperators()[i], + loA[i]); + } + // Form the A matrix in (2.7). Note we explicitly handle // sparse and dense constraints separately. @@ -286,23 +334,33 @@ typename MatType::elem_type PrimalDualSolver::Optimize( aDense.row(i) = aiDense.t(); } + typename SDPType::DenseConstraintType aLinearOperators( + sdp.NumLinearOperatorConstraints(), n2bar); + typename SDPType::DenseConstraintType aiLinearOperator; + for (size_t i = 0; i < sdp.NumLinearOperatorConstraints(); i++) + { + math::Svec(loA[i], aiLinearOperator); + aLinearOperators.row(i) = aiLinearOperator.t(); + } + typename SDPType::ObjectiveType sc; math::Svec(sdp.C(), sc); - MatType sx, sz, dySparse, dyDense, dsx, dsz, dX, dZ; + MatType sx, sz, dySparse, dyDense, dyLinearOperators, dsx, dsz, dX, dZ; math::Svec(coordinates, sx); math::Svec(dualCoordinates, sz); MatType rp, rd, rc, gk; - MatType rcMat, fMat, eInvFaSparseT, eInvFaDenseT, gkMat, - m, dualCheck; + MatType rcMat, fMat, eInvFaSparseT, eInvFaDenseT, eInvFaLinearOperatorsT, + gkMat, m, dualCheck; rp.set_size(sdp.NumConstraints(), 1); eInvFaSparseT.set_size(n2bar, sdp.NumSparseConstraints()); eInvFaDenseT.set_size(n2bar, sdp.NumDenseConstraints()); + eInvFaLinearOperatorsT.set_size(n2bar, sdp.NumLinearOperatorConstraints()); m.zeros(sdp.NumConstraints(), sdp.NumConstraints()); // Controls early termination of the optimization process. @@ -311,6 +369,7 @@ typename MatType::elem_type PrimalDualSolver::Optimize( typename SDPType::ElemType primalObj = 0., alpha, beta; terminate |= Callback::BeginOptimization(*this, sdp, coordinates, callbacks...); + for (size_t iteration = 1; iteration != maxIterations && !terminate; iteration++) { @@ -320,6 +379,7 @@ typename MatType::elem_type PrimalDualSolver::Optimize( // the KKT system again. Empirically, this PC step has been shown to // significantly reduce the number of required iterations (and is used // by most practical solver implementations). + if (sdp.NumSparseConstraints()) { rp(arma::span(0, sdp.NumSparseConstraints() - 1), 0) = @@ -327,12 +387,21 @@ typename MatType::elem_type PrimalDualSolver::Optimize( } if (sdp.NumDenseConstraints()) { - rp(arma::span(sdp.NumSparseConstraints(), sdp.NumConstraints() - 1), 0) = + rp(arma::span(sdp.NumSparseConstraints(), sdp.NumSparseConstraints() + + sdp.NumDenseConstraints() - 1), 0) = sdp.DenseB() - aDense * sx; } + if (sdp.NumLinearOperatorConstraints()) + { + rp(arma::span(sdp.NumSparseConstraints()+sdp.NumDenseConstraints(), + sdp.NumConstraints() - 1), 0) = + sdp.LinearOperatorsB() - aLinearOperators * sx; + } + // Rd = C - Z - smat A^T y - rd = sc - sz - aSparse.t() * ySparse - aDense.t() * yDense; + rd = sc - sz - aSparse.t() * ySparse - aDense.t() * yDense - + aLinearOperators.t() * yLinearOperators; math::SymKronId(coordinates, fMat); @@ -354,42 +423,104 @@ typename MatType::elem_type PrimalDualSolver::Optimize( eInvFaDenseT.col(i) = gk; } + for (size_t i = 0; i < sdp.NumLinearOperatorConstraints(); i++) + { + SolveLyapunov(gkMat, dualCoordinates, coordinates * loA[i] + + loA[i] * coordinates); + math::Svec(gkMat, gk); + eInvFaLinearOperatorsT.col(i) = gk; + } + // Form the M = A E^(-1) F A^T matrix (2.15) // - // Since we split A up into its sparse and dense components, - // we have to handle each block separately. + // Since we split A up into its sparse, dense, and linear + // operator components, we have to handle each block separately. + if (sdp.NumSparseConstraints()) { m.submat(arma::span(0, sdp.NumSparseConstraints() - 1), arma::span(0, sdp.NumSparseConstraints() - 1)) = aSparse * eInvFaSparseT; + if (sdp.NumDenseConstraints()) { m.submat(arma::span(0, sdp.NumSparseConstraints() - 1), arma::span(sdp.NumSparseConstraints(), - sdp.NumConstraints() - 1)) = + sdp.NumSparseConstraints()+ + sdp.NumDenseConstraints() - 1)) = aSparse * eInvFaDenseT; } + + if (sdp.NumLinearOperatorConstraints()) + { + m.submat(arma::span(0, sdp.NumSparseConstraints() - 1), + arma::span(sdp.NumSparseConstraints() + + sdp.NumDenseConstraints(), + sdp.NumConstraints() - 1)) = + aSparse * eInvFaLinearOperatorsT; + } } if (sdp.NumDenseConstraints()) { - if (sdp.NumSparseConstraints()) - { + if (sdp.NumSparseConstraints()) + { + m.submat(arma::span(sdp.NumSparseConstraints(), + sdp.NumSparseConstraints() + + sdp.NumDenseConstraints() - 1), + arma::span(0, sdp.NumSparseConstraints() - 1)) = + aDense * eInvFaSparseT; + } + m.submat(arma::span(sdp.NumSparseConstraints(), + sdp.NumSparseConstraints() + + sdp.NumDenseConstraints() - 1), + arma::span(sdp.NumSparseConstraints(), + sdp.NumSparseConstraints() + + sdp.NumDenseConstraints() - 1)) = + aDense * eInvFaDenseT; + + if (sdp.NumLinearOperatorConstraints()) + { + m.submat(arma::span(sdp.NumSparseConstraints(), + sdp.NumSparseConstraints() + + sdp.NumDenseConstraints() - 1), + arma::span(sdp.NumSparseConstraints() + + sdp.NumDenseConstraints(), + sdp.NumConstraints() - 1)) = + aDense * eInvFaLinearOperatorsT; + } + } + + if (sdp.NumLinearOperatorConstraints()) + { + if (sdp.NumSparseConstraints()) + { + m.submat(arma::span(sdp.NumSparseConstraints() + + sdp.NumDenseConstraints(), + sdp.NumConstraints() - 1), + arma::span(0, sdp.NumSparseConstraints() - 1)) = + aLinearOperators * eInvFaSparseT; + } + if (sdp.NumDenseConstraints()) { + m.submat(arma::span(sdp.NumSparseConstraints() + + sdp.NumDenseConstraints(), + sdp.NumConstraints() - 1), + arma::span(sdp.NumSparseConstraints(), + sdp.NumSparseConstraints() + + sdp.NumDenseConstraints() - 1)) = + aLinearOperators * eInvFaDenseT; + } + + m.submat(arma::span(sdp.NumSparseConstraints() + + sdp.NumDenseConstraints(), sdp.NumConstraints() - 1), - arma::span(0, - sdp.NumSparseConstraints() - 1)) = - aDense * eInvFaSparseT; - } - m.submat(arma::span(sdp.NumSparseConstraints(), - sdp.NumConstraints() - 1), - arma::span(sdp.NumSparseConstraints(), - sdp.NumConstraints() - 1)) = - aDense * eInvFaDenseT; + arma::span(sdp.NumSparseConstraints() + + sdp.NumDenseConstraints(), + sdp.NumConstraints() - 1)) = + aLinearOperators * eInvFaLinearOperatorsT; } const typename MatType::elem_type sxdotsz = arma::dot(sx, sz); - // TODO(stephentu): computing these alphahats should take advantage of // the cholesky decomposition of X and Z which we should have available // when we use more efficient methods above. @@ -397,8 +528,10 @@ typename MatType::elem_type PrimalDualSolver::Optimize( // This solves step (1) of Section 7, the "predictor" step. rcMat = -0.5 * (coordinates * dualCoordinates + dualCoordinates * coordinates); math::Svec(rcMat, rc); - SolveKKTSystem(aSparse, aDense, dualCoordinates, m, fMat, rp, rd, rc, dsx, - dySparse, dyDense, dsz); + + SolveKKTSystem(aSparse, aDense, aLinearOperators, dualCoordinates, m, fMat, rp, rd, rc, dsx, + dySparse, dyDense, dyLinearOperators, dsz); + math::Smat(dsx, dX); math::Smat(dsz, dZ); @@ -436,8 +569,11 @@ typename MatType::elem_type PrimalDualSolver::Optimize( dX * dZ + dZ * dX); math::Svec(rcMat, rc); - SolveKKTSystem(aSparse, aDense, dualCoordinates, m, fMat, rp, rd, rc, dsx, - dySparse, dyDense, dsz); + + SolveKKTSystem(aSparse, aDense, aLinearOperators, dualCoordinates, + m, fMat, rp, rd, rc, dsx, + dySparse, dyDense, dyLinearOperators, dsz); + math::Smat(dsx, dX); math::Smat(dsz, dZ); if (!Alpha(coordinates, dX, tau, alpha)) @@ -462,10 +598,14 @@ typename MatType::elem_type PrimalDualSolver::Optimize( terminate |= Callback::StepTaken(*this, sdp, coordinates, callbacks...); math::Svec(coordinates, sx); + if (dySparse.n_cols != 0) ySparse += beta * dySparse; if (dyDense.n_cols != 0) yDense += beta * dyDense; + if (dyLinearOperators.n_cols != 0) + yLinearOperators += beta * dyLinearOperators; + dualCoordinates += beta * dZ; math::Svec(dualCoordinates, sz); @@ -484,8 +624,11 @@ typename MatType::elem_type PrimalDualSolver::Optimize( const double sparsePrimalInfeas = arma::norm(sdp.SparseB() - aSparse * sx, 2); const double densePrimalInfeas = arma::norm(sdp.DenseB() - aDense * sx, 2); + const double linearOperatorPrimalInfeas = arma::norm(sdp.LinearOperatorsB() - + aLinearOperators * sx, 2); const double primalInfeas = sqrt(sparsePrimalInfeas * sparsePrimalInfeas + - densePrimalInfeas * densePrimalInfeas); + densePrimalInfeas * densePrimalInfeas + + linearOperatorPrimalInfeas * linearOperatorPrimalInfeas); primalObj = arma::dot(sdp.C(), coordinates); @@ -498,11 +641,16 @@ typename MatType::elem_type PrimalDualSolver::Optimize( // TODO(stephentu): this dual check is quite expensive, // maybe make it optional? + // + dualCheck = dualCoordinates - sdp.C(); for (size_t i = 0; i < sdp.NumSparseConstraints(); i++) dualCheck += ySparse(i) * sdp.SparseA()[i]; for (size_t i = 0; i < sdp.NumDenseConstraints(); i++) dualCheck += yDense(i) * sdp.DenseA()[i]; + for (size_t i = 0; i < sdp.NumLinearOperatorConstraints(); i++) + dualCheck += yLinearOperators(i) * loA[i]; + const double dualInfeas = arma::norm(dualCheck, "fro"); if (normXZ <= normXzTol && primalInfeas <= primalInfeasTol && diff --git a/include/ensmallen_bits/sdp/sdp.hpp b/include/ensmallen_bits/sdp/sdp.hpp index f12b2524e..710d0e2d1 100644 --- a/include/ensmallen_bits/sdp/sdp.hpp +++ b/include/ensmallen_bits/sdp/sdp.hpp @@ -20,11 +20,18 @@ namespace ens { * s.t. dot(Ai, X) = bi, i=1,...,m, X >= 0 * * This representation allows the constraint matrices Ai to be specified as - * either dense matrices (arma::mat) or sparse matrices (arma::sp_mat). After - * initializing the SDP object, you will need to set the constraints yourself, - * via the SparseA(), SparseB(), DenseA(), DenseB(), and C() functions. Note - * that for each matrix you add to either SparseA() or DenseA(), you must add - * the corresponding b value to the corresponding vector SparseB() or DenseB(). + * either dense matrices (arma::mat) or sparse matrices (arma::sp_mat). + * General linear operators are to be specified as std::function objects. + * Each of these functions must take a matrix as input and return a real + * number. The matrix input and number output must be of types + * arma::Mat and + * ObjectiveMatrixType::elem_type respectively (see template parameters). + * After initializing the SDP object, you will need to set + * the constraints yourself, via the SparseA(), SparseB(), DenseA(), DenseB(), + * LinearOperatorsA(), LinearOperatorsB(), and C() functions. + * Note that for each matrix you add to either SparseA(), DenseA(), or + * LinearOperatorsA() you must add the corresponding b value to the + * corresponding vector SparseB(), DenseB(), or LinearOperatorsB(). * * The objective matrix (C) may be stored as either dense or sparse depending on * the ObjectiveMatrixType parameter. @@ -54,28 +61,33 @@ class SDP /** * Initialize this SDP to an empty state. To add constraints, you will have - * to modify the constraints via the SparseA(), DenseA(), SparseB(), DenseB(), - * and C() functions. For the sake of speed, there is no error checking, so - * if you specify an invalid SDP, whatever solver you use will gladly try to - * solve it! (And it will probably crash horribly.) + * to modify the constraints via the SparseA(), DenseA(), LinearOperatorsA(), + * SparseB(), DenseB(), LinearOperatorsB() and C() functions. For the sake + * of speed, there is no error checking, so if you specify an invalid SDP, + * whatever solver you use will gladly try to solve it! (And it will probably + * crash horribly.) */ SDP(); /** * Initialize this SDP to one which structurally has size n. To set the * constraints you will still need to access through SparseA(), DenseA(), - * SparseB(), DenseB(), and C(). Consider using move semantics to keep things - * fast. As with the previous constructor, there is no error checking for the - * sake of speed, so if you build an invalid SDP, whatever solver you use will + * LinearOperatorsA(), SparseB(), DenseB(), LinearOperatorsB() and C(). + * Consider using move semantics to keep things fast. As with the + * previous constructor, there is no error checking for the sake of + * speed, so if you build an invalid SDP, whatever solver you use will * gladly try to solve it! (And it will probably crash horribly.) * * @param n Number of rows (and columns) in the objective matrix C. * @param numSparseConstraints Number of sparse constraints. * @param numDenseConstraints Number of dense constraints. + * @param numLinearOperatorConstraints Number of general linear operator + * constraints. */ SDP(const size_t n, const size_t numSparseConstraints, - const size_t numDenseConstraints); + const size_t numDenseConstraints, + const size_t numLinearOperatorConstraints = 0); //! Return number of rows and columns in the objective matrix C. size_t N() const { return c.n_rows; } @@ -88,9 +100,12 @@ class SDP //! Return the number of dense constraints (constraints with dense Ai) in the //! SDP. size_t NumDenseConstraints() const { return denseB.n_elem; } + //! Return the number of general linear operator constraints + //! (constraints represented as functions) in the SDP. + size_t NumLinearOperatorConstraints() const { return linearOperatorsB.n_elem; } //! Return the total number of constraints in the SDP. - size_t NumConstraints() const { return sparseB.n_elem + denseB.n_elem; } + size_t NumConstraints() const { return sparseB.n_elem + denseB.n_elem + linearOperatorsB.n_elem; } //! Modify the sparse objective function matrix (sparseC). ObjectiveMatrixType& C() { return c; } @@ -115,6 +130,23 @@ class SDP //! constraints). std::vector& DenseA() { return denseA; } + + //! Return the vector of linear operator functions (which correspond to the general + //! linear operator constraints). + const std::vector)>>& + LinearOperators() const + { return linearOperators; } + + //! Modify the vector of linear operator functions (which correspond to the general + //! linear operator constraints). + std::vector)>>& + LinearOperators() + { return linearOperators; } + //! Return the vector of sparse B values. const BVectorType& SparseB() const { return sparseB; } //! Modify the vector of sparse B values. @@ -125,6 +157,11 @@ class SDP //! Modify the vector of dense B values. BVectorType& DenseB() { return denseB; } + //! Return the vector of Linear operator B values. + const BVectorType& LinearOperatorsB() const { return linearOperatorsB; } + //! Modify the vector of Linear operator B values. + BVectorType& LinearOperatorsB() { return linearOperatorsB; } + /** * Check whether or not the constraint matrices are linearly independent. * @@ -141,6 +178,7 @@ class SDP void GetInitialPoints(MatType& coordinates, MatType& ySparse, MatType& yDense, + MatType& yLinearOperators, MatType& dualCoordinates) const; private: @@ -149,13 +187,25 @@ class SDP //! A_i for each sparse constraint. std::vector sparseA; + //! b_i for each sparse constraint. BVectorType sparseB; //! A_i for each dense constraint. std::vector denseA; + //! b_i for each dense constraint. BVectorType denseB; + + //! General linear operator constraints. + std::vector)>> + linearOperators; + + //! b_i for each linear operator constraint. + BVectorType linearOperatorsB; + }; } // namespace ens diff --git a/include/ensmallen_bits/sdp/sdp_impl.hpp b/include/ensmallen_bits/sdp/sdp_impl.hpp index a233f26bd..0b4c5b260 100644 --- a/include/ensmallen_bits/sdp/sdp_impl.hpp +++ b/include/ensmallen_bits/sdp/sdp_impl.hpp @@ -28,7 +28,9 @@ SDP::SDP(const size_t n, const size_t numSparseConstraints, - const size_t numDenseConstraints) : + const size_t numDenseConstraints, + const size_t numLinearOperatorConstraints) : c(n, n), sparseA(numSparseConstraints), sparseB(numSparseConstraints), denseA(numDenseConstraints), - denseB(numDenseConstraints) + denseB(numDenseConstraints), + linearOperators(numLinearOperatorConstraints), + linearOperatorsB(numLinearOperatorConstraints) + { for (size_t i = 0; i < numSparseConstraints; i++) sparseA[i].zeros(n, n); @@ -81,7 +87,16 @@ bool SDP(LinearOperators()[i], + lo); + math::Svec(lo, sa); + A.row(NumSparseConstraints()+ NumDenseConstraints() + i) = sa.t(); + } const DenseConstraintMatrixType s = arma::svd(A); return s(s.n_elem - 1) > 1e-5; } @@ -112,11 +127,13 @@ void SDP::GetInitialPoints(MatType& coordinates, MatType& ySparse, MatType& yDense, + MatType& yLinearOperators, MatType& dualCoordinates) const { coordinates = arma::eye(c.n_rows, c.n_rows); ySparse = arma::ones(NumSparseConstraints(), 1); yDense = arma::ones(NumDenseConstraints(), 1); + yLinearOperators = arma::ones(NumLinearOperatorConstraints(), 1); dualCoordinates = arma::eye(c.n_rows, c.n_rows); } diff --git a/tests/sdp_primal_dual_test.cpp b/tests/sdp_primal_dual_test.cpp index d7da50f60..7506b6d05 100644 --- a/tests/sdp_primal_dual_test.cpp +++ b/tests/sdp_primal_dual_test.cpp @@ -238,8 +238,9 @@ static bool CheckKKT(const SDPType& sdp, static void SolveMaxCutFeasibleSDP(const SDP& sdp) { arma::mat X, Z; - arma::mat ysparse, ydense; + arma::mat ysparse, ydense, yLinearOperators; ydense.set_size(0); + yLinearOperators.set_size(0); // strictly feasible starting point X.eye(sdp.N(), sdp.N()); @@ -248,15 +249,16 @@ static void SolveMaxCutFeasibleSDP(const SDP& sdp) PrimalDualSolver solver; - solver.Optimize(sdp, X, ysparse, ydense, Z); + solver.Optimize(sdp, X, ysparse, ydense, yLinearOperators, Z); CheckKKT(sdp, X, ysparse, ydense, Z); } static void SolveMaxCutPositiveSDP(const SDP& sdp) { arma::mat X, Z; - arma::mat ysparse, ydense; + arma::mat ysparse, ydense, yLinearOperators; ydense.set_size(0); + yLinearOperators.set_size(0); // infeasible, but positive starting point X = arma::eye(sdp.N(), sdp.N()); @@ -264,7 +266,7 @@ static void SolveMaxCutPositiveSDP(const SDP& sdp) Z.eye(sdp.N(), sdp.N()); PrimalDualSolver solver; - solver.Optimize(sdp, X, ysparse, ydense, Z); + solver.Optimize(sdp, X, ysparse, ydense, yLinearOperators, Z); CheckKKT(sdp, X, ysparse, ydense, Z); } @@ -295,9 +297,9 @@ TEST_CASE("DeprecatedSmallLovaszThetaSdp", "[SdpPrimalDualTest]") PrimalDualSolver solver; arma::mat X, Z; - arma::mat ysparse, ydense; - sdp.GetInitialPoints(X, ysparse, ydense, Z); - solver.Optimize(sdp, X, ysparse, ydense, Z); + arma::mat ysparse, ydense, yLinearOperators; + sdp.GetInitialPoints(X, ysparse, ydense, yLinearOperators, Z); + solver.Optimize(sdp, X, ysparse, ydense, yLinearOperators, Z); CheckKKT(sdp, X, ysparse, ydense, Z); } @@ -309,9 +311,9 @@ TEST_CASE("SmallLovaszThetaSdp", "[SdpPrimalDualTest]") PrimalDualSolver solver; - arma::mat X, Z, ysparse, ydense; - sdp.GetInitialPoints(X, ysparse, ydense, Z); - solver.Optimize(sdp, X, ysparse, ydense, Z); + arma::mat X, Z, ysparse, ydense, yLinearOperators; + sdp.GetInitialPoints(X, ysparse, ydense, yLinearOperators, Z); + solver.Optimize(sdp, X, ysparse, ydense, yLinearOperators, Z); CheckKKT(sdp, X, ysparse, ydense, Z); } @@ -444,9 +446,9 @@ TEST_CASE("LogChebychevApproxSdp","[SdpPrimalDualTest]") const auto sdp0 = ConstructLogChebychevApproxSdp(A0, b0); PrimalDualSolver solver0; arma::mat X0, Z0; - arma::mat ysparse0, ydense0; - sdp0.GetInitialPoints(X0, ysparse0, ydense0, Z0); - solver0.Optimize(sdp0, X0, ysparse0, ydense0, Z0); + arma::mat ysparse0, ydense0, yLinearOperators0; + sdp0.GetInitialPoints(X0, ysparse0, ydense0, yLinearOperators0, Z0); + solver0.Optimize(sdp0, X0, ysparse0, ydense0, yLinearOperators0, Z0); success = CheckKKT(sdp0, X0, ysparse0, ydense0, Z0); if (success) break; @@ -464,9 +466,9 @@ TEST_CASE("LogChebychevApproxSdp","[SdpPrimalDualTest]") const auto sdp1 = ConstructLogChebychevApproxSdp(A1, b1); PrimalDualSolver solver1; arma::mat X1, Z1; - arma::mat ysparse1, ydense1; - sdp1.GetInitialPoints(X1, ysparse1, ydense1, Z1); - solver1.Optimize(sdp1, X1, ysparse1, ydense1, Z1); + arma::mat ysparse1, ydense1, yLinearOperators1; + sdp1.GetInitialPoints(X1, ysparse1, ydense1, yLinearOperators1, Z1); + solver1.Optimize(sdp1, X1, ysparse1, ydense1, yLinearOperators1, Z1); success = CheckKKT(sdp1, X1, ysparse1, ydense1, Z1); if (success) break; @@ -578,9 +580,9 @@ TEST_CASE("CorrelationCoeffToySdp","[SdpPrimalDualTest]") PrimalDualSolver solver; arma::mat X, Z; - arma::mat ysparse, ydense; - sdp.GetInitialPoints(X, ysparse, ydense, Z); - const double obj = solver.Optimize(sdp, X, ysparse, ydense, Z); + arma::mat ysparse, ydense, yLinearOperators; + sdp.GetInitialPoints(X, ysparse, ydense, yLinearOperators, Z); + const double obj = solver.Optimize(sdp, X, ysparse, ydense, yLinearOperators, Z); bool success = CheckKKT(sdp, X, ysparse, ydense, Z); REQUIRE(success == true); REQUIRE(obj == Approx(2 * (-0.978)).epsilon(1e-5));