diff --git a/doc/optimizers.md b/doc/optimizers.md index bb8a25073..a3fdb3dfb 100644 --- a/doc/optimizers.md +++ b/doc/optimizers.md @@ -869,6 +869,12 @@ matrix within an iterative procedure using the covariance matrix. * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize`_`)` * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy`_`)` * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxFunctionEvaluations`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxFunctionEvaluations, minObjective`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxFunctionEvaluations, minObjective, toleranceConditionCov`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxFunctionEvaluations, minObjective, toleranceConditionCov, toleranceNoEffectCoord`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxFunctionEvaluations, minObjective, toleranceConditionCov, toleranceNoEffectCoord, toleranceNoEffectAxis`_`)` + * `CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxFunctionEvaluations, minObjective, toleranceConditionCov, toleranceNoEffectCoord, toleranceNoEffectAxis, toleranceRange, toleranceRangePatience`_`)` The _`SelectionPolicyType`_ template parameter refers to the strategy used to compute the (approximate) objective function. 
The `FullSelection` and @@ -896,6 +902,13 @@ For convenience the following types can be used: | `double` | **`tolerance`** | Maximum absolute tolerance to terminate algorithm. | `1e-5` | | `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` | | `size_t` | **`stepSize`** | Initial step size | `0` | +| `int` | **`maxFunctionEvaluations`** | Stop if the number of function evaluations reaches this limit. | `std::numeric_limits<int>::max()` | +| `double` | **`minObjective`** | Stop if the best objective value is less than or equal to this target value. | `std::numeric_limits<double>::lowest()` | +| `size_t` | **`toleranceConditionCov`** | Tolerance for stopping if the condition number of the covariance matrix exceeds this threshold. | `1e14` | +| `double` | **`toleranceNoEffectAxis`** | Tolerance for stopping if adding a 0.1-standard deviation vector in a principal axis direction does not change the mean vector. | `1e-12` | +| `double` | **`toleranceNoEffectCoord`** | Tolerance for stopping if adding a 0.2-standard deviation in each coordinate does not change the mean vector. | `1e-12` | +| `double` | **`toleranceRange`** | Tolerance for stopping if the range of the fitness values is less than this value. | `1e-12` | +| `size_t` | **`toleranceRangePatience`** | Patience for stopping if the range of the fitness values remains less than `toleranceRange` for a defined number of generations. | `1` | Attributes of the optimizer may also be changed via the member methods `Lambda()`, `TransformationPolicy()`, `BatchSize()`, `MaxIterations()`, @@ -925,12 +938,12 @@ RosenbrockFunction f; arma::mat coordinates = f.GetInitialPoint(); // CMAES with the FullSelection and BoundaryBoxConstraint policies. -BoundaryBoxConstraint b(-1, 1); +BoundaryBoxConstraint<> b(-1, 1); CMAES optimizer(0, b, 32, 200, 1e-4); optimizer.Optimize(f, coordinates); // CMAES with the RandomSelection and BoundaryBoxConstraint policies. 
-ApproxCMAES> cmaes(0, b, 32, 200, 1e-4); +ApproxCMAES> approxOptimizer(0, b, 32, 200, 1e-4); approxOptimizer.Optimize(f, coordinates); ``` diff --git a/include/ensmallen_bits/cmaes/cmaes.hpp b/include/ensmallen_bits/cmaes/cmaes.hpp index 0ae323490..781d7e58f 100644 --- a/include/ensmallen_bits/cmaes/cmaes.hpp +++ b/include/ensmallen_bits/cmaes/cmaes.hpp @@ -75,6 +75,13 @@ class CMAES * @param selectionPolicy Instantiated selection policy used to calculate the * objective. * @param stepSize Starting sigma/step size (will be modified). + * @param maxFunctionEvaluations Maximum number of function evaluations allowed. + * @param minObjective Minimum objective value to terminate the optimization. + * @param toleranceConditionCov Tolerance condition for covariance matrix. + * @param toleranceNoEffectCoord Tolerance for stopping if there is no change when adding 0.2 std in each coordinate. + * @param toleranceNoEffectAxis Tolerance for stopping if there is no change when adding 0.1 std along each axis. + * @param toleranceRange Tolerance for stopping if range of the fitness values is less than this value. + * @param toleranceRangePatience Patience for stopping if range of the fitness values is less than toleranceRange. 
*/ CMAES(const size_t lambda = 0, const TransformationPolicyType& @@ -83,7 +90,15 @@ class CMAES const size_t maxIterations = 1000, const double tolerance = 1e-5, const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), - double stepSize = 0); + double stepSize = 0, + const int maxFunctionEvaluations = std::numeric_limits::max(), + const double minObjective = std::numeric_limits::lowest(), + const size_t toleranceConditionCov = 1e14, + const double toleranceNoEffectCoord = 1e-12, + const double toleranceNoEffectAxis = 1e-12, + const double toleranceRange = 1e-12, + const size_t toleranceRangePatience = 1 + ); /** * Construct the CMA-ES optimizer with the given function and parameters @@ -103,6 +118,13 @@ class CMAES * @param selectionPolicy Instantiated selection policy used to calculate the * objective. * @param stepSize Starting sigma/step size (will be modified). + * @param maxFunctionEvaluations Maximum number of function evaluations allowed. + * @param minObjective Minimum objective value to terminate the optimization. + * @param toleranceConditionCov Tolerance condition for covariance matrix. + * @param toleranceNoEffectCoord Tolerance for stopping if there is no change when adding 0.2 std in each coordinate. + * @param toleranceNoEffectAxis Tolerance for stopping if there is no change when adding 0.1 std along each axis. + * @param toleranceRange Tolerance for stopping if range of the fitness values is less than this value. + * @param toleranceRangePatience Patience for stopping if range of the fitness values is less than toleranceRange. 
*/ CMAES(const size_t lambda = 0, const double lowerBound = -10, @@ -111,7 +133,15 @@ class CMAES const size_t maxIterations = 1000, const double tolerance = 1e-5, const SelectionPolicyType& selectionPolicy = SelectionPolicyType(), - double stepSize = 0); + double stepSize = 0, + const int maxFunctionEvaluations = std::numeric_limits::max(), + const double minObjective = std::numeric_limits::lowest(), + const size_t toleranceConditionCov = 1e14, + const double toleranceNoEffectCoord = 1e-12, + const double toleranceNoEffectAxis = 1e-12, + const double toleranceRange = 1e-12, + const size_t toleranceRangePatience = 1 + ); /** * Optimize the given function using CMA-ES. The given starting point will be @@ -153,6 +183,21 @@ class CMAES //! Modify the tolerance for termination. double& Tolerance() { return tolerance; } + //! Get the maximum number of function evaluations. + size_t MaxFunctionEvaluations() const { return maxFunctionEvaluations; } + //! Modify the maximum number of function evaluations. + size_t& MaxFunctionEvaluations() { return maxFunctionEvaluations; } + + //! Get the minimum objective value to terminate the optimization. + double MinObjective() const { return minObjective; } + //! Modify the minimum objective value to terminate the optimization. + double& MinObjective() { return minObjective; } + + //! Get the tolerance condition for covariance matrix. + size_t ToleranceConditionCov() const { return toleranceConditionCov; } + //! Modify the tolerance condition for covariance matrix. + size_t& ToleranceConditionCov() { return toleranceConditionCov; } + //! Get the selection policy. const SelectionPolicyType& SelectionPolicy() const { return selectionPolicy; } //! Modify the selection policy. @@ -172,6 +217,38 @@ class CMAES double& StepSize() { return stepSize; } + //! Get the total number of function evaluations. + size_t FunctionEvaluations() const + { return functionEvaluations; } + + //! 
Get the tolerance for stopping if there is no change when adding 0.2 std in each coordinate. + double ToleranceNoEffectCoord() const + { return toleranceNoEffectCoord; } + //! Modify the tolerance for stopping if there is no change when adding 0.2 std in each coordinate. + double& ToleranceNoEffectCoord() + { return toleranceNoEffectCoord; } + + //! Get the tolerance for stopping if there is no change when adding 0.1 std along each axis. + double ToleranceNoEffectAxis() const + { return toleranceNoEffectAxis; } + //! Modify the tolerance for stopping if there is no change when adding 0.1 std along each axis. + double& ToleranceNoEffectAxis() + { return toleranceNoEffectAxis; } + + //! Get the tolerance for stopping if range of the fitness values is less than this value. + double ToleranceRange() const + { return toleranceRange; } + //! Modify the tolerance for stopping if range of the fitness values is less than this value. + double& ToleranceRange() + { return toleranceRange; } + + //! Get the patience for stopping if range of the fitness values is less than toleranceRange. + size_t ToleranceRangePatience() const + { return toleranceRangePatience; } + //! Modify the patience for stopping if range of the fitness values is less than toleranceRange. + size_t& ToleranceRangePatience() + { return toleranceRangePatience; } + private: //! Population size. size_t lambda; @@ -185,6 +262,21 @@ class CMAES //! The tolerance for termination. double tolerance; + //! The tolerance for stopping if there is no change when adding 0.2 std in each coordinate. + double toleranceNoEffectCoord; + + //! The tolerance for stopping if there is no change when adding 0.1 std along each axis. + double toleranceNoEffectAxis; + + //! The maximum number of function evaluations. + size_t maxFunctionEvaluations; + + //! The minimum objective value to terminate the optimization. + double minObjective; + + //! The tolerance condition for covariance matrix. + size_t toleranceConditionCov; + //! 
The selection policy used to calculate the objective. SelectionPolicyType selectionPolicy; @@ -195,6 +287,15 @@ class CMAES //! The step size. double stepSize; + + //! Counter for the number of function evaluations. + size_t functionEvaluations = 0; + + //! The tolerance for stopping if range of the fitness values is less than this value. + double toleranceRange; + + //! The patience for stopping if range of the fitness values is less than toleranceRange. + size_t toleranceRangePatience; }; /** diff --git a/include/ensmallen_bits/cmaes/cmaes_impl.hpp b/include/ensmallen_bits/cmaes/cmaes_impl.hpp index 73a5c7f02..12380122e 100644 --- a/include/ensmallen_bits/cmaes/cmaes_impl.hpp +++ b/include/ensmallen_bits/cmaes/cmaes_impl.hpp @@ -31,14 +31,28 @@ CMAES::CMAES(const size_t lambda, const size_t maxIterations, const double tolerance, const SelectionPolicyType& selectionPolicy, - double stepSizeIn) : + double stepSizeIn, + const int maxFunctionEvaluations, + const double minObjective, + const size_t toleranceConditionCov, + const double toleranceNoEffectCoord, + const double toleranceNoEffectAxis, + const double toleranceRange, + const size_t tolerancePatienceRange): lambda(lambda), batchSize(batchSize), maxIterations(maxIterations), tolerance(tolerance), selectionPolicy(selectionPolicy), transformationPolicy(transformationPolicy), - stepSize(stepSizeIn) + stepSize(stepSizeIn), + maxFunctionEvaluations(maxFunctionEvaluations), + minObjective(minObjective), + toleranceConditionCov(toleranceConditionCov), + toleranceNoEffectCoord(toleranceNoEffectCoord), + toleranceNoEffectAxis(toleranceNoEffectAxis), + toleranceRange(toleranceRange), + toleranceRangePatience(toleranceRangePatience) { /* Nothing to do. 
*/ } template @@ -49,13 +63,27 @@ CMAES::CMAES(const size_t lambda, const size_t maxIterations, const double tolerance, const SelectionPolicyType& selectionPolicy, - double stepSizeIn) : + double stepSizeIn, + const int maxFunctionEvaluations, + const double minObjective, + const size_t toleranceConditionCov, + const double toleranceNoEffectCoord, + const double toleranceNoEffectAxis, + const double toleranceRange, + const size_t toleranceRangePatience): lambda(lambda), batchSize(batchSize), maxIterations(maxIterations), tolerance(tolerance), selectionPolicy(selectionPolicy), - stepSize(stepSizeIn) + stepSize(stepSizeIn), + maxFunctionEvaluations(maxFunctionEvaluations), + minObjective(minObjective), + toleranceConditionCov(toleranceConditionCov), + toleranceNoEffectCoord(toleranceNoEffectCoord), + toleranceNoEffectAxis(toleranceNoEffectAxis), + toleranceRange(toleranceRange), + toleranceRangePatience(toleranceRangePatience) { Warn << "This is a deprecated constructor and will be removed in a " "future version of ensmallen" << std::endl; @@ -91,7 +119,16 @@ typename MatType::elem_type CMAES(120 + 30 * iterate.n_elem / lambda), + std::min(static_cast(0.2 * maxIterations), static_cast(20000)) + ); + + std::vector bestFitnessHistory; + std::vector medianFitnessHistory; // Parent weights. const size_t mu = std::round(lambda / 2); @@ -109,7 +146,7 @@ typename MatType::elem_type CMAES::infinity(); + double maxObjectiveInPatience = -std::numeric_limits::infinity(); for (size_t i = 1; (i != maxIterations) && !terminate; ++i) { @@ -235,6 +277,8 @@ typename MatType::elem_type CMAES 1e4 * std::sqrt(eigval.max())) + { + Info << "The step size ratio is too large; " + << "terminating optimization. Try a smaller step size?" << std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + // Update covariance matrix. 
if ((psNorm / sqrt(1 - std::pow(1 - cs, 2 * i))) < h) { @@ -340,6 +395,9 @@ typename MatType::elem_type CMAES(currentObjective)); + maxObjectiveInPatience = std::max(maxObjectiveInPatience, static_cast(currentObjective)); + // Output current objective function. Info << "CMA-ES: iteration " << i << ", objective " << overallObjective << "." << std::endl; @@ -347,16 +405,17 @@ typename MatType::elem_type CMAES patience) { + if (steps > patience) + { Info << "CMA-ES: minimized within tolerance " << tolerance << "; " << "terminating optimization." << std::endl; @@ -365,10 +424,162 @@ typename MatType::elem_type CMAES toleranceRangePatience) + { + Info << "CMA-ES: range of function values below " << toleranceRange << + "; terminating optimization." << std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + stepsRange++; + } + else + { + minObjectiveInPatience = std::numeric_limits::infinity(); + maxObjectiveInPatience = -std::numeric_limits::infinity(); + stepsRange = 0; + } + + // Terminate if the maximum number of function evaluations has been reached. + if (functionEvaluations >= maxFunctionEvaluations) + { + Info << "CMA-ES: maximum number of function evaluations (" + << maxFunctionEvaluations << ") reached; " + << "terminating optimization." << std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + + // Check if the condition number of the covariance matrix is too high. + if (eigval(eigval.n_elem - 1) / eigval(0) > toleranceConditionCov) + { + Info << "CMA-ES: covariance matrix condition number is too high; " + << "terminating optimization." 
<< std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + + // Terminate if adding 0.1 std * d * b does not change the m. + size_t i_aux = (i % iterate.n_elem); + BaseMatType norm_eigvec = eigvec.col(i_aux) / arma::norm(eigvec.col(i_aux), 2); + BaseMatType positionChange = arma::abs(mPosition[idx0] - (mPosition[idx0] + + 0.1 * sigma(idx0) * std::sqrt(eigval(i_aux)) * norm_eigvec[i_aux])); + + BaseMatType axisChange = arma::abs(mPosition[idx0] - (mPosition[idx0] + positionChange)); + + bool tooSmallChange = true; + for (size_t j = 0; j < axisChange.n_elem; ++j) + { + if (axisChange(j) >= toleranceNoEffectAxis) + { + tooSmallChange = false; + break; + } + } + + if (tooSmallChange) + { + Info << "CMA-ES: change in axis is too small; " + << "terminating optimization." << std::endl; + iterate = transformationPolicy.Transform(iterate); + + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + + // Terminate if adding 0.2 std in each coordinate does not change the m. + for (size_t j = 0; j < iterate.n_elem; ++j) + { + double perturbation = 0.2 * sigma(idx0) * std::sqrt(C[idx0](j, j)); + + if (std::abs(mPosition[idx0](j) - (mPosition[idx0](j) + perturbation)) < toleranceNoEffectCoord) + { + Info << "CMA-ES: change in coordinate is too small; " + << "terminating optimization." 
<< std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + } + + // Update histories for stagnation + bestFitnessHistory.push_back(pObjective(idx(0))); + + // Calculate median fitness + std::vector sortedFitness(pObjective.begin(), pObjective.end()); + std::nth_element(sortedFitness.begin(), sortedFitness.begin() + lambda / 2, sortedFitness.end()); + ElemType medianFitness = sortedFitness[lambda / 2]; + medianFitnessHistory.push_back(medianFitness); + + // Check for stagnation + if (bestFitnessHistory.size() >= historySize) + { + size_t third = historySize * 0.3; + + // Calculate medians for first and last 30% of history + std::vector firstThirdBest(bestFitnessHistory.begin(), + bestFitnessHistory.begin() + third); + std::vector lastThirdBest(bestFitnessHistory.end() - third, + bestFitnessHistory.end()); + std::vector firstThirdMedian(medianFitnessHistory.begin(), + medianFitnessHistory.begin() + third); + std::vector lastThirdMedian(medianFitnessHistory.end() - third, + medianFitnessHistory.end()); + + std::nth_element(firstThirdBest.begin(), firstThirdBest.begin() + third / 2, firstThirdBest.end()); + std::nth_element(lastThirdBest.begin(), lastThirdBest.begin() + third / 2, lastThirdBest.end()); + std::nth_element(firstThirdMedian.begin(), firstThirdMedian.begin() + third / 2, firstThirdMedian.end()); + std::nth_element(lastThirdMedian.begin(), lastThirdMedian.begin() + third / 2, lastThirdMedian.end()); + + ElemType medianFirstBest = firstThirdBest[third / 2]; + ElemType medianLastBest = lastThirdBest[third / 2]; + ElemType medianFirstMedian = firstThirdMedian[third / 2]; + ElemType medianLastMedian = lastThirdMedian[third / 2]; + + if (medianLastBest >= medianFirstBest && + medianLastMedian >= medianFirstMedian) + { + Info << "CMA-ES: Stagnation detected; " + << "terminating optimization." 
<< std::endl; + + iterate = transformationPolicy.Transform(iterate); + Callback::EndOptimization(*this, function, iterate, callbacks...); + return overallObjective; + } + + // Remove oldest entry to maintain history size + bestFitnessHistory.erase(bestFitnessHistory.begin()); + medianFitnessHistory.erase(medianFitnessHistory.begin()); + } + + steps++; lastObjective = overallObjective;