IPOP and BIPOP restart mechanisms (#403)

mlpack · Nov 29, 2024 · 6ac2f6b · 6ac2f6b
1 parent 7bff766
commit 6ac2f6b
Show file tree

Hide file tree

Showing 10 changed files with 661 additions and 11 deletions.
diff --git a/HISTORY.md b/HISTORY.md
@@ -7,7 +7,11 @@
    ([#404](https://github.com/mlpack/ensmallen/pull/404)).
 
  * For Armadillo 14.2.0 switch to `.index_min()` and `.index_max()`
-   ([#409](https://github.com/mlpack/ensmallen/pull/409).
+   ([#409](https://github.com/mlpack/ensmallen/pull/409)).
+
+ * Added IPOP and BIPOP restart mechanisms for CMA-ES.
+   ([#403](https://github.com/mlpack/ensmallen/pull/403)).
+
 
 ### ensmallen 2.21.1: "Bent Antenna"
 ###### 2024-02-15

diff --git a/doc/function_types.md b/doc/function_types.md
@@ -358,7 +358,10 @@ Each of the implemented methods is allowed to have additional cv-modifiers
 
 The following optimizers can be used with arbitrary separable functions:
 
- - [CMAES](#cmaes)
+ - [Active CMA-ES](#active-cma-es)
+ - [BIPOP CMA-ES](#bipop-cma-es)
+ - [CMA-ES](#cma-es)
+ - [IPOP CMA-ES](#ipop-cma-es)
 
 Each of these optimizers has an `Optimize()` function that is called as
 `Optimize(f, x)` where `f` is the function to be optimized and `x` holds the
@@ -453,15 +456,15 @@ int main()
   // parameters, so the shape is 10x1.
   arma::mat params(10, 1, arma::fill::randn);
 
-  // Use the CMAES optimizer with default parameters to minimize the
+  // Use the CMA-ES optimizer with default parameters to minimize the
   // LinearRegressionFunction.
   // The ens::CMAES type can be replaced with any suitable ensmallen optimizer
   // that can handle arbitrary separable functions.
   ens::CMAES cmaes;
   LinearRegressionFunction lrf(data, responses);
   cmaes.Optimize(lrf, params);
 
-  std::cout << "The optimized linear regression model found by CMAES has the "
+  std::cout << "The optimized linear regression model found by CMA-ES has the "
       << "parameters " << params.t();
 }
 ```

diff --git a/doc/optimizers.md b/doc/optimizers.md
@@ -1,4 +1,4 @@
-## ActiveCMAES
+## Active CMA-ES
 
 *An optimizer for [separable functions](#separable-functions).*
 
@@ -72,12 +72,12 @@ the coordinates respectively.
 RosenbrockFunction f;
 arma::mat coordinates = f.GetInitialPoint();
 
-// ActiveCMAES with the FullSelection and BoundaryBoxConstraint policies.
+// Active CMA-ES with the FullSelection and BoundaryBoxConstraint policies.
 BoundaryBoxConstraint b(-1, 1);
 ActiveCMAES optimizer(0, b, 32, 200, 1e-4);
 optimizer.Optimize(f, coordinates);
 
-// ActiveCMAES with the RandomSelection and BoundaryBoxConstraint policies.
+// Active CMA-ES with the RandomSelection and BoundaryBoxConstraint policies.
 ApproxActiveCMAES<BoundaryBoxConstraint<>> cmaes(0, b, 32, 200, 1e-4);
 approxOptimizer.Optimize(f, coordinates);
 ```
@@ -86,7 +86,9 @@ approxOptimizer.Optimize(f, coordinates);
 
 #### See also:
 
- * [CMAES](#cmaes)
+ * [BIPOP CMA-ES](#bipop-cma-es)
+ * [CMA-ES](#cma-es)
+ * [IPOP CMA-ES](#ipop-cma-es)
  * [Improving Evolution Strategies through Active Covariance Matrix Adaptation](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.114.4239&rep=rep1&type=pdfn)
  * [Evolution strategy in Wikipedia](https://en.wikipedia.org/wiki/Evolution_strategy)
 
@@ -869,6 +871,78 @@ optimizer2.Optimize(f, coordinates);
  * [SGD in Wikipedia](https://en.wikipedia.org/wiki/Stochastic_gradient_descent)
  * [SGD](#standard-sgd)
 
+## BIPOP CMA-ES
+
+*An optimizer for [separable functions](#separable-functions).*
+
+BIPOP CMA-ES (Bi-Population CMA-ES) extends the idea of [IPOP CMA-ES](#ipop-cma-es) by using
+two intertwined restart strategies: one with an increasing population size and
+another maintaining a smaller, variable population size. This strategy allows
+BIPOP CMA-ES to adaptively balance exploration and exploitation across the
+fitness landscape, and can outperform IPOP. The larger population restarts aim
+to explore broadly, improving global search capabilities, while the smaller
+populations intensify the search in promising regions.
+
+### Constructors
+
+* `BIPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>()`
+* `BIPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy`_`)`
+* `BIPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance`_`)`
+* `BIPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxRestarts, populationFactor, maxFunctionEvaluations`_`)`
+
+### Attributes
+
+| **type** | **name** | **description** | **default** |
+|----------|----------|-----------------|-------------|
+| `size_t` | **`lambda`** | The population size (0 uses a default size). | `0` |
+| `TransformationPolicyType` | **`transformationPolicy`** | Instantiated transformation policy used to map the coordinates to the desired domain. | `TransformationPolicyType()` |
+| `size_t` | **`batchSize`** | Batch size to use for the objective calculation. | `32` |
+| `size_t` | **`maxIterations`** | Maximum number of iterations. | `1000` |
+| `double` | **`tolerance`** | Maximum absolute tolerance to terminate the algorithm. | `1e-5` |
+| `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` |
+| `double` | **`stepSize`** | Initial step size. | `0` |
+| `size_t` | **`maxRestarts`** | Maximum number of restarts. | `9` |
+| `double` | **`populationFactor`** | The factor by which the population increases after each restart. | `2.0` |
+| `size_t` | **`maxFunctionEvaluations`** | Maximum number of function evaluations. | `1e9` |
+
+The _`SelectionPolicyType`_ and _`TransformationPolicyType`_ template parameters
+are used to control how the selection of points and the transformation of the
+decision variables are handled during the optimization process. For more
+information, check the [`CMAES`](#cma-es) class. The `lambda`,
+`transformationPolicy`, `batchSize`, `maxIterations`, `tolerance`,
+`selectionPolicy`, and `stepSize` are all parameters that affect the underlying
+CMA-ES process.
+
+For convenience, the following type can be used:
+
+ * **`BIPOP_CMAES<>`** (equivalent to `BIPOP_CMAES<FullSelection, EmptyTransformation<>>`): uses all separable functions to compute objective
+
+#### Examples:
+
+<details open>
+<summary>Click to collapse/expand example code.
+</summary>
+
+```c++
+RosenbrockFunction f;
+arma::mat coordinates = f.GetInitialPoint();
+
+// BIPOP CMA-ES
+BoundaryBoxConstraint<> b(-1, 1);
+BIPOP_CMAES optimizer(0, b, 32, 200, 1e-4, 5, 2, 9, 1e4);
+optimizer.Optimize(f, coordinates);
+```
+
+</details>
+
+#### See also:
+
+ * [CMA-ES](#cma-es)
+ * [Active CMA-ES](#active-cma-es)
+ * [IPOP CMA-ES](#ipop-cma-es)
+ * [Benchmarking a BI-Population CMA-ES on the BBOB-2009 Function Testbed](https://dl.acm.org/doi/pdf/10.1145/1570256.1570333)
+ * [Evolution strategy in Wikipedia](https://en.wikipedia.org/wiki/Evolution_strategy)
+
 ## Coordinate Descent (CD)
 
 *An optimizer for [partially differentiable functions](#partially-differentiable-functions).*
@@ -945,7 +1019,7 @@ cyclicscd.Optimize(f, coordinates);
  * [Stochastic Methods for L1-Regularized Loss Minimization](https://www.jmlr.org/papers/volume12/shalev-shwartz11a/shalev-shwartz11a.pdf)
  * [Partially differentiable functions](#partially-differentiable-functions)
 
-## CMAES
+## CMA-ES
 
 *An optimizer for [separable functions](#separable-functions).*
 
@@ -1015,12 +1089,12 @@ the coordinates respectively.
 RosenbrockFunction f;
 arma::mat coordinates = f.GetInitialPoint();
 
-// CMAES with the FullSelection and BoundaryBoxConstraint policies.
+// CMA-ES with the FullSelection and BoundaryBoxConstraint policies.
 BoundaryBoxConstraint b(-1, 1);
 CMAES optimizer(0, b, 32, 200, 1e-4);
 optimizer.Optimize(f, coordinates);
 
-// CMAES with the RandomSelection and BoundaryBoxConstraint policies.
+// CMA-ES with the RandomSelection and BoundaryBoxConstraint policies.
 ApproxCMAES<BoundaryBoxConstraint<>> cmaes(0, b, 32, 200, 1e-4);
 approxOptimizer.Optimize(f, coordinates);
 ```
@@ -1029,6 +1103,9 @@ approxOptimizer.Optimize(f, coordinates);
 
 #### See also:
 
+ * [Active CMA-ES](#active-cma-es)
+ * [BIPOP CMA-ES](#bipop-cma-es)
+ * [IPOP CMA-ES](#ipop-cma-es)
  * [Completely Derandomized Self-Adaptation in Evolution Strategies](http://www.cmap.polytechnique.fr/~nikolaus.hansen/cmaartic.pdf)
  * [CMA-ES in Wikipedia](https://en.wikipedia.org/wiki/CMA-ES)
  * [Evolution strategy in Wikipedia](https://en.wikipedia.org/wiki/Evolution_strategy)
@@ -1575,6 +1652,79 @@ optimizer.Optimize(f, coordinates);
  * [HOGWILD!: A Lock-Free Approach to Parallelizing Stochastic Gradient Descent](https://arxiv.org/abs/1106.5730)
  * [Sparse differentiable separable functions](#sparse-differentiable-separable-functions)
 
+## IPOP CMA-ES
+
+*An optimizer for [separable functions](#separable-functions).*
+
+IPOP CMA-ES (Increasing Population Size CMA-ES) is an extension of the
+Covariance Matrix Adaptation Evolution Strategy (CMA-ES). It introduces a 
+restart mechanism that progressively increases the population size. This 
+approach is beneficial for optimizing multi-modal functions,
+characterized by numerous local optima. The restart mechanism is designed to
+make CMA-ES more adaptable by raising the likelihood of escaping
+local optima, thus increasing the chances of discovering the global optimum.
+
+### Constructors
+
+* `IPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>()`
+* `IPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy`_`)`
+* `IPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance`_`)`
+* `IPOP_CMAES<`_`SelectionPolicyType, TransformationPolicyType`_`>(`_`lambda, transformationPolicy, batchSize, maxIterations, tolerance, selectionPolicy, stepSize, maxRestarts, populationFactor, maxFunctionEvaluations`_`)`
+
+### Attributes
+
+| **type** | **name** | **description** | **default** |
+|----------|----------|-----------------|-------------|
+| `size_t` | **`lambda`** | The population size (0 uses a default size). | `0` |
+| `TransformationPolicyType` | **`transformationPolicy`** | Instantiated transformation policy used to map the coordinates to the desired domain. | `TransformationPolicyType()` |
+| `size_t` | **`batchSize`** | Batch size to use for the objective calculation. | `32` |
+| `size_t` | **`maxIterations`** | Maximum number of iterations. | `1000` |
+| `double` | **`tolerance`** | Maximum absolute tolerance to terminate the algorithm. | `1e-5` |
+| `SelectionPolicyType` | **`selectionPolicy`** | Instantiated selection policy used to calculate the objective. | `SelectionPolicyType()` |
+| `double` | **`stepSize`** | Initial step size. | `0` |
+| `size_t` | **`maxRestarts`** | Maximum number of restarts. | `9` |
+| `double` | **`populationFactor`** | The factor by which the population increases after each restart. | `2.0` |
+| `size_t` | **`maxFunctionEvaluations`** | Maximum number of function evaluations. | `1e9` |
+
+The _`SelectionPolicyType`_ and _`TransformationPolicyType`_ template parameters
+are used to control how the selection of points and the transformation of the
+decision variables are handled during the optimization process. For more
+information, check the [`CMAES`](#cma-es) class. The `lambda`,
+`transformationPolicy`, `batchSize`, `maxIterations`, `tolerance`,
+`selectionPolicy`, and `stepSize` are all parameters that affect the underlying
+CMA-ES process.
+
+For convenience, the following type can be used:
+
+ * **`IPOP_CMAES<>`** (equivalent to `IPOP_CMAES<FullSelection, EmptyTransformation<>>`): uses all separable functions to compute objective
+
+#### Examples:
+
+<details open>
+<summary>Click to collapse/expand example code.
+</summary>
+
+```c++
+RosenbrockFunction f;
+arma::mat coordinates = f.GetInitialPoint();
+
+// IPOP CMA-ES
+BoundaryBoxConstraint<> b(-1, 1);
+IPOP_CMAES optimizer(0, b, 32, 200, 1e-4, 5, 2, 9, 1e4);
+optimizer.Optimize(f, coordinates);
+```
+
+</details>
+
+#### See also:
+
+ * [CMA-ES](#cma-es)
+ * [Active CMA-ES](#active-cma-es)
+ * [BIPOP CMA-ES](#bipop-cma-es)
+ * [A Restart CMA Evolution Strategy With Increasing Population Size](http://www.cmap.polytechnique.fr/~nikolaus.hansen/cec2005ipopcmaes.pdf)
+ * [Evolution strategy in Wikipedia](https://en.wikipedia.org/wiki/Evolution_strategy)
+
+
 ## IQN
 
 *An optimizer for [differentiable separable functions](#differentiable-separable-functions).*

diff --git a/include/ensmallen.hpp b/include/ensmallen.hpp
@@ -104,6 +104,7 @@
 #include "ensmallen_bits/bigbatch_sgd/bigbatch_sgd.hpp"
 #include "ensmallen_bits/cmaes/cmaes.hpp"
 #include "ensmallen_bits/cmaes/active_cmaes.hpp"
+#include "ensmallen_bits/cmaes/pop_cmaes.hpp"
 #include "ensmallen_bits/cd/cd.hpp"
 #include "ensmallen_bits/cne/cne.hpp"
 #include "ensmallen_bits/de/de.hpp"

diff --git a/include/ensmallen_bits/cmaes/cmaes.hpp b/include/ensmallen_bits/cmaes/cmaes.hpp
@@ -172,6 +172,9 @@ class CMAES
   double& StepSize()
   { return stepSize; }
 
+  //! Get the total number of function evaluations.
+  size_t FunctionEvaluations() const  { return functionEvaluations; }
+
  private:
   //! Population size.
   size_t lambda;
@@ -195,6 +198,9 @@ class CMAES
 
   //! The step size.
   double stepSize;
+
+  //! Counter for the number of function evaluations.
+  size_t functionEvaluations = 0;
 };
 
 /**

diff --git a/include/ensmallen_bits/cmaes/cmaes_impl.hpp b/include/ensmallen_bits/cmaes/cmaes_impl.hpp
@@ -151,6 +151,7 @@ typename MatType::elem_type CMAES<SelectionPolicyType,
     terminate |= Callback::Evaluate(*this, function, transformedIterate,
         objective, callbacks...);
   }
+  functionEvaluations += numFunctions;
 
   ElemType overallObjective = currentObjective;
   ElemType lastObjective = std::numeric_limits<ElemType>::max();
@@ -235,6 +236,8 @@ typename MatType::elem_type CMAES<SelectionPolicyType,
         transformationPolicy.Transform(mPosition[idx1]), terminate,
         callbacks...);
 
+    functionEvaluations += lambda; 
+
     // Update best parameters.
     if (currentObjective < overallObjective)
     {