Updated test and added doe output in csv

fast-aircraft-design · Dec 13, 2024 · 93a4bea · 93a4bea
1 parent 08a34f6
commit 93a4bea
Show file tree

Hide file tree

Showing 2 changed files with 162 additions and 183 deletions.
diff --git a/src/fastoad/cmd/doe.py b/src/fastoad/cmd/doe.py
@@ -12,7 +12,6 @@
 #  You should have received a copy of the GNU General Public License
 #  along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
-import os
 from dataclasses import dataclass, field
 from os import PathLike
 from typing import ClassVar, Dict, List, Optional, Union
@@ -23,11 +22,25 @@
 from smt.applications.mfk import NestedLHS
 
 import fastoad.api as oad
+from fastoad._utils.files import as_path
 from fastoad.openmdao.variables import VariableList
 
 
 @dataclass
 class DOEVariable:
+    """
+    Represents a Design of Experiments (DOE) variable defined by its unique `id_variable`.
+
+    :param name: The name of the FAST-OAD DOE variable.
+    :param bound_lower: The lower bound of the variable. Defaults to None.
+    :param bound_upper: The upper bound of the variable. Defaults to None.
+    :param reference_value: A reference value used to adjust bounds as percentages. If given, `bound_lower` and
+                            `bound_upper` are considered percentages. Defaults to None.
+    :param bind_variable_to: Another DOEVariable instance to bind this variable to. When bound, this variable
+                             inherits the bounds of the bound variable, and no new ID is assigned.
+    :param name_pseudo: An optional alias for the variable. If not provided, it defaults to the value of `name`.
+    """
+
     name: str
     bound_lower: Optional[float] = None
     bound_upper: Optional[float] = None
@@ -73,6 +86,29 @@ def __post_init__(self):
 
 @dataclass
 class DOEConfig:
+    """
+    Configuration and management of Design of Experiments (DOE) processes.
+
+    This class serves as a central entity for configuring and generating sampling points
+    for experiments, relying on the various DOE sampling methods of the SMT package. It manages the
+    input variables, their bounds, and the relationships between variables (e.g., binding one
+    variable to another). The class also supports reproducibility through random seeding. The
+    sampling obtained by running :meth:`sampling_doe` is formatted as `VariableList` instances,
+    making them directly usable by the `CalcRunner` class for computation.
+
+    :param sampling_method: The method used for sampling (e.g., LHS, Full Factorial, Random).
+                            This determines how sampling points are generated.
+    :param variables: A list of DOEVariable instances that represent the variables to be
+                    included in the experiment. Each variable defines its bounds,
+                    reference value, and optional bindings.
+    :param destination_folder: The folder where the generated DOE data will be saved. This
+                            ensures results are organized and easily accessible.
+    :param seed_value: A seed value for random number generation to ensure reproducibility
+                    of sampling. Defaults to 0.
+    :param sampling_options: An optional dictionary that stores additional parameters or
+                            options specific to the chosen sampling method. Defaults to empty dict.
+    """
+
     sampling_method: str
     variables: List[DOEVariable]
     destination_folder: Union[str, PathLike]
@@ -82,10 +118,12 @@ class DOEConfig:
     )  # This dict stores the eventual additional options for the chosen sampling method
 
     def __post_init__(self):
+        self.destination_folder = as_path(self.destination_folder).resolve()
         # Extract the necessary data to configure the DOE
         self.variables_binding = [var.id_variable for var in self.variables]
         self.var_names = [var.name for var in self.variables]
         self.var_names_pseudo = [var.name_pseudo for var in self.variables]
+        self.var_names_pseudo_mapping = dict(zip(self.var_names, self.var_names_pseudo))
         # Extract bounds taking into account binding
         seen = set()
         self.bounds = []
@@ -95,6 +133,7 @@ def __post_init__(self):
                 self.bounds.append([var.bound_lower, var.bound_upper])
         self.bounds = np.asarray(self.bounds)
 
+        self.is_sampled = False
         self.doe_points_multilevel = None
 
     def _handle_lhs(self, level_count=None):
@@ -106,56 +145,55 @@ def _handle_lhs(self, level_count=None):
     def _handle_full_factorial(self):
         return samp.FullFactorial(xlimits=self.bounds)
 
-    def _print_sampling_info(self):
-        pass
-
     def _write_doe_inputs(self):
-        file_name = "DOE_inputs"
-        column_names = self.var_names_pseudo
-        if self.doe_points_multilevel:
-            level_count = len(self.doe_points_multilevel) + 1
-            for i, point_list in enumerate(self.doe_points_multilevel):
-                # Rearrange columns based on the bounds_binding variable
-                doe_points_upd = point_list[:, self.bounds_binding]
-                doe_points_df_nest = pd.DataFrame(doe_points_upd, columns=column_names)
-                doe_points_df_nest.to_csv(
-                    os.path.join(
-                        self.destination_folder, file_name + f"_{level_count}D_level{i}" + ".csv"
-                    ),
+        if self.is_sampled:
+            file_name = "DOE_inputs"
+            if self.doe_points_multilevel:
+                level_count = len(self.doe_points_multilevel)
+                for i, point_list in enumerate(self.doe_points_multilevel):
+                    # Rearrange columns based on the variables_binding variable
+                    doe_points_upd = point_list[:, self.variables_binding]
+                    doe_points_df_nest = pd.DataFrame(doe_points_upd, columns=self.var_names_pseudo)
+                    doe_points_df_nest.to_csv(
+                        self.destination_folder / file_name + f"_{level_count}D_level{i}" + ".csv",
+                        index_label="ID",
+                        sep=";",
+                        quotechar="|",
+                    )
+            else:
+                doe_points = self.doe_points_df.rename(
+                    columns=self.var_names_pseudo_mapping
+                )  # Use pseudos for outputs
+                doe_points.to_csv(
+                    self.destination_folder / file_name + ".csv",
                     index_label="ID",
                     sep=";",
                     quotechar="|",
                 )
         else:
-            self.doe_points_df.to_csv(
-                os.path.join(self.destination_folder, file_name + ".csv"),
-                index_label="ID",
-                sep=";",
-                quotechar="|",
+            raise RuntimeError(
+                "You cannot call _write_doe_inputs without having performed the sampling."
             )
 
-    def generate_doe(self, sample_count, **kwargs) -> List[VariableList]:
+    def sampling_doe(self, sample_count) -> List[VariableList]:
         """
-        Generates a DOE input using the SMT library for sampling.
-
-        See SMT documentation for DOE options:
-        https://smt.readthedocs.io/en/latest/index.html
+        Generates sampling points for a Design of Experiments (DOE) using the SMT library.
 
-        :param
+        :param sample_count: The number of samples to generate.
 
-        :return: TODO.
+        :return: A list of `oad.VariableList` objects containing the generated sampling points.
         """
         method = self.sampling_method
         level_count = None
         use_level = None
-        if kwargs.get("level_count"):
-            level_count = kwargs.get("level_count")
-            use_level = kwargs.get("use_level")
+        if self.sampling_options:
+            if self.sampling_options.get("level_count"):
+                level_count = self.sampling_options.get("level_count")
+                use_level = self.sampling_options.get("use_level")
 
         method_dispatch = {
             "LHS": lambda: self._handle_lhs(level_count),
             "Full Factorial": self._handle_full_factorial,
-            "Random": self._handle_random,
         }
         handler = method_dispatch.get(method)
         if handler is None:
@@ -173,160 +211,15 @@ def generate_doe(self, sample_count, **kwargs) -> List[VariableList]:
             doe_points = doe_points[use_level]
         doe_points_upd = doe_points[
             :, self.variables_binding
-        ]  # Rearrange columns based on the bounds_binding variable
+        ]  # Rearrange columns based on the variables_binding variable
         self.doe_points_df = pd.DataFrame(doe_points_upd, columns=column_names)
         doe_points_dict = self.doe_points_df.to_dict(orient="records")
 
-        return [  # Good format for calc-runner
+        self.is_sampled = True
+
+        return [  # Good format for CalcRunner
             oad.VariableList(
                 [oad.Variable(var_name, val=var_value) for var_name, var_value in doe_point.items()]
             )
             for doe_point in doe_points_dict
         ]
-
-    def write_output(self):
-        pass
-
-
-# def gen_doe(
-#     var_names: list,
-#     var_bounds: list,
-#     n_samples: int,
-#     var_names_pseudo: dict = None,
-#     var_bounds_percentage_mode: bool = True,
-#     bounds_binding: Optional[list] = None,
-#     sampling_method: str = "LHS",
-#     level_count: Optional[int] = None,
-#     use_level: Optional[int] = None,
-#     seed_value: int = 12,
-#     folder_path: str = None,
-# ) -> list[oad.Variable]:
-#     """
-#     Generates a DOE input using the SMT library for sampling.
-
-#     See SMT documentation for DOE options:
-#     https://smt.readthedocs.io/en/latest/index.html
-
-#     :param var_name: list, names of the input variables used to create the DOE
-#     :param var_bounds: list, upper and lower bounds for the DOE in the same order of var_names.
-#     If var_bounds_percentage_mode = True, then the format is [mean, percentageDifference]; else is [lowerBound, upperBound]
-#     :param n_samples: int, number of sampes generated by the DOE.
-#     If sampling_method = "FullFactorial", then n_samples represent the number of samples per input, so that the total number of sampling is n_samples**n_var
-#     :param var_names_pseudo: dict, TODO
-#     :param folder_path: Path, path in which the DOE .csv is saved
-#     :param file_name: str, custom name of the DOE .csv
-#     :param var_bounds_percentage_mode: bool, if true the function evaluates the bounds of the variables as a percentage of the given value
-#     :param sampling_method: str, "LHS", "FullFactorial", or "Random". SMT sampling method for the DOE generation
-#     :param random_state: int, seed number to control random draws for sampling method (set by default to get reproductibility)
-
-#     :return: TODO.
-#     """
-#     print("\n--------------------")
-#     print("-- DOE GENERATION --")
-#     print("--------------------\n")
-
-#     if bounds_binding:
-#         # Check on the list given
-#         max_num = max(bounds_binding)  # Get the maximum number in the list
-#         expected_set = set(range(max_num + 1))  # Create set of all numbers from 0 to max_num
-#         actual_set = set(bounds_binding)  # Create a set from the list to remove duplicates
-#         if not expected_set.issubset(actual_set):
-#             raise ValueError(
-#                 f"bounds_binding: {bounds_binding} is not a list containing all the int between 0 and n_inputs."
-#             )
-#         n_inputs = max(bounds_binding) + 1
-#         n_var = len(var_names)
-#     else:
-#         n_inputs = len(var_names)
-#         bounds_binding = [i for i in range(n_inputs)]
-#         n_var = len(var_names)
-
-#     if var_bounds_percentage_mode:
-#         xbounds = []
-#         for mean, percentage in var_bounds:
-#             reduction = mean - (mean * percentage / 100)  # Reduce mean by percentage
-#             increase = mean + (mean * percentage / 100)  # Increase mean by percentage
-#             xbounds.append([reduction, increase])
-#         xbounds = np.array(xbounds)
-#     else:
-#         xbounds = []
-#         for down, up in var_bounds:
-#             if down >= up:
-#                 raise ValueError(f"Invalid bounds: ({down}) should not be greater than({up})")
-#             xbounds.append([down, up])
-#         xbounds = np.array(xbounds)
-
-#     print("--   VARIABLE BOUNDS    --")
-#     for i in range(n_var):
-#         print(
-#             f"Name = {var_names[i]} | Bounds = {xbounds[bounds_binding[i],0]}   {xbounds[bounds_binding[i],1]}"
-#         )
-
-#     use_nested = False
-#     if sampling_method == "LHS" and level_count:
-#         use_nested = True
-
-#     if sampling_method == "LHS":
-#         if level_count:
-#             sampling = NestedLHS(nlevel=level_count, xlimits=xbounds, random_state=random_state)
-#             print("\n-- NESTED LHS SAMPLING --\n")
-#         else:
-#             sampling = samp.LHS(criterion="ese", random_state=random_state, xlimits=xbounds)
-#             print("\n--   LHS SAMPLING   --\n")
-#         doe_points = sampling(n_samples)  # do the sampling
-#     elif sampling_method == "FullFactorial":
-#         n_samples = n_samples**n_inputs
-#         sampling = samp.FullFactorial(xlimits=xbounds)
-#         doe_points = sampling(n_samples)  # do the sampling
-#         print("\n-- FULL FACTORIAL SAMPLING --\n")
-#     elif sampling_method == "Random":
-#         sampling = samp.Random(
-#             xlimits=xbounds,
-#             random_state=random_state,
-#         )
-#         doe_points = sampling(n_samples)  # do the sampling
-#         print("\n--   RANDOM SAMPLING   --\n")
-#     else:
-#         raise ValueError(
-#             f"The sampling method {sampling_method} is not existent or not yet implemented."
-#         )
-
-#     if var_names_pseudo:
-#         column_names = [key for key, val in var_names_pseudo.items()]
-#     else:
-#         column_names = [var_name for var_name in var_names]
-#     if not folder_path:
-#         folder_path = Path(__file__).parent
-#     if not os.path.exists(folder_path):
-#         os.makedirs(folder_path)
-
-#     if use_nested:
-#         for i, point_list in enumerate(doe_points):
-#             # Rearrange columns based on the bounds_binding variable
-#             doe_points_upd = point_list[:, bounds_binding]
-#             doe_points_df_nest = pd.DataFrame(doe_points_upd, columns=column_names)
-#             doe_points_df_nest.to_csv(
-#                 os.path.join(folder_path, file_name + f"_{level_count}D_level{i}" + ".csv"),
-#                 index_label="ID",
-#                 sep=";",
-#             )
-#             if i == use_level:
-#                 doe_points_df = point_list.copy()
-#     else:
-#         # Rearrange columns based on the bounds_binding variable
-#         doe_points_upd = doe_points[:, bounds_binding]
-#         doe_points_df = pd.DataFrame(doe_points_upd, columns=column_names)
-#         doe_points_df.to_csv(
-#             os.path.join(folder_path, file_name + ".csv"), index_label="ID", sep=";", quotechar="|"
-#         )
-
-#     doe_points_dict = doe_points_df.to_dict(orient="records")
-#     doe_data = [  # good format for calc-runner
-#         oad.VariableList(
-#             [oad.Variable(var_name, val=var_value) for var_name, var_value in doe_point.items()]
-#         )
-#         for doe_point in doe_points_dict
-#     ]
-#     print("--   DOE GENERATED   --")
-
-#     return doe_data