Skip to content

Commit

Permalink
Updated test and added doe output in csv
Browse files Browse the repository at this point in the history
  • Loading branch information
Enrico Stragiotti committed Dec 13, 2024
1 parent 08a34f6 commit 93a4bea
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 183 deletions.
255 changes: 74 additions & 181 deletions src/fastoad/cmd/doe.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import os
from dataclasses import dataclass, field
from os import PathLike
from typing import ClassVar, Dict, List, Optional, Union
Expand All @@ -23,11 +22,25 @@
from smt.applications.mfk import NestedLHS

import fastoad.api as oad
from fastoad._utils.files import as_path
from fastoad.openmdao.variables import VariableList


@dataclass
class DOEVariable:
"""
Represents a Design of Experiments (DOE) variable defined by its unique `id_variable`.
:param name: The name of the FAST-OAD DOE variable.
:param bound_lower: The lower bound of the variable. Defaults to None.
:param bound_upper: The upper bound of the variable. Defaults to None.
:param reference_value: A reference value used to adjust bounds as percentages. If given, `bound_lower` and
`bound_upper` are considered percentages. Defaults to None.
:param bind_variable_to: Another DOEVariable instance to bind this variable to. When bound, this variable
inherits the bounds of the bound variable, and no new ID is assigned.
:param name_pseudo: An optional alias for the variable. If not provided, it defaults to the value of `name`.
"""

name: str
bound_lower: Optional[float] = None
bound_upper: Optional[float] = None
Expand Down Expand Up @@ -73,6 +86,29 @@ def __post_init__(self):

@dataclass
class DOEConfig:
"""
Configuration and management of Design of Experiments (DOE) processes.
This class serves as a central entity for configuring and generating sampling points
for experiments with the DOE sampling methods provided by the SMT package. It manages the
input variables, their bounds, and the relationships between variables (e.g., binding one
variable to another). The class also supports reproducibility through random seeding. The
sampling obtained by running :meth:`sampling_doe` is formatted as `VariableList` instances,
making them directly usable by the `CalcRunner` class for computation.
:param sampling_method: The method used for sampling (e.g., LHS, Full Factorial, Random).
This determines how sampling points are generated.
:param variables: A list of DOEVariable instances that represent the variables to be
included in the experiment. Each variable defines its bounds,
reference value, and optional bindings.
:param destination_folder: The folder where the generated DOE data will be saved. This
ensures results are organized and easily accessible.
:param seed_value: A seed value for random number generation to ensure reproducibility
of sampling. Defaults to 0.
:param sampling_options: An optional dictionary that stores additional parameters or
options specific to the chosen sampling method. Defaults to empty dict.
"""

sampling_method: str
variables: List[DOEVariable]
destination_folder: Union[str, PathLike]
Expand All @@ -82,10 +118,12 @@ class DOEConfig:
) # This dict stores the eventual additional options for the chosen sampling method

def __post_init__(self):
self.destination_folder = as_path(self.destination_folder).resolve()
# Extract the necessary data to configure the DOE
self.variables_binding = [var.id_variable for var in self.variables]
self.var_names = [var.name for var in self.variables]
self.var_names_pseudo = [var.name_pseudo for var in self.variables]
self.var_names_pseudo_mapping = dict(zip(self.var_names, self.var_names_pseudo))
# Extract bounds, taking variable bindings into account
seen = set()
self.bounds = []
Expand All @@ -95,6 +133,7 @@ def __post_init__(self):
self.bounds.append([var.bound_lower, var.bound_upper])
self.bounds = np.asarray(self.bounds)

self.is_sampled = False
self.doe_points_multilevel = None

def _handle_lhs(self, level_count=None):
Expand All @@ -106,56 +145,55 @@ def _handle_lhs(self, level_count=None):
def _handle_full_factorial(self):
    """
    Build the full-factorial sampler for this DOE.

    :return: the `samp.FullFactorial` object created with `self.bounds` as its `xlimits`.
    """
    limits = self.bounds
    sampler = samp.FullFactorial(xlimits=limits)
    return sampler

def _print_sampling_info(self):
    """Placeholder with no behavior: does nothing and returns None."""
    return None

def _write_doe_inputs(self):
    """
    Write the sampled DOE input points to CSV file(s) in `destination_folder`.

    When `doe_points_multilevel` is set, one file per level is written, named
    ``DOE_inputs_<level_count>D_level<i>.csv``. Otherwise a single ``DOE_inputs.csv``
    is written from `doe_points_df`. In both cases the columns are labelled with the
    variable pseudo names and the index column is labelled ``ID``.

    :raises RuntimeError: if the sampling has not been performed (`is_sampled` is False).
    """
    if not self.is_sampled:
        raise RuntimeError(
            "You cannot call _write_doe_inputs without having performed the sampling."
        )

    file_name = "DOE_inputs"
    csv_options = {"index_label": "ID", "sep": ";", "quotechar": "|"}

    # Make sure the target folder exists so that `to_csv` does not fail on a fresh path.
    self.destination_folder.mkdir(parents=True, exist_ok=True)

    if self.doe_points_multilevel:
        level_count = len(self.doe_points_multilevel)
        for i, point_list in enumerate(self.doe_points_multilevel):
            # Rearrange columns based on the variables_binding variable
            doe_points_upd = point_list[:, self.variables_binding]
            doe_points_df_nest = pd.DataFrame(doe_points_upd, columns=self.var_names_pseudo)
            # Build the full file name first: `/` binds tighter than `+`, so
            # `folder / name + ".csv"` would evaluate `Path + str` and raise TypeError.
            doe_points_df_nest.to_csv(
                self.destination_folder / f"{file_name}_{level_count}D_level{i}.csv",
                **csv_options,
            )
        return

    # Use pseudos for outputs
    doe_points = self.doe_points_df.rename(columns=self.var_names_pseudo_mapping)
    doe_points.to_csv(self.destination_folder / f"{file_name}.csv", **csv_options)

def generate_doe(self, sample_count, **kwargs) -> List[VariableList]:
def sampling_doe(self, sample_count) -> List[VariableList]:
"""
Generates a DOE input using the SMT library for sampling.
See SMT documentation for DOE options:
https://smt.readthedocs.io/en/latest/index.html
Generates sampling points for a Design of Experiments (DOE) using the SMT library.
:param
:param sample_count: The number of samples to generate.
:return: TODO.
:return: A list of `oad.VariableList` objects containing the generated sampling points.
"""
method = self.sampling_method
level_count = None
use_level = None
if kwargs.get("level_count"):
level_count = kwargs.get("level_count")
use_level = kwargs.get("use_level")
if self.sampling_options:
if self.sampling_options.get("level_count"):
level_count = self.sampling_options.get("level_count")
use_level = self.sampling_options.get("use_level")

method_dispatch = {
"LHS": lambda: self._handle_lhs(level_count),
"Full Factorial": self._handle_full_factorial,
"Random": self._handle_random,
}
handler = method_dispatch.get(method)
if handler is None:
Expand All @@ -173,160 +211,15 @@ def generate_doe(self, sample_count, **kwargs) -> List[VariableList]:
doe_points = doe_points[use_level]
doe_points_upd = doe_points[
:, self.variables_binding
] # Rearrange columns based on the bounds_binding variable
] # Rearrange columns based on the variables_binding variable
self.doe_points_df = pd.DataFrame(doe_points_upd, columns=column_names)
doe_points_dict = self.doe_points_df.to_dict(orient="records")

return [ # Good format for calc-runner
self.is_sampled = True

return [ # Good format for CalcRunner
oad.VariableList(
[oad.Variable(var_name, val=var_value) for var_name, var_value in doe_point.items()]
)
for doe_point in doe_points_dict
]

def write_output(self):
    """Placeholder with no behavior: does nothing and returns None."""
    return None


# def gen_doe(
# var_names: list,
# var_bounds: list,
# n_samples: int,
# var_names_pseudo: dict = None,
# var_bounds_percentage_mode: bool = True,
# bounds_binding: Optional[list] = None,
# sampling_method: str = "LHS",
# level_count: Optional[int] = None,
# use_level: Optional[int] = None,
# seed_value: int = 12,
# folder_path: str = None,
# ) -> list[oad.Variable]:
# """
# Generates a DOE input using the SMT library for sampling.

# See SMT documentation for DOE options:
# https://smt.readthedocs.io/en/latest/index.html

# :param var_name: list, names of the input variables used to create the DOE
# :param var_bounds: list, upper and lower bounds for the DOE in the same order of var_names.
# If var_bounds_percentage_mode = True, then the format is [mean, percentageDifference]; else is [lowerBound, upperBound]
# :param n_samples: int, number of samples generated by the DOE.
# If sampling_method = "FullFactorial", then n_samples represent the number of samples per input, so that the total number of sampling is n_samples**n_var
# :param var_names_pseudo: dict, TODO
# :param folder_path: Path, path in which the DOE .csv is saved
# :param file_name: str, custom name of the DOE .csv
# :param var_bounds_percentage_mode: bool, if true the function evaluates the bounds of the variables as a percentage of the given value
# :param sampling_method: str, "LHS", "FullFactorial", or "Random". SMT sampling method for the DOE generation
# :param random_state: int, seed number to control random draws for sampling method (set by default to get reproductibility)

# :return: TODO.
# """
# print("\n--------------------")
# print("-- DOE GENERATION --")
# print("--------------------\n")

# if bounds_binding:
# # Check on the list given
# max_num = max(bounds_binding) # Get the maximum number in the list
# expected_set = set(range(max_num + 1)) # Create set of all numbers from 0 to max_num
# actual_set = set(bounds_binding) # Create a set from the list to remove duplicates
# if not expected_set.issubset(actual_set):
# raise ValueError(
# f"bounds_binding: {bounds_binding} is not a list containing all the int between 0 and n_inputs."
# )
# n_inputs = max(bounds_binding) + 1
# n_var = len(var_names)
# else:
# n_inputs = len(var_names)
# bounds_binding = [i for i in range(n_inputs)]
# n_var = len(var_names)

# if var_bounds_percentage_mode:
# xbounds = []
# for mean, percentage in var_bounds:
# reduction = mean - (mean * percentage / 100) # Reduce mean by percentage
# increase = mean + (mean * percentage / 100) # Increase mean by percentage
# xbounds.append([reduction, increase])
# xbounds = np.array(xbounds)
# else:
# xbounds = []
# for down, up in var_bounds:
# if down >= up:
# raise ValueError(f"Invalid bounds: ({down}) should not be greater than({up})")
# xbounds.append([down, up])
# xbounds = np.array(xbounds)

# print("-- VARIABLE BOUNDS --")
# for i in range(n_var):
# print(
# f"Name = {var_names[i]} | Bounds = {xbounds[bounds_binding[i],0]} {xbounds[bounds_binding[i],1]}"
# )

# use_nested = False
# if sampling_method == "LHS" and level_count:
# use_nested = True

# if sampling_method == "LHS":
# if level_count:
# sampling = NestedLHS(nlevel=level_count, xlimits=xbounds, random_state=random_state)
# print("\n-- NESTED LHS SAMPLING --\n")
# else:
# sampling = samp.LHS(criterion="ese", random_state=random_state, xlimits=xbounds)
# print("\n-- LHS SAMPLING --\n")
# doe_points = sampling(n_samples) # do the sampling
# elif sampling_method == "FullFactorial":
# n_samples = n_samples**n_inputs
# sampling = samp.FullFactorial(xlimits=xbounds)
# doe_points = sampling(n_samples) # do the sampling
# print("\n-- FULL FACTORIAL SAMPLING --\n")
# elif sampling_method == "Random":
# sampling = samp.Random(
# xlimits=xbounds,
# random_state=random_state,
# )
# doe_points = sampling(n_samples) # do the sampling
# print("\n-- RANDOM SAMPLING --\n")
# else:
# raise ValueError(
# f"The sampling method {sampling_method} is not existent or not yet implemented."
# )

# if var_names_pseudo:
# column_names = [key for key, val in var_names_pseudo.items()]
# else:
# column_names = [var_name for var_name in var_names]
# if not folder_path:
# folder_path = Path(__file__).parent
# if not os.path.exists(folder_path):
# os.makedirs(folder_path)

# if use_nested:
# for i, point_list in enumerate(doe_points):
# # Rearrange columns based on the bounds_binding variable
# doe_points_upd = point_list[:, bounds_binding]
# doe_points_df_nest = pd.DataFrame(doe_points_upd, columns=column_names)
# doe_points_df_nest.to_csv(
# os.path.join(folder_path, file_name + f"_{level_count}D_level{i}" + ".csv"),
# index_label="ID",
# sep=";",
# )
# if i == use_level:
# doe_points_df = point_list.copy()
# else:
# # Rearrange columns based on the bounds_binding variable
# doe_points_upd = doe_points[:, bounds_binding]
# doe_points_df = pd.DataFrame(doe_points_upd, columns=column_names)
# doe_points_df.to_csv(
# os.path.join(folder_path, file_name + ".csv"), index_label="ID", sep=";", quotechar="|"
# )

# doe_points_dict = doe_points_df.to_dict(orient="records")
# doe_data = [ # good format for calc-runner
# oad.VariableList(
# [oad.Variable(var_name, val=var_value) for var_name, var_value in doe_point.items()]
# )
# for doe_point in doe_points_dict
# ]
# print("-- DOE GENERATED --")

# return doe_data
Loading

0 comments on commit 93a4bea

Please sign in to comment.