diff --git a/photonai/base/PhotonBase.py b/photonai/base/PhotonBase.py
index 0b770360..dafcd70f 100644
--- a/photonai/base/PhotonBase.py
+++ b/photonai/base/PhotonBase.py
@@ -1635,7 +1635,7 @@ def transform(self, X, y=None, **kwargs):
         elif hasattr(self.base_element, 'predict', **kwargs):
             # Logger().warn("used prediction instead of transform " + self.name)
             # raise Warning()
-            return self.base_element.predict(X)
+            return self.base_element.predict(X), y, kwargs
         else:
             Logger().error('BaseException: transform-predict-mess')
             raise BaseException('transform-predict-mess')
@@ -1956,7 +1956,7 @@ def predict(self, data, targets=None, **kwargs):
         # todo: parallelize prediction
         predicted_data = np.array([])
         for name, element in self.pipe_elements.items():
-            element_transform, _, _ = element.predict(data, **kwargs)
+            element_transform = element.predict(data, **kwargs)
             predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
         if self.voting:
             if hasattr(predicted_data, 'shape'):
@@ -2217,6 +2217,14 @@ def set_params(self, **kwargs):
             self.base_element.set_params(**unnamed_config)
         return self
 
+    def copy_me(self):
+
+        ps = PipelineSwitch(self.name)
+        for element in self.pipeline_element_list:
+            new_element = element.copy_me()
+            ps += new_element
+        return ps
+
     def prettify_config_output(self, config_name, config_value, return_dict=False):
         """
diff --git a/photonai/base/PhotonBatchElement.py b/photonai/base/PhotonBatchElement.py
index fb417bd4..27bc186a 100644
--- a/photonai/base/PhotonBatchElement.py
+++ b/photonai/base/PhotonBatchElement.py
@@ -1,5 +1,5 @@
 from .PhotonBase import PipelineElement
-from ..photonlogger import Logger
+from ..photonlogger.Logger import Logger
 
 import numpy as np
diff --git a/photonai/configuration/PhotonCore.json b/photonai/configuration/PhotonCore.json
index bde629f1..b906e043 100644
--- a/photonai/configuration/PhotonCore.json
+++ b/photonai/configuration/PhotonCore.json
@@ -419,58 +419,14 @@
       "photonai.modelwrapper.KerasDNNRegressor.KerasDNNRegressor",
       "Estimator"
     ],
-    "SiameseDNNClassifier":[
-      "photonai.modelwrapper.SiameseDNNClassifier.SiameseDNNClassifier",
-      "Estimator"
-    ],
-    "PretrainedCNNClassifier":[
-      "photonai.modelwrapper.PretrainedCNN.PretrainedCNNClassifier",
-      "Estimator"
-    ],
-    "CNN1dClassifier":[
-      "photonai.modelwrapper.CNN1d.CNN1d",
-      "Estimator"
-    ],
-    "TensorFlowDNNClassifier":[
-      "photonai.modelwrapper.TFDNNClassifier.TFDNNClassifier",
-      "Estimator"
-    ],
     "KerasDNNMultiOutput":[
       "photonai.modelwrapper.KerasDNNMultiOutput.KerasDNNMultiOutput",
       "Estimator"
     ],
-    "SimpleAutoencoder":[
-      "photonai.modelwrapper.KerasAutoencoder.SimpleAutoencoder",
-      "Estimator"
-    ],
-    "RLCNN":[
-      "photonai.modelwrapper.RLCNN.RLCNN",
-      "Estimator"
-    ],
-    "WrapperModel":[
-      "photonai.modelwrapper.WrapperModel.WrapperModel",
-      "Estimator"
-    ],
-    "LogisticGWASFeatureSelection":[
-      "photonai.modelwrapper.FeatureSelection.LogisticGWASFeatureSelection",
-      "Transformer"
-    ],
     "SourceSplitter":[
       "photonai.modelwrapper.SourceSplitter.SourceSplitter",
       "Transformer"
     ],
-    "PyESNClassifier":[
-      "photonai.modelwrapper.PyESNWrapper.PyESNClassifier",
-      "Estimator"
-    ],
-    "PyESNRegressor":[
-      "photonai.modelwrapper.PyESNWrapper.PyESNRegressor",
-      "Estimator"
-    ],
-    "TestWrapper":[
-      "photonai.modelwrapper.TestWrapper.WrapperTestElement",
-      "Estimator"
-    ],
     "CategorialANOVASelectPercentile":[
       "photonai.modelwrapper.FeatureSelection.AnovaSelectPercentile",
       "Transformer"
     ],
@@ -499,18 +455,10 @@
       "photonai.modelwrapper.PhotonOneClassSVM.PhotonOneClassSVM",
       "Estimator"
     ],
-    "Selectphotonai.modelwrapper":[
-      "photonai.modelwrapper.FeatureSelection.ModelSelector",
-      "Transformer"
-    ],
     "ImbalancedDataTransform":[
      "photonai.modelwrapper.ImbalancedWrapper.ImbalancedDataTransform",
      "Transformer"
     ],
-    "GPR_AM":[
-      "photonai.modelwrapper.GPR_AM.GPR_AM",
-      "Transformer"
-    ],
     "AnomalyDetectorWrapper":[
       "photonai.modelwrapper.AnomalyDetectorWrapper.AnomalyDetectorWrapper",
       "Estimator"
     ],
@@ -526,9 +474,6 @@
     "SamplePairingClassification": [
       "photonai.modelwrapper.SamplePairing.SamplePairingClassification",
       "Transformer"
-    ],
-    "PretrainedCNNRegressor": [
-      "photonai.modelwrapper.PretrainedCNN.PretrainedCNNRegressor",
-      "Estimator"
     ]
+
 }
diff --git a/photonai/documentation/__init__.py b/photonai/documentation/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/photonai/documentation/photonai/base/index.html b/photonai/documentation/photonai/base/index.html
deleted file mode 100644
index c6e8461d..00000000
--- a/photonai/documentation/photonai/base/index.html
+++ /dev/null
@@ -1,7343 +0,0 @@
-[page head, stylesheets, and navigation markup of the pdoc-generated "photonai.base API documentation" page omitted]
""" PHOTON Base Classes enabling the nested-cross-validated hyperparameter search."""
-
-from .PhotonBase import Hyperpipe, PipelineElement, PipelineSwitch, PipelineStacking, PipelineBranch, OutputSettings
-from .ImbalancedWrapper import ImbalancedDataTransform
-
-__all__ = ("Hyperpipe",
-           "PipelineElement",
-           "PipelineSwitch",
-           "PipelineStacking",
-           "PipelineBranch",
-           "OutputSettings",
-           "ImbalancedDataTransform")
-
-[rendered "Classes" index and Hyperpipe API documentation omitted; the content mirrors the class docstring in the source listing below]
-
-class Hyperpipe(BaseEstimator):
-    """
-    Wrapper class for machine learning pipeline, holding all pipeline elements
-    and managing the optimization of the hyperparameters
-
-    Parameters
-    ----------
-    * 'name' [str]:
-        Name of hyperpipe instance
-
-    * 'inner_cv' [BaseCrossValidator]:
-        Cross validation strategy to test hyperparameter configurations, generates the validation set
-
-    * 'outer_cv' [BaseCrossValidator]:
-        Cross validation strategy to use for the hyperparameter search itself, generates the test set
-
-    * 'optimizer' [str or object, default="grid_search"]:
-        Hyperparameter optimization algorithm
-
-        - In case a string literal is given:
-            - "grid_search": optimizer that iteratively tests all possible hyperparameter combinations
-            - "random_grid_search": a variation of the grid search optimization that randomly picks hyperparameter
-               combinations from all possible hyperparameter combinations
-            - "timeboxed_random_grid_search": randomly chooses hyperparameter combinations from the set of all
-               possible hyperparameter combinations and tests until the given time limit is reached
-               - 'limit_in_minutes': int
-
-        - In case an object is given:
-          expects the object to have the following methods:
-           - 'next_config_generator': returns a hyperparameter configuration in the form of a dictionary containing
-              key->value pairs in the sklearn parameter encoding 'model_name__parameter_name: parameter_value'
-           - 'prepare': takes a list of pipeline elements and their particular hyperparameters to test
-           - 'evaluate_recent_performance': gets a tested config and the respective performance in order to
-              calculate a smart next configuration to process
-
-    * 'metrics' [list of metric names as str]:
-        Metrics that should be calculated for the training, validation and test set
-        Use the preimported metrics from sklearn and photonai, or register your own
-
-        - Metrics for 'classification':
-            - 'accuracy': sklearn.metrics.accuracy_score
-            - 'matthews_corrcoef': sklearn.metrics.matthews_corrcoef
-            - 'confusion_matrix': sklearn.metrics.confusion_matrix,
-            - 'f1_score': sklearn.metrics.f1_score
-            - 'hamming_loss': sklearn.metrics.hamming_loss
-            - 'log_loss': sklearn.metrics.log_loss
-            - 'precision': sklearn.metrics.precision_score
-            - 'recall': sklearn.metrics.recall_score
-        - Metrics for 'regression':
-            - 'mean_squared_error': sklearn.metrics.mean_squared_error
-            - 'mean_absolute_error': sklearn.metrics.mean_absolute_error
-            - 'explained_variance': sklearn.metrics.explained_variance_score
-            - 'r2': sklearn.metrics.r2_score
-        - Other metrics
-            - 'pearson_correlation': photon_core.framework.Metrics.pearson_correlation
-            - 'variance_explained':  photon_core.framework.Metrics.variance_explained_score
-            - 'categorical_accuracy': photon_core.framework.Metrics.categorical_accuracy_score
-
-    * 'best_config_metric' [str]:
-        The metric that should be maximized or minimized in order to choose the best hyperparameter configuration
-
-    * 'eval_final_performance' [bool, default=True]:
-        Whether the metrics should be calculated for the test set; otherwise the test set is separated but not used
-
-    * 'test_size' [float, default=0.2]:
-        the fraction of the data that should be left out as test set if no outer_cv is given and
-        eval_final_performance is set to True
-
-    * 'set_random_seed' [bool, default=False]:
-        If True sets the random seed to 42
-
-    * 'verbosity' [int, default=0]:
-        The level of verbosity: 0 is least talkative and logs only warnings and errors, 1 adds info and 2 adds debug
-
-    * 'groups' [array-like, default=None]:
-        Info for advanced cross validation strategies, such as LeaveOneSiteOut-CV about the affiliation
-        of the rows in the data
-
-    * 'filter_element' [SourceFilter, default=None]:
-        Instance of SourceFilter Class that transforms the input data, e.g. extracts certain columns
-
-    * 'imbalanced_data_strategy_filter' [str, default=None]:
-        Uses the imblearn package to handle imbalanced class distributions in the data
-        A strategy is used to transform the data into more balanced distributions before the hyperparameter search
-        is started.
-        Strategies to choose from are:
-        - imbalance_type = OVERSAMPLING:
-            - RandomOverSampler
-            - SMOTE
-            - ADASYN
-
-        - imbalance_type = UNDERSAMPLING:
-            - ClusterCentroids,
-            - RandomUnderSampler,
-            - NearMiss,
-            - InstanceHardnessThreshold,
-            - CondensedNearestNeighbour,
-            - EditedNearestNeighbours,
-            - RepeatedEditedNearestNeighbours,
-            - AllKNN,
-            - NeighbourhoodCleaningRule,
-            - OneSidedSelection
-
-        - imbalance_type = COMBINE:
-            - SMOTEENN,
-            - SMOTETomek
-
-    Attributes
-    ----------
-    * 'optimum_pipe' [Pipeline]:
-        An sklearn pipeline object that is fitted to the training data according to the best hyperparameter
-        configuration found. Currently, we don't create an ensemble of all best hyperparameter configs over all folds.
-        We find the best config by comparing the test error across outer folds. The hyperparameter config of the best
-        fold is used as the optimal model and is then trained on the complete set.
-
-    * 'best_config' [dict]:
-        Dictionary containing the hyperparameters of the best configuration.
-        Contains the parameters in the sklearn interface of model_name__parameter_name: parameter value
-
-    * 'result_tree' [MDBHyperpipe]:
-        Object containing all information about the performed hyperparameter search.
-        Holds the training and test metrics for all outer folds, inner folds and configurations, as well as
-        additional information.
-
-    * 'pipeline_elements' [list]:
-        Contains all PipelineElement or Hyperpipe objects that are added to the pipeline.
-
-    Example
-    -------
-        manager = Hyperpipe('test_manager',
-                            optimizer='timeboxed_random_grid_search', optimizer_params={'limit_in_minutes': 1},
-                            outer_cv=ShuffleSplit(test_size=0.2, n_splits=1),
-                            inner_cv=KFold(n_splits=10, shuffle=True),
-                            metrics=['accuracy', 'precision', 'recall', "f1_score"],
-                            best_config_metric='accuracy', eval_final_performance=True,
-                            verbose=2)
-
-   """
-
-    OPTIMIZER_DICTIONARY = {'grid_search': GridSearchOptimizer,
-                            'random_grid_search': RandomGridSearchOptimizer,
-                            'timeboxed_random_grid_search': TimeBoxedRandomGridSearchOptimizer}
-
-    def __init__(self, name, inner_cv: BaseCrossValidator, outer_cv=None,
-                 optimizer='grid_search', optimizer_params: dict = {}, metrics=None,
-                 best_config_metric=None, eval_final_performance=True, test_size: float = 0.2,
-                 calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False,
-                 groups=None, set_random_seed: bool=False,
-                 filter_element=None, imbalanced_data_strategy_filter: str = '',
-                 verbosity=0,
-                 persist_options=None,
-                 performance_constraints=None):
-
-        # Re eval_final_performance:
-        # set eval_final_performance to False because
-        # 1. if no cv-object is given, no split is performed --> seems more logical
-        #    than passing nothing, passing no cv-object but getting
-        #    an 80/20 split by default
-        # 2. if cv-object is given, split is performed but we don't peek
-        #    into the test set --> thus we can evaluate more hp configs
-        #    later without double dipping
-
-        self.name = name
-        self.inner_cv = inner_cv
-        self.outer_cv = outer_cv
-        self.eval_final_performance = eval_final_performance
-        self.test_size = test_size
-        self.cv_iter = None
-        self.data_test_cases = None
-
-        self.calculate_metrics_per_fold = calculate_metrics_per_fold
-        self.calculate_metrics_across_folds = calculate_metrics_across_folds
-
-        # Todo: if self.outer_cv is LeaveOneOut: Set calculate metrics across folds to True -> Print
-
-        self.X = None
-        self.y = None
-
-        self.groups = groups
-        self.filter_element = filter_element
-        if imbalanced_data_strategy_filter:
-            self.imbalanced_data_strategy_filter = ImbalancedDataTransform(imbalanced_data_strategy_filter)
-        else:
-            self.imbalanced_data_strategy_filter = None
-
-        self.fit_duration = 0
-
-        if set_random_seed:
-            import random
-            random.seed(42)
-            print('set random seed to 42')
-
-        # set verbosity level
-        Logger().set_verbosity(verbosity)
-
-        # MongoDBWriter setup
-        if persist_options:
-            self.persist_options = persist_options
-            if self.persist_options.log_file:
-                Logger().set_custom_log_file(self.persist_options.log_file)
-        else:
-            self.persist_options = OutputSettings()
-        self.mongodb_writer = MongoDBWriter(self.persist_options)
-
-        self.pipeline_elements = []
-        self._pipe = None
-        self.optimum_pipe = None
-
-        self.metrics = metrics
-        #  Todo: raise error or warning if metrics and best config_metric is None
-        self.best_config_metric = best_config_metric
-        self.config_optimizer = None
-
-        self.result_tree = None
-        self.best_config = None
-        self.best_children_config = None
-        self.best_performance = None
-        self.is_final_fit = False
-
-        self.__mother_outer_fold_counter = 0
-        self.__mother_inner_fold_counter = 0
-        self.__mother_config_counter = 0
-
-        # containers for optimization history and logging
-        self._performance_history_list = []
-
-        if isinstance(optimizer, str):
-            # instantiate optimizer from string
-            #  Todo: check if optimizer strategy is already implemented
-            optimizer_class = self.OPTIMIZER_DICTIONARY[optimizer]
-            optimizer_instance = optimizer_class(**optimizer_params)
-            self.optimizer = optimizer_instance
-        else:
-            # Todo: check if correct object
-            self.optimizer = optimizer
-
-        self._validation_X = None
-        self._validation_y = None
-        self._test_X = None
-        self._test_y = None
-        self._last_fit_data_hash = None
-        self._current_fold = -1
-        self._num_of_folds = 0
-        self._is_mother_pipe = True
-        self._fold_data_hashes = []
-
-        self.inner_cv_callback_function = performance_constraints
-
-    def _set_verbosity(self, verbosity):
-        """
-        Set verbosity level manually
-        Returns None
-
-        Parameters
-        ----------
-        * 'verbosity' [Integer]:
-            Verbosity level can be 0, 1, or 2.
-
-        """
-        Logger().set_verbosity(verbosity)
-
-    def _set_persist_options(self, persist_options):
-        """
-        Set persist options manually
-        Returns None
-
-        Parameters
-        ----------
-        * 'persist_options' [OutputSettings]:
-
-        """
-        self.persist_options = persist_options
-        if self.persist_options.log_file:
-            Logger().set_custom_log_file(self.persist_options.log_file)
-        self.mongodb_writer = MongoDBWriter(self.persist_options)
-
-    def __iadd__(self, pipe_element):
-        """
-        Add an element to the machine learning pipeline
-        Returns self
-
-        Parameters
-        ----------
-        * 'pipe_element' [PipelineElement or Hyperpipe]:
-            The object to add to the machine learning pipeline, being either a transformer or an estimator.
-
-        """
-        # if isinstance(pipe_element, PipelineElement):
-        self.pipeline_elements.append(pipe_element)
-        # Todo: is repeated each time element is added....
-        self._prepare_pipeline()
-        return self
-        # else:
-        #     Todo: raise error
-        # raise TypeError("Element must be of type Pipeline Element")
-
-    def add(self, pipe_element):
-        self.__iadd__(pipe_element)
-
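For illustration, a minimal sketch of how __iadd__/add are used to assemble a pipe; the element names and the PipelineElement constructor signature are assumptions based on PHOTON's element registry, not taken from this diff:

    # Hypothetical usage sketch (element names assumed to exist in the registry)
    from sklearn.model_selection import KFold
    from photonai.base import Hyperpipe, PipelineElement

    pipe = Hyperpipe('svm_pipe', inner_cv=KFold(n_splits=5))
    pipe += PipelineElement('StandardScaler')                                  # transformer
    pipe += PipelineElement('SVC', hyperparameters={'C': [0.1, 1.0, 10.0]})   # estimator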
-    def __yield_all_data(self):
-        """
-        Helper function that iteratively returns the data stored in self.X
-        Returns an iterable version of self.X
-        """
-        if hasattr(self.X, 'shape'):
-            yield list(range(self.X.shape[0])), []
-        else:
-            yield list(range(len(self.X))), []
-
-    def _generate_outer_cv_indices(self):
-        """
-        Generates the training and test set indices for the hyperparameter search
-        Returns a tuple of training and test indices
-
-        - If there is a strategy given for the outer cross validation, the strategy is called to split the data
-        - If no strategy is given and eval_final_performance is False, all data is used for training
-        - If no strategy is given and eval_final_performance is True, a test set is separated from the
-          training and validation set by the parameter test_size with ShuffleSplit
-        """
-        # if there is a CV Object for cross validating the hyperparameter search
-        if self.outer_cv:
-            self.data_test_cases = self.outer_cv.split(self.X, self.y)
-        # in case we do not want to divide between validation and test set
-        elif not self.eval_final_performance:
-            self.data_test_cases = self.__yield_all_data()
-        # the default is dividing one time into a validation and test set
-        else:
-            train_test_cv_object = ShuffleSplit(n_splits=1, test_size=self.test_size)
-            self.data_test_cases = train_test_cv_object.split(self.X, self.y)
-
-    def _distribute_cv_info_to_hyperpipe_children(self, reset: bool =False, reset_final_fit: bool=False,
-                                                  outer_fold_counter: int=None, inner_fold_counter: int =None,
-                                                  num_of_folds: int = None, config_counter: int =None):
-        """
-        Informs all elements of the pipeline that are of type hyperpipe (hyperpipe children)
-        about the mother's configuration or current state
-
-        Parameters
-        ----------
-        * 'num_of_folds' [int]:
-            how many inner folds the mother hyperpipe has
-
-        * 'outer_fold_counter' [int]:
-            in which outer fold the mother hyperpipe currently is
-
-        * 'inner_fold_counter' [int]:
-            in which inner fold the mother hyperpipe currently is
-
-        * 'config_counter' [int]:
-            in which config_nr the mother hyperpipe currently is
-
-        * 'reset' [bool, default = False]:
-            if the hyperparameter search starts anew
-
-        * 'reset_final_fit' [bool, default = False]:
-            reset the is_final_fit parameter so that child hyperpipes train anew for the outer fold of the mother pipe
-
-        """
-
-        def _distribute_info_to_object(pipe_object, number_of_folds, reset_folds, reset_final_fit,
-                                      outer_fold_counter, inner_fold_counter, config_counter):
-            if pipe_object.local_search:
-                if number_of_folds is not None:
-                    pipe_object.num_of_folds = number_of_folds
-                    pipe_object.is_mother_pipe = False
-                if reset_folds:
-                    pipe_object.current_fold = -1
-                if outer_fold_counter is not None:
-                    pipe_object.mother_outer_fold_counter = outer_fold_counter
-                if inner_fold_counter is not None:
-                    pipe_object.mother_inner_fold_counter = inner_fold_counter
-                if config_counter:
-                    pipe_object.mother_config_counter = config_counter
-                if reset_final_fit:
-                    pipe_object.is_final_fit = False
-
-        # walk through all children of pipeline, if its a hyperpipe distribute the information
-        for element_tuple in self._pipe.steps:
-            element_object = element_tuple[1]
-            if isinstance(element_object, Hyperpipe):
-                _distribute_info_to_object(element_object, num_of_folds, reset, reset_final_fit,
-                                          outer_fold_counter, inner_fold_counter, config_counter)
-            elif isinstance(element_object, PipelineStacking):
-                for child_pipe_name, child_pipe_object in element_object.pipe_elements.items():
-                    if isinstance(child_pipe_object, Hyperpipe):
-                        _distribute_info_to_object(child_pipe_object, num_of_folds, reset, reset_final_fit,
-                                                  outer_fold_counter, inner_fold_counter, config_counter)
-
-    def update_mother_inner_fold_nr(self, new_inner_fold_nr: int):
-        """
-        Function handle so that the TestPipeline class from Photon's validation module can pass the information to hyperpipe children
-
-        Parameters
-        ----------
-        * 'new_inner_fold_nr' [int]:
-            in which inner_fold the mother hyperpipe currently is
-        """
-        self._distribute_cv_info_to_hyperpipe_children(inner_fold_counter=new_inner_fold_nr)
-
-    def fit(self, data, targets, **fit_params):
-        """
-        Starts the hyperparameter search and/or fits the pipeline to the data and targets
-
-        Manages the nested cross validated hyperparameter search:
-
-        1. Filters the data according to the filter strategy and the imbalanced_data_strategy
-        2. requests new configurations from the hyperparameter search strategy, the optimizer,
-        3. initializes the testing of a specific configuration,
-        4. communicates the result to the optimizer,
-        5. repeats 2-4 until optimizer delivers no more configurations to test
-        6. finally searches for the best config in all tested configs,
-        7. trains the pipeline with the best config and evaluates the performance on the test set
-
-        Parameters
-        ----------
-         * `data` [array-like, shape=[N, D]]:
-            the training and test data, where N is the number of samples and D is the number of features.
-
-         * `targets` [array-like, shape=[N]]:
-            the truth values, where N is the number of samples.
-
-
-        Returns
-        -------
-         * 'self'
-            Returns self
-
-        """
-
-        # in case we want to inject some data from outside the pipeline
-
-        self.X = data
-        self.y = targets
-
-
-        # !!!!!!!!!!!!!!!! FIT ONLY IF DATA CHANGED !!!!!!!!!!!!!!!!!!!
-        # -------------------------------------------------------------
-
-        # in case we need to reduce the dimension of the data due to parallelism of the outer pipe, let's do it.
-        if self.filter_element:
-            self.X = self.filter_element.transform(self.X)
-
-        # if the groups are imbalanced, and a strategy is chosen, apply it here
-        if self.imbalanced_data_strategy_filter:
-            self.imbalanced_data_strategy_filter.fit(self.X, self.y)
-            self.X, self.y = self.imbalanced_data_strategy_filter.transform()
-
-        self._current_fold += 1
-
-        # be compatible to list of (image-) files
-        if isinstance(self.X, list):
-            self.X = np.asarray(self.X)
-        if isinstance(self.y, list):
-            self.y = np.asarray(self.y)
-
-        # handle neuro image paths as data
-        # ToDo: Need to check the DATA, not the img paths for neuro
-        new_data_hash = sha1(np.asarray(self.X, order='C')).hexdigest()
-
-        # fit
-        # 1. if it is first time ever or
-        # 2. the data did change for that fold or
-        # 3. if it is the mother pipe (then number_of_folds = 0)
-        if (len(self._fold_data_hashes) < self._num_of_folds) \
-                or (self._num_of_folds > 0 and self._fold_data_hashes[self._current_fold] != new_data_hash) \
-                or self._num_of_folds == 0:
-
-            # save data hash for that fold
-            if self._num_of_folds > 0:
-                if len(self._fold_data_hashes) < self._num_of_folds:
-                    self._fold_data_hashes.append(new_data_hash)
-                else:
-                    self._fold_data_hashes[self._current_fold] = new_data_hash
-
-            # optimize: iterate through configs and save results
-            if not self.is_final_fit:
-
-                # first check if correct optimizer metric has been chosen
-                # pass pipeline_elements so that OptimizerMetric can look for last
-                # element and use the corresponding score method
-                self.config_optimizer = OptimizerMetric(self.best_config_metric, self.pipeline_elements, self.metrics)
-                self.metrics = self.config_optimizer.check_metrics()
-
-                if 'score' in self.metrics:
-                    Logger().warn('Attention: Scoring with default score function of estimator can slow down calculations!')
-
-                # generate OUTER ! cross validation splits to iterate over
-                self._generate_outer_cv_indices()
-
-                outer_fold_counter = 0
-
-                if not self._is_mother_pipe:
-                    self.result_tree_name = self.name + '_outer_fold_' + str(self.__mother_outer_fold_counter)  \
-                                            + '_inner_fold_' + str(self.__mother_inner_fold_counter)
-                else:
-                    self.result_tree_name = self.name
-
-                # initialize result logging with hyperpipe class
-                self.result_tree = MDBHyperpipe(name=self.result_tree_name)
-                self.result_tree.outer_folds = []
-                self.result_tree.eval_final_performance = self.eval_final_performance
-                self.result_tree.best_config_metric = self.best_config_metric
-
-                # loop over outer cross validation
-                for train_indices, test_indices in self.data_test_cases:
-
-                    # give the optimizer the chance to inform about elements
-                    self.optimizer.prepare(self.pipeline_elements)
-
-                    outer_fold_counter += 1
-                    outer_fold_fit_start_time = time.time()
-
-                    Logger().info('HYPERPARAMETER SEARCH OF {0}, Outer Cross validation Fold {1}'
-                                  .format(self.name, outer_fold_counter))
-
-                    t1 = time.time()
-
-                    # Prepare Train and validation set data
-                    self._validation_X = self.X[train_indices]
-                    self._validation_y = self.y[train_indices]
-                    self._test_X = self.X[test_indices]
-                    self._test_y = self.y[test_indices]
-
-                    # Prepare inner cross validation
-                    cv_iter = list(self.inner_cv.split(self._validation_X, self._validation_y))
-                    num_folds = len(cv_iter)
-                    num_samples_train = len(self._validation_y)
-                    num_samples_test = len(self._test_y)
-
-                    # distribute number of folds to encapsulated child hyperpipes
-                    self._distribute_cv_info_to_hyperpipe_children(num_of_folds=num_folds,
-                                                                   outer_fold_counter=outer_fold_counter)
-
-                    tested_config_counter = 0
-
-                    # add outer fold info object to result tree
-                    outer_fold = MDBOuterFold(fold_nr=outer_fold_counter)
-                    outer_fold.tested_config_list = []
-                    self.result_tree.outer_folds.append(outer_fold)
-
-                    # do the optimizing
-                    for current_config in self.optimizer.next_config:
-                        self._distribute_cv_info_to_hyperpipe_children(reset=True, config_counter=tested_config_counter)
-                        hp = TestPipeline(self._pipe, current_config, self.metrics, self.update_mother_inner_fold_nr,
-                                          mongo_db_settings=self.persist_options,
-                                          callback_function=self.inner_cv_callback_function)
-                        Logger().debug('optimizing of:' + self.name)
-                        Logger().debug(self._optimize_printing(current_config))
-                        Logger().debug('calculating...')
-
-                        # Test the configuration cross validated by inner_cv object
-                        current_config_mdb = hp.calculate_cv_score(self._validation_X, self._validation_y, cv_iter,
-                                                            calculate_metrics_per_fold=self.calculate_metrics_per_fold,
-                                                            calculate_metrics_across_folds=self.calculate_metrics_across_folds)
-
-                        current_config_mdb.config_nr = tested_config_counter
-                        current_config_mdb.config_dict = current_config
-                        current_config_mdb.pipe_name = self.name
-                        tested_config_counter += 1
-                        current_config_mdb.human_readable_config = self.config_to_dict(current_config)
-
-                        # save the configuration of all children pipelines
-                        children_config = {}
-                        children_config_ref_list = []
-                        for pipe_step in self._pipe.steps:
-                            item = pipe_step[1]
-                            if isinstance(item, Hyperpipe):
-                                if item.local_search and item.best_config is not None:
-                                    children_config[item.name] = item.best_config
-                            elif isinstance(item, PipelineStacking):
-                                for subhyperpipe_name, hyperpipe in item.pipe_elements.items():
-                                    if isinstance(hyperpipe, Hyperpipe):
-                                        if hyperpipe.local_search and hyperpipe.best_config is not None:
-                                            # special case: we need to access pipe over pipeline_stacking element
-                                            children_config[item.name + '__' + subhyperpipe_name] = hyperpipe.best_config.config_dict
-                                        # children_config_ref_list.append(hyperpipe.best_config_outer_fold._id)
-                        specific_parameters = self._pipe.get_params()
-                        #current_config_mdb.full_model_spec = specific_parameters
-
-                        current_config_mdb.children_config_dict = children_config
-                        current_config_mdb.children_config_ref = children_config_ref_list
-
-                        Logger().verbose(self._optimize_printing(current_config))
-
-                        if not current_config_mdb.config_failed:
-                            # get optimizer_metric and forward to optimizer
-                            # todo: also pass greater_is_better=True/False to optimizer
-                            metric_train = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric)
-                            metric_test = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric, train=False)
-                            #
-                            # if not metric_train or metric_test:
-                            #     raise Exception("Config did not fail, but did not get any metrics either....!!?")
-                            config_performance = (metric_train, metric_test)
-
-                            # Print Result for config
-                            Logger().debug('...done:')
-                            Logger().verbose(self.config_optimizer.metric + str(config_performance))
-                        else:
-                             config_performance = (-1, -1)
-                             # Print Result for config
-                             Logger().debug('...failed:')
-                             Logger().error(current_config_mdb.config_error)
-
-                        # add config to result tree and do intermediate saving
-                        self.result_tree.outer_folds[-1].tested_config_list.append(current_config_mdb)
-                        # Todo: add try catch in case config cannot be written
-                        self.mongodb_writer.save(self.result_tree)
-
-                        # 3. inform optimizer about performance
-                        self.optimizer.evaluate_recent_performance(current_config, config_performance)
-
-                    if tested_config_counter > 0:
-                        best_config_outer_fold = self.config_optimizer.get_optimum_config(outer_fold.tested_config_list)
-
-                        if not best_config_outer_fold:
-                            raise Exception("No best config was found!")
-                        best_config_outer_fold_mdb = MDBConfig()
-                        best_config_outer_fold_mdb.children_config_dict = best_config_outer_fold.children_config_dict
-                        best_config_outer_fold_mdb.pipe_name = self.name
-                        best_config_outer_fold_mdb.children_config_ref = best_config_outer_fold.children_config_ref
-                        # best_config_outer_fold_mdb.best_config_ref_to_train_item = best_config_outer_fold._id
-                        best_config_outer_fold_mdb.config_dict = best_config_outer_fold.config_dict
-                        best_config_outer_fold_mdb.human_readable_config = best_config_outer_fold.human_readable_config
-
-
-                        # inform user
-                        Logger().info('finished optimization of ' + self.name)
-                        Logger().verbose('Result')
-                        Logger().verbose('Number of tested configurations:' + str(tested_config_counter))
-                        Logger().verbose('Optimizer metric: ' + self.config_optimizer.metric + '\n' +
-                                         '   --> Greater is better: ' + str(self.config_optimizer.greater_is_better))
-                        Logger().info('Best config: ' + self._optimize_printing(best_config_outer_fold_mdb.config_dict) +
-                                      '\n' + '... with children config: '
-                                      + self._optimize_printing(best_config_outer_fold_mdb.children_config_dict))
-
-
-                        # ... and create optimal pipeline
-                        self.optimum_pipe = self._pipe
-                        # set self to best config
-                        self.optimum_pipe.set_params(**best_config_outer_fold_mdb.config_dict)
-
-                        # set all children to best config and inform to NOT optimize again, ONLY fit
-                        for child_name, child_config in best_config_outer_fold_mdb.children_config_dict.items():
-                            if child_config:
-                                # in case we have a pipeline stacking we need to identify the particular subhyperpipe
-                                splitted_name = child_name.split('__')
-                                if len(splitted_name) > 1:
-                                    stacking_element = self.optimum_pipe.named_steps[splitted_name[0]]
-                                    pipe_element = stacking_element.pipe_elements[splitted_name[1]]
-                                else:
-                                    pipe_element = self.optimum_pipe.named_steps[child_name]
-                                pipe_element.set_params(**child_config)
-                                pipe_element.is_final_fit = True
-
-                        self._distribute_cv_info_to_hyperpipe_children(reset=True)
-
-                        Logger().verbose('...now fitting ' + self.name + ' with optimum configuration')
-                        fit_time_start = time.time()
-                        self.optimum_pipe.fit(self._validation_X, self._validation_y)
-                        final_fit_duration = time.time() - fit_time_start
-
-                        #self.best_config_outer_fold.full_model_spec = self.optimum_pipe.get_params()
-                        best_config_outer_fold_mdb.fit_duration_minutes = final_fit_duration
-                        self.result_tree.outer_folds[-1].best_config = best_config_outer_fold_mdb
-                        self.result_tree.outer_folds[-1].best_config.inner_folds = []
-
-                        if self.eval_final_performance:
-                            # Todo: generate mean and std over outer folds as well. move these items to the top
-                            Logger().verbose('...now predicting ' + self.name + ' unseen data')
-
-                            test_score_mdb = TestPipeline.score(self.optimum_pipe, self._test_X, self._test_y,
-                                                                self.metrics,
-                                                                save_predictions=self.persist_options.save_predictions,
-                                                                save_feature_importances=self.persist_options.save_feature_importances)
-
-                            Logger().info('.. calculating metrics for test set (' + self.name + ')')
-                            Logger().verbose('...now predicting ' + self.name + ' final model with training data')
-
-                            train_score_mdb = TestPipeline.score(self.optimum_pipe, self._validation_X, self._validation_y,
-                                                                 self.metrics,
-                                                                 save_predictions=self.persist_options.save_predictions,
-                                                                 save_feature_importances=self.persist_options.save_feature_importances)
-
-                            # save test fold
-                            outer_fold_mdb = MDBInnerFold()
-                            outer_fold_mdb.fold_nr = 1
-                            outer_fold_mdb.number_samples_training = num_samples_train
-                            outer_fold_mdb.number_samples_validation = num_samples_test
-                            outer_fold_mdb.training = train_score_mdb
-                            outer_fold_mdb.validation = test_score_mdb
-                            self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-
-                            Logger().info('PERFORMANCE TRAIN:')
-                            for m_key, m_value in train_score_mdb.metrics.items():
-                                Logger().info(str(m_key) + ": " + str(m_value))
-
-                            Logger().info('PERFORMANCE TEST:')
-                            for m_key, m_value in test_score_mdb.metrics.items():
-                                    Logger().info(str(m_key) + ": " + str(m_value))
-                        else:
-
-                            # save test fold
-                            outer_fold_mdb = MDBInnerFold()
-                            outer_fold_mdb.fold_nr = 1
-                            outer_fold_mdb.number_samples_training = num_samples_train
-                            outer_fold_mdb.number_samples_validation = num_samples_test
-
-                            def _copy_inner_fold_means(metric_dict):
-                                # We copy all mean values from validation to the best config
-                                # training
-                                train_item_metrics = {}
-                                for m in metric_dict:
-                                    if m.operation == str(FoldOperations.MEAN):
-                                        train_item_metrics[m.metric_name] = m.value
-                                train_item = MDBScoreInformation()
-                                train_item.metrics_copied_from_inner = True
-                                train_item.metrics = train_item_metrics
-                                return train_item
-
-                            # training
-                            outer_fold_mdb.training = _copy_inner_fold_means(best_config_outer_fold.metrics_train)
-                            # validation
-                            outer_fold_mdb.validation = _copy_inner_fold_means(best_config_outer_fold.metrics_test)
-
-                            self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-
-                    Logger().info('This took {} minutes.'.format((time.time() - t1) / 60))
-                    self.result_tree.time_of_results = datetime.datetime.now()
-                    self.mongodb_writer.save(self.result_tree)
-                    self._distribute_cv_info_to_hyperpipe_children(reset_final_fit=True, outer_fold_counter=outer_fold_counter)
-
-                # Compute all final metrics
-                self.result_tree.metrics_train, self.result_tree.metrics_test = MDBHelper.aggregate_metrics(self.result_tree.outer_folds,
-                                                                                                            self.metrics)
-                # save result tree to db or file or both
-                self.mongodb_writer.save(self.result_tree)
-                Logger().info("Saved result tree to database")
-
-                # Find best config across outer folds
-                self.best_config = self.config_optimizer.get_optimum_config_outer_folds(self.result_tree.outer_folds)
-                self.result_tree.best_config = self.best_config
-                Logger().info('OVERALL BEST CONFIGURATION')
-                Logger().info('--------------------------')
-                Logger().info(self._optimize_printing(self.best_config.config_dict) +
-                              '\n' + '... with children config: '
-                              + self._optimize_printing(self.best_config.children_config_dict))
-                # set self to best config
-                self.optimum_pipe = self._pipe
-                self.optimum_pipe.set_params(**self.best_config.config_dict)
-                self.optimum_pipe.fit(self._validation_X, self._validation_y)
-
-                # save results again
-                self.mongodb_writer.save(self.result_tree)
-                Logger().info("Saved overall best config to database")
-            ###############################################################################################
-            else:
-                self._pipe.fit(self.X, self.y, **fit_params)
-
-        else:
-            Logger().verbose("Avoided fitting of " + self.name + " on fold "
-                             + str(self._current_fold) + " because data did not change")
-            Logger().verbose('Best config of ' + self.name + ' : ' + str(self.best_config))
-
-        return self
-
-    def predict(self, data):
-        """
-        Use the optimum pipe to predict the data
-
-        Returns
-        -------
-            predicted targets
-
-        """
-        # Todo: if local_search = true then use optimized pipe here?
-        if self._pipe:
-            if self.filter_element:
-                data = self.filter_element.transform(data)
-            return self.optimum_pipe.predict(data)
-
-    def predict_proba(self, data):
-        """
-        Predict probabilities
-
-        Returns
-        -------
-        predicted probabilities
-
-        """
-        if self._pipe:
-            if self.filter_element:
-                data = self.filter_element.transform(data)
-            return self.optimum_pipe.predict_proba(data)
-
-    def transform(self, data):
-        """
-        Use the optimum pipe to transform the data
-        """
-        if self._pipe:
-            if self.filter_element:
-                data = self.filter_element.transform(data)
-            return self.optimum_pipe.transform(data)
-
-    def get_params(self, deep=True):
-        """
-        Retrieve parameters from sklearn pipeline
-        """
-        if self._pipe is not None:
-            return self._pipe.get_params(deep)
-        else:
-            return None
-
-    def set_params(self, **params):
-        """
-        Give parameter values to the pipeline elements
-        """
-        if self._pipe is not None:
-            self._pipe.set_params(**params)
-        return self
-
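For illustration, a small sketch of the 'model_name__parameter_name' encoding that set_params forwards to the underlying sklearn pipeline; the element name 'SVC' and its parameter are hypothetical, and pipe is assumed to be a prepared Hyperpipe:

    # Hypothetical: address parameter 'C' of an element named 'SVC'
    pipe.set_params(**{'SVC__C': 1.0})
    params = pipe.get_params()   # parameters of the wrapped sklearn Pipeline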
-    def _prepare_pipeline(self):
-        """
-        build sklearn pipeline from PipelineElements and
-        calculate parameter grid for all combinations of pipeline element hyperparameters
-        """
-        # prepare pipeline
-        pipeline_steps = []
-        for item in self.pipeline_elements:
-            # pipeline_steps.append((item.name, item.base_element))
-            pipeline_steps.append((item.name, item))
-
-        # build pipeline...
-        self._pipe = Pipeline(pipeline_steps)
-
-    def copy_me(self):
-        """
-        Helper function to copy all pipeline elements
-        """
-        item_list =[]
-        for item in self.pipeline_elements:
-            item_list.append(item.copy_me())
-        return item_list
-
-    def _copy_pipeline(self):
-        """
-        Copy Pipeline by building a new sklearn Pipeline with Pipeline Elements
-
-        Returns
-        -------
-        new sklearn Pipeline object
-        """
-        pipeline_steps = []
-        for item in self.pipeline_elements:
-            cpy = item.copy_me()
-            if isinstance(cpy, list):
-                for new_step in cpy:
-                    pipeline_steps.append((new_step.name, new_step))
-            else:
-                pipeline_steps.append((cpy.name, cpy))
-        return Pipeline(pipeline_steps)
-
-    def save_optimum_pipe(self, file):
-        """
-        Save optimal pipeline only. Complete hyperpipe will not be saved.
-
-        Parameters
-        ----------
-        * 'file' [str]:
-            File path as string specifying file to save pipeline to
-        """
-        element_number = 0
-        element_identifier = list()
-        folder = os.path.splitext(file)[0]
-        file = os.path.splitext(file)[0] + '.photon'
-
-        if os.path.exists(folder):
-            raise FileExistsError('Trying to save optimum pipe: The file you specified already exists as a folder.')
-        else:
-            os.mkdir(folder)
-            folder = folder + '/'
-        wrapper_files = list()
-
-        for element_name, element in self.optimum_pipe.named_steps.items():
-            filename = '_optimum_pipe_' + str(element_number) + '_' + element_name
-            element_identifier.append({'element_name': element_name,
-                                       'filename': filename})
-            if hasattr(element.base_element, 'save'):
-                element.base_element.save(folder + filename)
-                element_identifier[-1]['mode'] = 'custom'
-                element_identifier[-1]['wrapper_script'] = os.path.basename(inspect.getfile(element.base_element.__class__))
-                wrapper_files.append(inspect.getfile(element.base_element.__class__))
-                element_identifier[-1]['test_disabled'] = element.test_disabled
-                element_identifier[-1]['disabled'] = element.disabled
-                element_identifier[-1]['hyperparameters'] = element.hyperparameters
-
-            else:
-                try:
-                    joblib.dump(element, folder + filename + '.pkl', compress=1)
-                    element_identifier[-1]['mode'] = 'pickle'
-                except:
-                    raise NotImplementedError("Custom pipeline element must implement .save() method or "
-                                              "allow pickle.")
-            element_number += 1
-        # save pipeline blueprint to make loading of pipeline easier
-        with open(folder + '_optimum_pipe_blueprint.pkl', 'wb') as f:
-            pickle.dump(element_identifier, f)
-
-        # get all files
-        files = glob.glob(folder + '_optimum_pipe_*')
-        with zipfile.ZipFile(file, 'w') as myzip:
-            for f in files:
-                myzip.write(f, os.path.basename(f))
-                os.remove(f)
-            for f in wrapper_files:
-                myzip.write(f, os.path.splitext(os.path.basename(f))[0] + '.py')
-        os.removedirs(folder)
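A minimal usage sketch of the archive format described above (the trained Hyperpipe instance my_pipe and the target path are hypothetical):

    # Persist only the best pipeline found during hyperparameter search.
    # Each step is stored via its custom .save() method or via joblib
    # pickling, together with the '_optimum_pipe_blueprint.pkl', and the
    # temporary folder is zipped into a single '.photon' archive.
    my_pipe.save_optimum_pipe('best_model.photon')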
-
-    @staticmethod
-    def load_optimum_pipe(file):
-        """
-        Load optimal pipeline.
-
-
-        Parameters
-        ----------
-        * 'file' [str]:
-            File path specifying .photon file to load optimal pipeline from
-
-        Returns
-        -------
-        sklearn Pipeline with all trained photon_pipelines
-        """
-        if file.endswith('.photon'):
-            archive_name = os.path.splitext(file)[0]
-            folder = archive_name + '/'
-            zf = zipfile.ZipFile(file)
-            zf.extractall(folder)
-        else:
-            raise FileNotFoundError('Specify .photon file that holds PHOTON optimum pipe.')
-
-        setup_info = pickle.load(open(folder + '_optimum_pipe_blueprint.pkl', 'rb'))
-        element_list = list()
-        for element_info in setup_info:
-            if element_info['mode'] == 'custom':
-                spec = importlib.util.spec_from_file_location(element_info['element_name'],
-                                                              folder + element_info['wrapper_script'])
-                imported_module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(imported_module)
-                base_element = getattr(imported_module, element_info['element_name'])
-                custom_element = PipelineElement(name=element_info['element_name'], base_element=base_element(),
-                                                 hyperparameters=element_info['hyperparameters'],
-                                                 test_disabled=element_info['test_disabled'],
-                                                 disabled=element_info['disabled'])
-                custom_element.base_element.load(folder + element_info['filename'])
-                element_list.append((element_info['element_name'], custom_element))
-            else:
-                element_list.append((element_info['element_name'], joblib.load(folder + element_info['filename'] + '.pkl')))
-
-        return Pipeline(element_list)
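The counterpart sketch for restoring the archive written by save_optimum_pipe (paths and the unseen data X_new are hypothetical):

    # A plain sklearn Pipeline with all trained steps is returned.
    optimum_pipe = Hyperpipe.load_optimum_pipe('best_model.photon')
    predictions = optimum_pipe.predict(X_new)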
-
-
-    def inverse_transform_pipeline(self, hyperparameters: dict, data, targets, data_to_inverse):
-        """
-        Inverse transform data for a pipeline with specific hyperparameter configuration
-
-        1. Copy Sklearn Pipeline,
-        2. Set Parameters
-        3. Fit Pipeline to data and targets
-        4. Inverse transform data with that pipeline
-
-        Parameters
-        ----------
-        * 'hyperparameters' [dict]:
-            The concrete configuration settings for the pipeline elements
-        * 'data' [array-like]:
-            The training data to which the pipeline is fitted
-        * 'targets' [array-like]:
-            The truth values for training
-        * 'data_to_inverse' [array-like]:
-            The data that should be inversed after training
-
-        Returns
-        -------
-        Inverse-transformed data as array
-        """
-        copied_pipe = self._copy_pipeline()
-        copied_pipe.set_params(**hyperparameters)
-        copied_pipe.fit(data, targets)
-        return copied_pipe.inverse_transform(data_to_inverse)
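A usage sketch, assuming a fitted Hyperpipe my_pipe whose best configuration has already been determined (all variable names are hypothetical):

    # Re-fit a copy of the pipeline under the best configuration and map
    # model weights back into input space, e.g. through a dimensionality
    # reduction step that implements inverse_transform.
    inverted = my_pipe.inverse_transform_pipeline(my_pipe.best_config.config_dict,
                                                  X_train, y_train,
                                                  data_to_inverse=model_weights)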
-
-    def _optimize_printing(self, config: dict):
-        """
-        Make the sklearn config syntax readable for humans
-        """
-        prettified_config = [self.name + '\n']
-        for el_key, el_value in config.items():
-            items = el_key.split('__')
-            name = items[0]
-            rest = '__'.join(items[1::])
-            if name in self._pipe.named_steps:
-                new_pretty_key = '    ' + name + '->'
-                prettified_config.append(new_pretty_key +
-                                         self._pipe.named_steps[name].prettify_config_output(rest, el_value) + '\n')
-            else:
-                Logger().error('ValueError: Item is not contained in pipeline: ' + name)
-                raise ValueError('Item is not contained in pipeline: ' + name)
-        return ''.join(prettified_config)
-
-    @staticmethod
-    def prettify_config_output(config_name: str, config_value):
-        """
-        Render "disabled = False" as "enabled = True" for better readability
-        """
-        if config_name == "disabled" and config_value is False:
-            return "enabled = True"
-        else:
-            return config_name + '=' + str(config_value)
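Both branches of this static method can be illustrated directly (parameter names are arbitrary examples):

    print(Hyperpipe.prettify_config_output('disabled', False))  # -> enabled = True
    print(Hyperpipe.prettify_config_output('C', 1.0))           # -> C=1.0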
-
-
-    def config_to_dict(self, specific_config):
-        """
-        """
-        config = {}
-        for key, value in specific_config.items():
-            items = key.split('__')
-            name = items[0]
-            rest = '__'.join(items[1::])
-            if name in self._pipe.named_steps:
-                config.update(self._pipe.named_steps[name].prettify_config_output(rest, value, return_dict=True))
-                #config[name] = value
-        return config
-

Ancestors (in MRO)

  • Hyperpipe
  • sklearn.base.BaseEstimator
  • builtins.object

Class variables


var OPTIMIZER_DICTIONARY


Static methods


def __init__(self, name, inner_cv, outer_cv=None, optimizer='grid_search', optimizer_params={}, metrics=None, best_config_metric=None, eval_final_performance=True, test_size=0.2, calculate_metrics_per_fold=True, calculate_metrics_across_folds=False, groups=None, set_random_seed=False, filter_element=None, imbalanced_data_strategy_filter='', verbosity=0, persist_options=None, performance_constraints=None)

Initialize self. See help(type(self)) for accurate signature.

-def __init__(self, name, inner_cv: BaseCrossValidator, outer_cv=None,
-             optimizer='grid_search', optimizer_params: dict = {}, metrics=None,
-             best_config_metric=None, eval_final_performance=True, test_size: float = 0.2,
-             calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False,
-             groups=None, set_random_seed: bool=False,
-             filter_element=None, imbalanced_data_strategy_filter: str = '',
-             verbosity=0,
-             persist_options=None,
-             performance_constraints=None):
-    # Re eval_final_performance:
-    # set eval_final_performance to False because
-    # 1. if no cv-object is given, no split is performed --> seems more logical
-    #    than passing nothing, passing no cv-object but getting
-    #    an 80/20 split by default
-    # 2. if cv-object is given, split is performed but we don't peek
-    #    into the test set --> thus we can evaluate more hp configs
-    #    later without double dipping
-    self.name = name
-    self.inner_cv = inner_cv
-    self.outer_cv = outer_cv
-    self.eval_final_performance = eval_final_performance
-    self.test_size = test_size
-    self.cv_iter = None
-    self.data_test_cases = None
-    self.calculate_metrics_per_fold = calculate_metrics_per_fold
-    self.calculate_metrics_across_folds = calculate_metrics_across_folds
-    # Todo: if self.outer_cv is LeaveOneOut: Set calculate metrics across folds to True -> Print
-    self.X = None
-    self.y = None
-    self.groups = groups
-    self.filter_element = filter_element
-    if imbalanced_data_strategy_filter:
-        self.imbalanced_data_strategy_filter = ImbalancedDataTransform(imbalanced_data_strategy_filter)
-    else:
-        self.imbalanced_data_strategy_filter = None
-    self.fit_duration = 0
-    if set_random_seed:
-        import random
-        random.seed(42)
-        print('set random seed to 42')
-    # set verbosity level
-    Logger().set_verbosity(verbosity)
-    # MongoDBWriter setup
-    if persist_options:
-        self.persist_options = persist_options
-        if self.persist_options.log_file:
-            Logger().set_custom_log_file(self.persist_options.log_file)
-    else:
-        self.persist_options = OutputSettings()
-    self.mongodb_writer = MongoDBWriter(self.persist_options)
-    self.pipeline_elements = []
-    self._pipe = None
-    self.optimum_pipe = None
-    self.metrics = metrics
-    #  Todo: raise error or warning if metrics and best config_metric is None
-    self.best_config_metric = best_config_metric
-    self.config_optimizer = None
-    self.result_tree = None
-    self.best_config = None
-    self.best_children_config = None
-    self.best_performance = None
-    self.is_final_fit = False
-    self.__mother_outer_fold_counter = 0
-    self.__mother_inner_fold_counter = 0
-    self.__mother_config_counter = 0
-    # containers for optimization history and logging
-    self._performance_history_list = []
-    if isinstance(optimizer, str):
-        # instantiate optimizer from string
-        #  Todo: check if optimizer strategy is already implemented
-        optimizer_class = self.OPTIMIZER_DICTIONARY[optimizer]
-        optimizer_instance = optimizer_class(**optimizer_params)
-        self.optimizer = optimizer_instance
-    else:
-        # Todo: check if correct object
-        self.optimizer = optimizer
-    self._validation_X = None
-    self._validation_y = None
-    self._test_X = None
-    self._test_y = None
-    self._last_fit_data_hash = None
-    self._current_fold = -1
-    self._num_of_folds = 0
-    self._is_mother_pipe = True
-    self._fold_data_hashes = []
-    self.inner_cv_callback_function = performance_constraints
-
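A minimal construction sketch matching this signature (KFold is just one possible cross-validation object; name and metric choices are examples):

    from sklearn.model_selection import KFold

    my_pipe = Hyperpipe('basic_pipe',
                        optimizer='grid_search',
                        metrics=['accuracy'],
                        best_config_metric='accuracy',
                        inner_cv=KFold(n_splits=3),
                        outer_cv=KFold(n_splits=3))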

def add(self, pipe_element)
-def add(self, pipe_element):
-    self.__iadd__(pipe_element)
-

def config_to_dict(self, specific_config)


def copy_me(self)

Helper function to copy all pipeline elements


def fit(self, data, targets, **fit_params)

Starts the hyperparameter search and/or fits the pipeline to the data and targets

Manages the nested cross-validated hyperparameter search:

  1. Filters the data according to the filter strategy (1) and according to the imbalanced_data_strategy (2),
  2. requests new configurations from the hyperparameter search strategy, the optimizer,
  3. initializes the testing of a specific configuration,
  4. communicates the result to the optimizer,
  5. repeats 2-4 until the optimizer delivers no more configurations to test,
  6. finally searches for the best config among all tested configs,
  7. trains the pipeline with the best config and evaluates the performance on the test set.

Parameters

  • data [array-like, shape=[N, D]]: the training and test data, where N is the number of samples and D is the number of features.
  • targets [array-like, shape=[N]]: the truth values, where N is the number of samples.

Returns

  • 'self': Returns self
-def fit(self, data, targets, **fit_params):
-    """
-    Starts the hyperparameter search and/or fits the pipeline to the data and targets
-    Manages the nested cross validated hyperparameter search:
-    1. Filters the data according to filter strategy (1) and according to the imbalanced_data_strategy (2)
-    2. requests new configurations from the hyperparameter search strategy, the optimizer,
-    3. initializes the testing of a specific configuration,
-    4. communicates the result to the optimizer,
-    5. repeats 2-4 until optimizer delivers no more configurations to test
-    6. finally searches for the best config in all tested configs,
-    7. trains the pipeline with the best config and evaluates the performance on the test set
-    Parameters
-    ----------
-     * `data` [array-like, shape=[N, D]]:
-        the training and test data, where N is the number of samples and D is the number of features.
-     * `targets` [array-like, shape=[N]]:
-        the truth values, where N is the number of samples.
-    Returns
-    -------
-     * 'self'
-        Returns self
-    """
-    # in case we want to inject some data from outside the pipeline
-    self.X = data
-    self.y = targets
-    # !!!!!!!!!!!!!!!! FIT ONLY IF DATA CHANGED !!!!!!!!!!!!!!!!!!!
-    # -------------------------------------------------------------
-    # in case we need to reduce the dimension of the data due to parallelism of the outer pipe, let's do it.
-    if self.filter_element:
-        self.X = self.filter_element.transform(self.X)
-    # if the groups are imbalanced, and a strategy is chosen, apply it here
-    if self.imbalanced_data_strategy_filter:
-        self.imbalanced_data_strategy_filter.fit(self.X, self.y)
-        self.X, self.y = self.imbalanced_data_strategy_filter.transform()
-    self._current_fold += 1
-    # be compatible to list of (image-) files
-    if isinstance(self.X, list):
-        self.X = np.asarray(self.X)
-    if isinstance(self.y, list):
-        self.y = np.asarray(self.y)
-    # handle neuro image paths as data
-    # ToDo: Need to check the DATA, not the img paths for neuro
-    new_data_hash = sha1(np.asarray(self.X, order='C')).hexdigest()
-    # fit
-    # 1. if it is first time ever or
-    # 2. the data did change for that fold or
-    # 3. if it is the mother pipe (then number_of_folds = 0)
-    if (len(self._fold_data_hashes) < self._num_of_folds) \
-            or (self._num_of_folds > 0 and self._fold_data_hashes[self._current_fold] != new_data_hash) \
-            or self._num_of_folds == 0:
-        # save data hash for that fold
-        if self._num_of_folds > 0:
-            if len(self._fold_data_hashes) < self._num_of_folds:
-                self._fold_data_hashes.append(new_data_hash)
-            else:
-                self._fold_data_hashes[self._current_fold] = new_data_hash
-        # optimize: iterate through configs and save results
-        if not self.is_final_fit:
-            # first check if correct optimizer metric has been chosen
-            # pass pipeline_elements so that OptimizerMetric can look for last
-            # element and use the corresponding score method
-            self.config_optimizer = OptimizerMetric(self.best_config_metric, self.pipeline_elements, self.metrics)
-            self.metrics = self.config_optimizer.check_metrics()
-            if 'score' in self.metrics:
-                Logger().warn('Attention: Scoring with default score function of estimator can slow down calculations!')
-            # generate OUTER ! cross validation splits to iterate over
-            self._generate_outer_cv_indices()
-            outer_fold_counter = 0
-            if not self._is_mother_pipe:
-                self.result_tree_name = self.name + '_outer_fold_' + str(self.__mother_outer_fold_counter)  \
-                                        + '_inner_fold_' + str(self.__mother_inner_fold_counter)
-            else:
-                self.result_tree_name = self.name
-            # initialize result logging with hyperpipe class
-            self.result_tree = MDBHyperpipe(name=self.result_tree_name)
-            self.result_tree.outer_folds = []
-            self.result_tree.eval_final_performance = self.eval_final_performance
-            self.result_tree.best_config_metric = self.best_config_metric
-            # loop over outer cross validation
-            for train_indices, test_indices in self.data_test_cases:
-                # give the optimizer the chance to inform about elements
-                self.optimizer.prepare(self.pipeline_elements)
-                outer_fold_counter += 1
-                outer_fold_fit_start_time = time.time()
-                Logger().info('HYPERPARAMETER SEARCH OF {0}, Outer Cross validation Fold {1}'
-                              .format(self.name, outer_fold_counter))
-                t1 = time.time()
-                # Prepare Train and validation set data
-                self._validation_X = self.X[train_indices]
-                self._validation_y = self.y[train_indices]
-                self._test_X = self.X[test_indices]
-                self._test_y = self.y[test_indices]
-                # Prepare inner cross validation
-                cv_iter = list(self.inner_cv.split(self._validation_X, self._validation_y))
-                num_folds = len(cv_iter)
-                num_samples_train = len(self._validation_y)
-                num_samples_test = len(self._test_y)
-                # distribute number of folds to encapsulated child hyperpipes
-                self._distribute_cv_info_to_hyperpipe_children(num_of_folds=num_folds,
-                                                               outer_fold_counter=outer_fold_counter)
-                tested_config_counter = 0
-                # add outer fold info object to result tree
-                outer_fold = MDBOuterFold(fold_nr=outer_fold_counter)
-                outer_fold.tested_config_list = []
-                self.result_tree.outer_folds.append(outer_fold)
-                # do the optimizing
-                for current_config in self.optimizer.next_config:
-                    self._distribute_cv_info_to_hyperpipe_children(reset=True, config_counter=tested_config_counter)
-                    hp = TestPipeline(self._pipe, current_config, self.metrics, self.update_mother_inner_fold_nr,
-                                      mongo_db_settings=self.persist_options,
-                                      callback_function=self.inner_cv_callback_function)
-                    Logger().debug('optimizing of:' + self.name)
-                    Logger().debug(self._optimize_printing(current_config))
-                    Logger().debug('calculating...')
-                    # Test the configuration cross validated by inner_cv object
-                    current_config_mdb = hp.calculate_cv_score(self._validation_X, self._validation_y, cv_iter,
-                                                        calculate_metrics_per_fold=self.calculate_metrics_per_fold,
-                                                        calculate_metrics_across_folds=self.calculate_metrics_across_folds)
-                    current_config_mdb.config_nr = tested_config_counter
-                    current_config_mdb.config_dict = current_config
-                    current_config_mdb.pipe_name = self.name
-                    tested_config_counter += 1
-                    current_config_mdb.human_readable_config = self.config_to_dict(current_config)
-                    # save the configuration of all children pipelines
-                    children_config = {}
-                    children_config_ref_list = []
-                    for pipe_step in self._pipe.steps:
-                        item = pipe_step[1]
-                        if isinstance(item, Hyperpipe):
-                            if item.local_search and item.best_config is not None:
-                                children_config[item.name] = item.best_config
-                        elif isinstance(item, PipelineStacking):
-                            for subhyperpipe_name, hyperpipe in item.pipe_elements.items():
-                                if isinstance(hyperpipe, Hyperpipe):
-                                    if hyperpipe.local_search and hyperpipe.best_config is not None:
-                                        # special case: we need to access pipe over pipeline_stacking element
-                                        children_config[item.name + '__' + subhyperpipe_name] = hyperpipe.best_config.config_dict
-                                    # children_config_ref_list.append(hyperpipe.best_config_outer_fold._id)
-                    specific_parameters = self._pipe.get_params()
-                    #current_config_mdb.full_model_spec = specific_parameters
-                    current_config_mdb.children_config_dict = children_config
-                    current_config_mdb.children_config_ref = children_config_ref_list
-                    Logger().verbose(self._optimize_printing(current_config))
-                    if not current_config_mdb.config_failed:
-                        # get optimizer_metric and forward to optimizer
-                        # todo: also pass greater_is_better=True/False to optimizer
-                        metric_train = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric)
-                        metric_test = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric, train=False)
-                        #
-                        # if not metric_train or metric_test:
-                        #     raise Exception("Config did not fail, but did not get any metrics either....!!?")
-                        config_performance = (metric_train, metric_test)
-                        # Print Result for config
-                        Logger().debug('...done:')
-                        Logger().verbose(self.config_optimizer.metric + str(config_performance))
-                    else:
-                        config_performance = (-1, -1)
-                        # Print Result for config
-                        Logger().debug('...failed:')
-                        Logger().error(current_config_mdb.config_error)
-                    # add config to result tree and do intermediate saving
-                    self.result_tree.outer_folds[-1].tested_config_list.append(current_config_mdb)
-                    # Todo: add try catch in case config cannot be written
-                    self.mongodb_writer.save(self.result_tree)
-                    # 3. inform optimizer about performance
-                    self.optimizer.evaluate_recent_performance(current_config, config_performance)
-                if tested_config_counter > 0:
-                    best_config_outer_fold = self.config_optimizer.get_optimum_config(outer_fold.tested_config_list)
-                    if not best_config_outer_fold:
-                        raise Exception("No best config was found!")
-                    best_config_outer_fold_mdb = MDBConfig()
-                    best_config_outer_fold_mdb.children_config_dict = best_config_outer_fold.children_config_dict
-                    best_config_outer_fold_mdb.pipe_name = self.name
-                    best_config_outer_fold_mdb.children_config_ref = best_config_outer_fold.children_config_ref
-                    # best_config_outer_fold_mdb.best_config_ref_to_train_item = best_config_outer_fold._id
-                    best_config_outer_fold_mdb.config_dict = best_config_outer_fold.config_dict
-                    best_config_outer_fold_mdb.human_readable_config = best_config_outer_fold.human_readable_config
-                    # inform user
-                    Logger().info('finished optimization of ' + self.name)
-                    Logger().verbose('Result')
-                    Logger().verbose('Number of tested configurations:' + str(tested_config_counter))
-                    Logger().verbose('Optimizer metric: ' + self.config_optimizer.metric + '\n' +
-                                     '   --> Greater is better: ' + str(self.config_optimizer.greater_is_better))
-                    Logger().info('Best config: ' + self._optimize_printing(best_config_outer_fold_mdb.config_dict) +
-                                  '\n' + '... with children config: '
-                                  + self._optimize_printing(best_config_outer_fold_mdb.children_config_dict))
-                    # ... and create optimal pipeline
-                    self.optimum_pipe = self._pipe
-                    # set self to best config
-                    self.optimum_pipe.set_params(**best_config_outer_fold_mdb.config_dict)
-                    # set all children to best config and inform to NOT optimize again, ONLY fit
-                    for child_name, child_config in best_config_outer_fold_mdb.children_config_dict.items():
-                        if child_config:
-                            # in case we have a pipeline stacking we need to identify the particular subhyperpipe
-                            splitted_name = child_name.split('__')
-                            if len(splitted_name) > 1:
-                                stacking_element = self.optimum_pipe.named_steps[splitted_name[0]]
-                                pipe_element = stacking_element.pipe_elements[splitted_name[1]]
-                            else:
-                                pipe_element = self.optimum_pipe.named_steps[child_name]
-                            pipe_element.set_params(**child_config)
-                            pipe_element.is_final_fit = True
-                    self._distribute_cv_info_to_hyperpipe_children(reset=True)
-                    Logger().verbose('...now fitting ' + self.name + ' with optimum configuration')
-                    fit_time_start = time.time()
-                    self.optimum_pipe.fit(self._validation_X, self._validation_y)
-                    final_fit_duration = time.time() - fit_time_start
-                    #self.best_config_outer_fold.full_model_spec = self.optimum_pipe.get_params()
-                    best_config_outer_fold_mdb.fit_duration_minutes = final_fit_duration / 60  # final_fit_duration is in seconds
-                    self.result_tree.outer_folds[-1].best_config = best_config_outer_fold_mdb
-                    self.result_tree.outer_folds[-1].best_config.inner_folds = []
-                    if self.eval_final_performance:
-                        # Todo: generate mean and std over outer folds as well. move this items to the top
-                        Logger().verbose('...now predicting ' + self.name + ' unseen data')
-                        test_score_mdb = TestPipeline.score(self.optimum_pipe, self._test_X, self._test_y,
-                                                            self.metrics,
-                                                            save_predictions=self.persist_options.save_predictions,
-                                                            save_feature_importances=self.persist_options.save_feature_importances)
-                        Logger().info('.. calculating metrics for test set (' + self.name + ')')
-                        Logger().verbose('...now predicting ' + self.name + ' final model with training data')
-                        train_score_mdb = TestPipeline.score(self.optimum_pipe, self._validation_X, self._validation_y,
-                                                             self.metrics,
-                                                             save_predictions=self.persist_options.save_predictions,
-                                                             save_feature_importances=self.persist_options.save_feature_importances)
-                        # save test fold
-                        outer_fold_mdb = MDBInnerFold()
-                        outer_fold_mdb.fold_nr = 1
-                        outer_fold_mdb.number_samples_training = num_samples_train
-                        outer_fold_mdb.number_samples_validation = num_samples_test
-                        outer_fold_mdb.training = train_score_mdb
-                        outer_fold_mdb.validation = test_score_mdb
-                        self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-                        Logger().info('PERFORMANCE TRAIN:')
-                        for m_key, m_value in train_score_mdb.metrics.items():
-                            Logger().info(str(m_key) + ": " + str(m_value))
-                        Logger().info('PERFORMANCE TEST:')
-                        for m_key, m_value in test_score_mdb.metrics.items():
-                            Logger().info(str(m_key) + ": " + str(m_value))
-                    else:
-                        # save test fold
-                        outer_fold_mdb = MDBInnerFold()
-                        outer_fold_mdb.fold_nr = 1
-                        outer_fold_mdb.number_samples_training = num_samples_train
-                        outer_fold_mdb.number_samples_validation = num_samples_test
-                        def _copy_inner_fold_means(metric_dict):
-                            # We copy all mean values from validation to the best config
-                            # training
-                            train_item_metrics = {}
-                            for m in metric_dict:
-                                if m.operation == str(FoldOperations.MEAN):
-                                    train_item_metrics[m.metric_name] = m.value
-                            train_item = MDBScoreInformation()
-                            train_item.metrics_copied_from_inner = True
-                            train_item.metrics = train_item_metrics
-                            return train_item
-                        # training
-                        outer_fold_mdb.training = _copy_inner_fold_means(best_config_outer_fold.metrics_train)
-                        # validation
-                        outer_fold_mdb.validation = _copy_inner_fold_means(best_config_outer_fold.metrics_test)
-                        self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-                Logger().info('This took {} minutes.'.format((time.time() - t1) / 60))
-                self.result_tree.time_of_results = datetime.datetime.now()
-                self.mongodb_writer.save(self.result_tree)
-                self._distribute_cv_info_to_hyperpipe_children(reset_final_fit=True, outer_fold_counter=outer_fold_counter)
-            # Compute all final metrics
-            self.result_tree.metrics_train, self.result_tree.metrics_test = MDBHelper.aggregate_metrics(self.result_tree.outer_folds,
-                                                                                                        self.metrics)
-            # save result tree to db or file or both
-            self.mongodb_writer.save(self.result_tree)
-            Logger().info("Saved result tree to database")
-            # Find best config across outer folds
-            self.best_config = self.config_optimizer.get_optimum_config_outer_folds(self.result_tree.outer_folds)
-            self.result_tree.best_config = self.best_config
-            Logger().info('OVERALL BEST CONFIGURATION')
-            Logger().info('--------------------------')
-            Logger().info(self._optimize_printing(self.best_config.config_dict) +
-                          '\n' + '... with children config: '
-                          + self._optimize_printing(self.best_config.children_config_dict))
-            # set self to best config
-            self.optimum_pipe = self._pipe
-            self.optimum_pipe.set_params(**self.best_config.config_dict)
-            self.optimum_pipe.fit(self._validation_X, self._validation_y)
-            # save results again
-            self.mongodb_writer.save(self.result_tree)
-            Logger().info("Saved overall best config to database")
-        ###############################################################################################
-        else:
-            self._pipe.fit(self.X, self.y, **fit_params)
-    else:
-        Logger().verbose("Avoided fitting of " + self.name + " on fold "
-                         + str(self._current_fold) + " because data did not change")
-        Logger().verbose('Best config of ' + self.name + ' : ' + str(self.best_config))
-    return self
-
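An end-to-end sketch of the search loop described above (element names are assumed to be registered in PhotonCore.json, and X and y are hypothetical data arrays):

    my_pipe += PipelineElement('StandardScaler')
    my_pipe += PipelineElement('SVC', hyperparameters={'C': [0.1, 1, 10]})
    my_pipe.fit(X, y)           # outer folds x inner folds x tested configs
    print(my_pipe.best_config)  # best configuration across outer folds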

def get_params(self, deep=True)

Retrieve parameters from sklearn pipeline


def inverse_transform_pipeline(self, hyperparameters, data, targets, data_to_inverse)

Inverse transform data for a pipeline with specific hyperparameter configuration

  1. Copy sklearn Pipeline,
  2. Set Parameters,
  3. Fit Pipeline to data and targets,
  4. Inverse transform data with that pipeline.

Parameters

  • 'hyperparameters' [dict]: The concrete configuration settings for the pipeline elements
  • 'data' [array-like]: The training data to which the pipeline is fitted
  • 'targets' [array-like]: The truth values for training
  • 'data_to_inverse' [array-like]: The data that should be inverse-transformed after training

Returns

Inverse-transformed data as array


def load_optimum_pipe(file)

Load optimal pipeline.

Parameters

  • 'file' [str]: File path specifying the .photon file to load the optimal pipeline from

Returns

sklearn Pipeline with all trained photon_pipelines


def predict(self, data)

Use the optimum pipe to predict the data

Returns

predicted targets
-def predict(self, data):
-    """
-    Use the optimum pipe to predict the data
-    Returns
-    -------
-        predicted targets
-    """
-    # Todo: if local_search = true then use optimized pipe here?
-    if self._pipe:
-        if self.filter_element:
-            data = self.filter_element.transform(data)
-        return self.optimum_pipe.predict(data)
-

def predict_proba(self, data)

Predict probabilities

Returns

predicted probabilities
-def predict_proba(self, data):
-    """
-    Predict probabilities
-    Returns
-    -------
-    predicted probabilities
-    """
-    if self._pipe:
-        if self.filter_element:
-            data = self.filter_element.transform(data)
-        return self.optimum_pipe.predict_proba(data)
-

def prettify_config_output(config_name, config_value)

Render "disabled = False" as "enabled = True" for better readability


def save_optimum_pipe(self, file)

Save the optimal pipeline only. The complete hyperpipe will not be saved.

Parameters

  • 'file' [str]: File path as string specifying the file to save the pipeline to

def set_params(self, **params)

Give parameter values to the pipeline elements


def transform(self, data)

Use the optimum pipe to transform the data

-def transform(self, data):
-    """
-    Use the optimum pipe to transform the data
-    """
-    if self._pipe:
-        if self.filter_element:
-            data = self.filter_element.transform(data)
-        return self.optimum_pipe.transform(data)
-

def update_mother_inner_fold_nr(self, new_inner_fold_nr)

Function handle so that the TestPipeline class from Photon's validation module can pass the information to hyperpipe children

Parameters

  • 'new_inner_fold_nr' [int]: in which inner_fold the mother hyperpipe currently is
-def update_mother_inner_fold_nr(self, new_inner_fold_nr: int):
-    """
-    Function handle so that the TestPipeline class from Photon's validation module can pass the information to hyperpipe children
-    Parameters
-    ----------
-    * 'new_inner_fold_nr' [int]:
-        in which inner_fold the mother hyperpipe currently is
-    """
-    self._distribute_cv_info_to_hyperpipe_children(inner_fold_counter=new_inner_fold_nr)
-

Instance variables

var X
var best_children_config
var best_config
var best_config_metric
var best_performance
var calculate_metrics_across_folds
var calculate_metrics_per_fold
var config_optimizer
var cv_iter
var data_test_cases
var eval_final_performance
var filter_element
var fit_duration
var groups
var inner_cv
var inner_cv_callback_function
var is_final_fit
var metrics
var mongodb_writer
var name
var optimum_pipe
var outer_cv
var pipeline_elements
var result_tree
var test_size
var y

class ImbalancedDataTransform


Applies the chosen strategy to the data in order to handle the imbalance in the data. Instantiates the strategy filter object according to the name given as a string.

-class ImbalancedDataTransform(BaseEstimator, TransformerMixin):
-    """
-    Applies the chosen strategy to the data in order to handle the imbalance in the data.
-    Instantiates the strategy filter object according to the name given as string.
-    """
-    _estimator_type = "transformer"
-
-    IMBALANCED_DICT = {
-        'oversampling': ["RandomOverSampler", "SMOTE", "ADASYN"],
-        'undersampling': ["ClusterCentroids",
-                          "RandomUnderSampler",
-                          "NearMiss",
-                          "InstanceHardnessThreshold",
-                          "CondensedNearestNeighbour",
-                          "EditedNearestNeighbours",
-                          "RepeatedEditedNearestNeighbours",
-                          "AllKNN",
-                          "NeighbourhoodCleaningRule",
-                          "OneSidedSelection"],
-        'combine': ["SMOTEENN", "SMOTETomek"]
-    }
-
-    def __init__(self, method_name: str, **kwargs):
-        """
-        Instantiates an object that transforms the data into balanced groups according to the given method
-
-        Possible values for method_name:
-        imbalance_type = OVERSAMPLING:
-            - RandomOverSampler
-            - SMOTE
-            - ADASYN
-
-        imbalance_type = UNDERSAMPLING:
-            - ClusterCentroids,
-            - RandomUnderSampler,
-            - NearMiss,
-            - InstanceHardnessThreshold,
-            - CondensedNearestNeighbour,
-            - EditedNearestNeighbours,
-            - RepeatedEditedNearestNeighbours,
-            - AllKNN,
-            - NeighbourhoodCleaningRule,
-            - OneSidedSelection
-
-        imbalance_type = COMBINE:
-            - SMOTEENN,
-            - SMOTETomek
-
-        :param method_name: which imbalanced strategy to use
-        :type method_name: str
-        :param kwargs: any parameters to pass to the imbalance strategy object
-        :type kwargs:  dict
-        """
-
-        self.method_name = method_name
-
-        imbalance_type = ''
-        for group, possible_strategies in ImbalancedDataTransform.IMBALANCED_DICT.items():
-            if method_name in possible_strategies:
-                imbalance_type = group
-
-        if imbalance_type == "oversampling":
-            home = "over_sampling"
-        elif imbalance_type == "undersampling":
-            home = "under_sampling"
-        elif imbalance_type =="combine" or imbalance_type =="combination":
-            home = "combine"
-        else:
-            raise Exception("Imbalance Type not found. Can be oversampling, undersampling or combine")
-
-        # Todo: Try Catch Class Not Found Exception
-
-        desired_class_home = "imblearn." + home
-        desired_class_name = method_name
-
-        try:
-            imported_module = __import__(desired_class_home, globals(), locals(), [desired_class_name], 0)
-            desired_class = getattr(imported_module, desired_class_name)
-        except Exception:
-            raise Exception("Could not find imbalance strategy '" + desired_class_name + "' in " + desired_class_home)
-
-        self.method = desired_class(**kwargs)
-
-        self.x_transformed = None
-        self.y_transformed = None
-
-    def fit_sample(self, X, y):
-
-        # ATTENTION: Works only if fit is called before transform!!!
-        self.x_transformed, self.y_transformed = self.method.fit_sample(X, y)
-        return self.x_transformed, self.y_transformed
-
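A usage sketch under the constraint noted in the comment above (X and y are hypothetical; 'SMOTE' is one of the oversampling strategies listed in __init__, and k_neighbors is forwarded to imblearn's SMOTE through **kwargs):

    balancer = ImbalancedDataTransform('SMOTE', k_neighbors=5)
    X_balanced, y_balanced = balancer.fit_sample(X, y)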

Ancestors (in MRO)

  • ImbalancedDataTransform
  • sklearn.base.BaseEstimator
  • sklearn.base.TransformerMixin
  • builtins.object

Class variables

var IMBALANCED_DICT

Static methods


def __init__(self, method_name, **kwargs)

Instantiates an object that transforms the data into balanced groups according to the given method

Possible values for method_name:

imbalance_type = OVERSAMPLING:
  - RandomOverSampler
  - SMOTE
  - ADASYN

imbalance_type = UNDERSAMPLING:
  - ClusterCentroids
  - RandomUnderSampler
  - NearMiss
  - InstanceHardnessThreshold
  - CondensedNearestNeighbour
  - EditedNearestNeighbours
  - RepeatedEditedNearestNeighbours
  - AllKNN
  - NeighbourhoodCleaningRule
  - OneSidedSelection

imbalance_type = COMBINE:
  - SMOTEENN
  - SMOTETomek

:param method_name: which imbalanced strategy to use
:type method_name: str
:param kwargs: any parameters to pass to the imbalance strategy object
:type kwargs: dict


def fit_sample(self, X, y)


def fit_transform(self, X, y=None, **fit_params)

Fit to data, then transform it.

Fits transformer to X and y with optional parameters fit_params and returns a transformed version of X.

Parameters

X : numpy array of shape [n_samples, n_features]
    Training set.
y : numpy array of shape [n_samples]
    Target values.

Returns

X_new : numpy array of shape [n_samples, n_features_new]
    Transformed array.

-def fit_transform(self, X, y=None, **fit_params):
-    """Fit to data, then transform it.
-    Fits transformer to X and y with optional parameters fit_params
-    and returns a transformed version of X.
-    Parameters
-    ----------
-    X : numpy array of shape [n_samples, n_features]
-        Training set.
-    y : numpy array of shape [n_samples]
-        Target values.
-    Returns
-    -------
-    X_new : numpy array of shape [n_samples, n_features_new]
-        Transformed array.
-    """
-    # non-optimized default implementation; override when a better
-    # method is possible for a given clustering algorithm
-    if y is None:
-        # fit method of arity 1 (unsupervised transformation)
-        return self.fit(X, **fit_params).transform(X)
-    else:
-        # fit method of arity 2 (supervised transformation)
-        return self.fit(X, y, **fit_params).transform(X)
-

def get_params(self, deep=True)

Get parameters for this estimator.

Parameters

deep : boolean, optional
    If True, will return the parameters for this estimator and
    contained subobjects that are estimators.

Returns

params : mapping of string to any
    Parameter names mapped to their values.

-def get_params(self, deep=True):
-    """Get parameters for this estimator.
-    Parameters
-    ----------
-    deep : boolean, optional
-        If True, will return the parameters for this estimator and
-        contained subobjects that are estimators.
-    Returns
-    -------
-    params : mapping of string to any
-        Parameter names mapped to their values.
-    """
-    out = dict()
-    for key in self._get_param_names():
-        # We need deprecation warnings to always be on in order to
-        # catch deprecated param values.
-        # This is set in utils/__init__.py but it gets overwritten
-        # when running under python3 somehow.
-        warnings.simplefilter("always", DeprecationWarning)
-        try:
-            with warnings.catch_warnings(record=True) as w:
-                value = getattr(self, key, None)
-            if len(w) and w[0].category == DeprecationWarning:
-                # if the parameter is deprecated, don't show it
-                continue
-        finally:
-            warnings.filters.pop(0)
-        # XXX: should we rather test if instance of estimator?
-        if deep and hasattr(value, 'get_params'):
-            deep_items = value.get_params().items()
-            out.update((key + '__' + k, val) for k, val in deep_items)
-        out[key] = value
-    return out
-

def set_params(self, **params)

Set the parameters of this estimator.

-

The method works on simple estimators as well as on nested objects (such as pipelines). The latter have parameters of the form <component>__<parameter> so that it's possible to update each component of a nested object.

-

Returns

-

self

-def set_params(self, **params):
-    """Set the parameters of this estimator.
-    The method works on simple estimators as well as on nested objects
-    (such as pipelines). The latter have parameters of the form
-    ``<component>__<parameter>`` so that it's possible to update each
-    component of a nested object.
-    Returns
-    -------
-    self
-    """
-    if not params:
-        # Simple optimization to gain speed (inspect is slow)
-        return self
-    valid_params = self.get_params(deep=True)
-    for key, value in six.iteritems(params):
-        split = key.split('__', 1)
-        if len(split) > 1:
-            # nested objects case
-            name, sub_name = split
-            if name not in valid_params:
-                raise ValueError('Invalid parameter %s for estimator %s. '
-                                 'Check the list of available parameters '
-                                 'with `estimator.get_params().keys()`.' %
-                                 (name, self))
-            sub_object = valid_params[name]
-            sub_object.set_params(**{sub_name: value})
-        else:
-            # simple objects case
-            if key not in valid_params:
-                raise ValueError('Invalid parameter %s for estimator %s. '
-                                 'Check the list of available parameters '
-                                 'with `estimator.get_params().keys()`.' %
-                                 (key, self.__class__.__name__))
-            setattr(self, key, value)
-    return self
-
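The nested case can be illustrated with a hypothetical pipeline whose step is named 'svc':

    # The key is split at the first '__': 'svc' selects the sub-object and
    # 'C' is forwarded to its own set_params, i.e. this is equivalent to
    # pipe.get_params()['svc'].set_params(C=10.0).
    pipe.set_params(svc__C=10.0)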

Instance variables

var method
var method_name
var x_transformed
var y_transformed

class OutputSettings

-class OutputSettings:
-
-    def __init__(self, mongodb_connect_url: str = None,
-                 save_predictions: bool = False,
-                 save_feature_importances: bool = False,
-                 local_file: str = '',
-                 log_filename: str = ''):
-
-        self.mongodb_connect_url = mongodb_connect_url
-        self.save_predictions = save_predictions
-        # coef_ or feature_importances
-        self.save_feature_importances = save_feature_importances
-        self.local_file = local_file
-        self.log_file = log_filename
-
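A construction sketch showing the available options (the connection URL and file names are placeholders):

    persist_options = OutputSettings(mongodb_connect_url='mongodb://localhost:27017/photon_results',
                                     save_predictions=True,
                                     save_feature_importances=True,
                                     log_filename='photon.log')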

Ancestors (in MRO)

  • OutputSettings
  • builtins.object

Static methods


def __init__(self, mongodb_connect_url=None, save_predictions=False, save_feature_importances=False, local_file='', log_filename='')

Initialize self. See help(type(self)) for accurate signature.

-
- -
-
def __init__(self, mongodb_connect_url: str = None,
-             save_predictions: bool = False,
-             save_feature_importances: bool = False,
-             local_file: str = '',
-             log_filename: str = ''):
-    self.mongodb_connect_url = mongodb_connect_url
-    self.save_predictions = save_predictions
-    # coef_ or feature_importances
-    self.save_feature_importances = save_feature_importances
-    self.local_file = local_file
-    self.log_file = log_filename

Instance variables

var local_file

var log_file

var mongodb_connect_url

var save_feature_importances

var save_predictions
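A minimal usage sketch; the MongoDB connection URL and file names below are placeholders:

    # Sketch: persist predictions and feature importances locally and in
    # MongoDB; the URL and file names are placeholders.
    settings = OutputSettings(mongodb_connect_url='mongodb://localhost:27017/photon',
                              save_predictions=True,
                              save_feature_importances=True,
                              local_file='photon_results.p',
                              log_filename='photon.log')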

class PipelineBranch


A substream of pipeline elements that is encapsulated e.g. for parallelization

Parameters

  • 'name' [str]: Name of the encapsulated item and/or summary of the encapsulated element's functions

class PipelineBranch(PipelineElement):
-    """
-     A substream of pipeline elements that is encapsulated e.g. for parallelization
-
-     Parameters
-     ----------
-        * 'name' [str]:
-            Name of the encapsulated item and/or summary of the encapsulated element's functions
-
-        """
-
-    def __init__(self, name):
-
-        super().__init__(name, {}, test_disabled=False, disabled=False, base_element=True)
-
-        self.pipeline_elements = []
-
-    def __iadd__(self, pipe_element):
-        """
-        Add an element to the sub pipeline
-        Returns self
-
-        Parameters
-        ----------
-        * 'pipe_element' [PipelineElement or Hyperpipe]:
-            The object to add, being either a transformer or an estimator.
-
-        """
-        self.pipeline_elements.append(pipe_element)
-        self._prepare_pipeline()
-        return self
-
-    def add(self, pipe_element):
-        self.__iadd__(pipe_element)
-
-    def _prepare_pipeline(self):
-        """ Generates sklearn pipeline with all underlying steps """
-        pipeline_steps = []
-
-        for item in self.pipeline_elements:
-            # pipeline_steps.append((item.name, item.base_element))
-            pipeline_steps.append((item.name, item))
-            self._hyperparameters[item.name] = item.hyperparameters
-
-        self.generate_sklearn_hyperparameters()
-        self.base_element = Pipeline(pipeline_steps)
-
-    @property
-    def hyperparameters(self):
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value):
-        """
-        Setting hyperparameters does not make sense; only the items that were added can be optimized, not the container (self)
-        """
-        return None
-
-    def generate_config_grid(self):
-        return create_global_config_grid(self.pipeline_elements, self.name)
-
-    def generate_sklearn_hyperparameters(self):
-        """
-        Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-        """
-        self._hyperparameters = {}
-        for element in self.pipeline_elements:
-            for attribute, value_list in element.hyperparameters.items():
-                self._hyperparameters[self.name + '__' + attribute] = value_list
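A short usage sketch; 'StandardScaler' and 'PCA' are assumed to be registered element names in PhotonCore.json:

    # Sketch: encapsulate scaling and dimensionality reduction as one
    # pipeline step; the element names are assumed to be registered.
    branch = PipelineBranch('preprocessing')
    branch += PipelineElement('StandardScaler')
    branch += PipelineElement('PCA', hyperparameters={'n_components': [3, 5]})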

Ancestors (in MRO)


Class variables


var ELEMENT_DICTIONARY


Static methods

def __init__(self, name)

Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)

Returns

instantiated class object

def __init__(self, name):
-    super().__init__(name, {}, test_disabled=False, disabled=False, base_element=True)
-    self.pipeline_elements = []

def add(self, pipe_element)

def add(self, pipe_element):
-    self.__iadd__(pipe_element)

def copy_me(self)

def copy_me(self):
-    return deepcopy(self)

def fit(self, data, targets=None)

Calls the fit function of the base element

Returns

self

def fit(self, data, targets=None):
-    """
-    Calls the fit function of the base element
-    Returns
-    ------
-    self
-    """
-    if not self.disabled:
-        obj = self.base_element
-        obj.fit(data, targets)
-        # self.base_element.fit(data, targets)
-    return self

def generate_config_grid(self)

def generate_config_grid(self):
-    return create_global_config_grid(self.pipeline_elements, self.name)

def generate_sklearn_hyperparameters(self)

Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value

def generate_sklearn_hyperparameters(self):
-    """
-    Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-    """
-    self._hyperparameters = {}
-    for element in self.pipeline_elements:
-        for attribute, value_list in element.hyperparameters.items():
-            self._hyperparameters[self.name + '__' + attribute] = value_list

def get_params(self, deep=True)

Forwards the get_params request to the wrapped base element

def get_params(self, deep: bool=True):
-    """
-    Forwards the get_params request to the wrapped base element
-    """
-    return self.base_element.get_params(deep)

def inverse_transform(self, data)

Calls inverse_transform on the base element

def inverse_transform(self, data):
-    """
-    Calls inverse_transform on the base element
-    """
-    if hasattr(self.base_element, 'inverse_transform'):
-        return self.base_element.inverse_transform(data)
-    else:
-        # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-        return data

def predict(self, data)

Calls predict function on the base element.

IF PREDICT IS NOT AVAILABLE, CALLS TRANSFORM. This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer. Sklearn usually expects the last element to predict. This is also needed in case we are using an autoencoder, which is first trained by using predict and afterwards only used for transforming.

def predict(self, data):
-    """
-    Calls predict function on the base element.
-    IF PREDICT IS NOT AVAILABLE CALLS TRANSFORM.
-    This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer.
-    Sklearn usually expects the last element to predict.
-    This is also needed in case we are using an autoencoder, which is first trained by using predict and afterwards
-    only used for transforming.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        elif hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        else:
-            Logger().error('BaseException. base Element should have function ' +
-                           'predict, or at least transform.')
-            raise BaseException('base Element should have function predict, or at least transform.')
-    else:
-        return data

def predict_proba(self, data)

Predict probabilities. The base element needs a predict_proba() function; otherwise a base exception is thrown.

def predict_proba(self, data):
-    """
-    Predict probabilities
-    base element needs predict_proba() function, otherwise throw
-    base exception.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict_proba'):
-            return self.base_element.predict_proba(data)
-        else:
-            Logger().error('BaseException. base Element should have "predict_proba" function.')
-        raise BaseException('base Element should have predict_proba function.')
-    return data

def prettify_config_output(self, config_name, config_value, return_dict=False)

Make hyperparameter combinations human readable

def prettify_config_output(self, config_name: str, config_value, return_dict:bool=False):
-    """Make hyperparameter combinations human readable """
-    if config_name == "disabled" and config_value is False:
-        if return_dict:
-            return {'enabled':True}
-        else:
-            return "enabled = True"
-    else:
-        if return_dict:
-            return {config_name:config_value}
-        else:
-            return config_name + '=' + str(config_value)

def score(self, X_test, y_test)

Calls the score function on the base element: returns a goodness-of-fit measure or a likelihood of unseen data.

def score(self, X_test, y_test):
-    """
-    Calls the score function on the base element:
-    Returns a goodness of fit measure or a likelihood of unseen data:
-    """
-    return self.base_element.score(X_test, y_test)

def set_params(self, **kwargs)

Forwards the set_params request to the wrapped base element. Takes care of the disabled parameter, which is additionally attached by the PHOTON wrapper.

def set_params(self, **kwargs):
-    """
-    Forwards the set_params request to the wrapped base element
-    Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
-    """
-    # element disable is a construct used for this container only
-    if self._sklearn_disabled in kwargs:
-        self.disabled = kwargs[self._sklearn_disabled]
-        del kwargs[self._sklearn_disabled]
-    elif 'disabled' in kwargs:
-        self.disabled = kwargs['disabled']
-        del kwargs['disabled']
-    self.base_element.set_params(**kwargs)
-    return self

def transform(self, data)

Calls transform on the base element.

IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT. This is used if we are using an estimator as a preprocessing step.

def transform(self, data):
-    """
-    Calls transform on the base element.
-    IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT.
-    This is used if we are using an estimator as a preprocessing step.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        elif hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        else:
-            Logger().error('BaseException: transform-predict-mess')
-            raise BaseException('transform-predict-mess')
-    else:
-        return data

Instance variables

var hyperparameters

var pipeline_elements

Methods

def create(cls, name, base_element, hyperparameters, test_disabled=False, disabled=False, **kwargs)

Takes an instantiated object and encapsulates it into the PHOTON structure, adds the disabled function, and attaches information about the hyperparameters that should be tested.

@classmethod
-def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-    """
-    Takes an instantiated object and encapsulates it into the PHOTON structure,
-    add the disabled function and attaches information about the hyperparameters that should be tested
-    """
-    return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)

class PipelineElement


Photon wrapper class for any transformer or predictor element in the pipeline.

  1. Saves the hyperparameters that are to be tested and creates a grid of all hyperparameter configurations
  2. Enables fast and rapid instantiation of pipeline elements per string identifier, e.g. 'svc' creates an sklearn.svm.SVC object
  3. Attaches a "disable" switch to every element in the pipeline in order to test a complete disable

Parameters

  • 'name' [str]: A string literal encoding the class to be instantiated
  • 'hyperparameters' [dict]: Which values/value range should be tested for the hyperparameter, in the form "hyperparameter_name: [array of parameter values to be tested]"
  • 'test_disabled' [bool]: If the hyperparameter search should evaluate a complete disabling of the element
  • 'disabled' [bool]: If true, the element is currently disabled and does nothing except return the data it received
  • 'kwargs' [dict]: Any parameters that should be passed to the object to be instantiated, default parameters

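For example, the following sketch wraps an SVM and declares the hyperparameters to optimize; 'SVC' is assumed to be a registered element name:

    # Sketch: wrap an estimator by its string identifier and declare the
    # hyperparameter grid plus a complete-disable test.
    svc = PipelineElement('SVC',
                          hyperparameters={'kernel': ['linear', 'rbf'],
                                           'C': [0.5, 1.0, 2.0]},
                          test_disabled=True)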
class PipelineElement(BaseEstimator):
-    """
-    Photon wrapper class for any transformer or predictor element in the pipeline.
-
-    1. Saves the hyperparameters that are to be tested and creates a grid of all hyperparameter configurations
-    2. Enables fast and rapid instantiation of pipeline elements per string identifier,
-         e.g 'svc' creates an sklearn.svm.SVC object.
-    3. Attaches a "disable" switch to every element in the pipeline in order to test a complete disable
-
-
-    Parameters
-    ----------
-    * 'name' [str]:
-       A string literal encoding the class to be instantiated
-    * 'hyperparameters' [dict]:
-       Which values/value range should be tested for the hyperparameter.
-       In form of "Hyperparameter_name: [array of parameter values to be tested]"
-    * 'test_disabled' [bool]:
-        If the hyperparameter search should evaluate a complete disabling of the element
-    * 'disabled' [bool]:
-        If true, the element is currently disabled and does nothing except return the data it received
-    * 'kwargs' [dict]:
-        Any parameters that should be passed to the object to be instantiated, default parameters
-
-    """
-    # Registering Pipeline Elements
-    ELEMENT_DICTIONARY = PhotonRegister.get_package_info()
-
-    def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False,
-                 disabled: bool =False, base_element=None,
-                 **kwargs):
-        """
-        Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)
-
-        Returns
-        -------
-        instantiated class object
-        """
-        if hyperparameters is None:
-            hyperparameters = {}
-
-        if not base_element:
-            if name in PipelineElement.ELEMENT_DICTIONARY:
-                try:
-                    desired_class_info = PipelineElement.ELEMENT_DICTIONARY[name]
-                    desired_class_home = desired_class_info[0]
-                    desired_class_name = desired_class_info[1]
-                    imported_module = __import__(desired_class_home, globals(), locals(), desired_class_name, 0)
-                    desired_class = getattr(imported_module, desired_class_name)
-                    base_element = desired_class(**kwargs)
-                    obj = PipelineElement(name, hyperparameters, test_disabled, disabled, base_element)
-                    self.base_element = obj
-                except AttributeError as ae:
-                    Logger().error('ValueError: Could not find according class:'
-                                   + str(PipelineElement.ELEMENT_DICTIONARY[name]))
-                    raise ValueError('Could not find according class:', PipelineElement.ELEMENT_DICTIONARY[name])
-            else:
-                Logger().error('Element not supported right now:' + name)
-                raise NameError('Element not supported right now:', name)
-        else:
-            self.base_element = base_element
-
-
-        # Todo: check if hyperparameters are members of the class
-        # Todo: write method that returns any hyperparameter that could be optimized --> sklearn: get_params.keys
-        # Todo: map any hyperparameter to a possible default list of values to try
-        self.name = name
-        self.test_disabled = test_disabled
-        self._sklearn_disabled = self.name + '__disabled'
-        self._hyperparameters = hyperparameters
-        # check if hyperparameters are already in sklearn style
-        if len(hyperparameters) > 0:
-            key_0 = next(iter(hyperparameters))
-            if self.name not in key_0:
-                self.hyperparameters = hyperparameters
-        self.disabled = disabled
-
-    def copy_me(self):
-        return deepcopy(self)
-
-    @classmethod
-    def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-        """
-        Takes an instantiated object and encapsulates it into the PHOTON structure,
-        add the disabled function and attaches information about the hyperparameters that should be tested
-        """
-        return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
-
-    @property
-    def hyperparameters(self):
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value: dict):
-        self.generate_sklearn_hyperparameters(value)
-
-    def generate_config_grid(self):
-        config_dict = create_global_config_dict([self])
-        if len(config_dict) > 0:
-            if self.test_disabled:
-                config_dict.pop(self._sklearn_disabled)
-            config_list = list(ParameterGrid(config_dict))
-            if self.test_disabled:
-                config_list.append({self._sklearn_disabled: True})
-            return config_list
-        else:
-            return []
-
-    def generate_sklearn_hyperparameters(self, value: dict):
-        """
-        Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-        """
-        self._hyperparameters = {}
-        for attribute, value_list in value.items():
-            self._hyperparameters[self.name + '__' + attribute] = value_list
-        if self.test_disabled:
-            self._hyperparameters[self._sklearn_disabled] = [False, True]
-
-    def get_params(self, deep: bool=True):
-        """
-        Forwards the get_params request to the wrapped base element
-        """
-        return self.base_element.get_params(deep)
-
-
-    def set_params(self, **kwargs):
-        """
-        Forwards the set_params request to the wrapped base element
-        Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
-        """
-        # element disable is a construct used for this container only
-        if self._sklearn_disabled in kwargs:
-            self.disabled = kwargs[self._sklearn_disabled]
-            del kwargs[self._sklearn_disabled]
-        elif 'disabled' in kwargs:
-            self.disabled = kwargs['disabled']
-            del kwargs['disabled']
-        self.base_element.set_params(**kwargs)
-        return self
-
-    def fit(self, data, targets=None):
-        """
-        Calls the fit function of the base element
-
-        Returns
-        ------
-        self
-        """
-        if not self.disabled:
-            obj = self.base_element
-            obj.fit(data, targets)
-            # self.base_element.fit(data, targets)
-        return self
-
-    def predict(self, data):
-        """
-        Calls predict function on the base element.
-
-        IF PREDICT IS NOT AVAILABLE CALLS TRANSFORM.
-        This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer.
-        Sklearn usually expects the last element to predict.
-        This is also needed in case we are using an autoencoder, which is first trained by using predict and afterwards
-        only used for transforming.
-        """
-        if not self.disabled:
-            if hasattr(self.base_element, 'predict'):
-                return self.base_element.predict(data)
-            elif hasattr(self.base_element, 'transform'):
-                return self.base_element.transform(data)
-            else:
-                Logger().error('BaseException. base Element should have function ' +
-                               'predict, or at least transform.')
-                raise BaseException('base Element should have function predict, or at least transform.')
-        else:
-            return data
-
-    def predict_proba(self, data):
-        """
-        Predict probabilities
-        base element needs predict_proba() function, otherwise throw
-        base exception.
-        """
-        if not self.disabled:
-            if hasattr(self.base_element, 'predict_proba'):
-                return self.base_element.predict_proba(data)
-            else:
-                Logger().error('BaseException. base Element should have "predict_proba" function.')
-            raise BaseException('base Element should have predict_proba function.')
-        return data
-
-    # def fit_predict(self, data, targets):
-    #     if not self.disabled:
-    #         return self.base_element.fit_predict(data, targets)
-    #     else:
-    #         return data
-
-    def transform(self, data):
-        """
-        Calls transform on the base element.
-
-        IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT.
-        This is used if we are using an estimator as a preprocessing step.
-        """
-        if not self.disabled:
-            if hasattr(self.base_element, 'transform'):
-                return self.base_element.transform(data)
-            elif hasattr(self.base_element, 'predict'):
-                return self.base_element.predict(data)
-            else:
-                Logger().error('BaseException: transform-predict-mess')
-                raise BaseException('transform-predict-mess')
-        else:
-            return data
-
-    def inverse_transform(self, data):
-        """
-        Calls inverse_transform on the base element
-        """
-        if hasattr(self.base_element, 'inverse_transform'):
-            return self.base_element.inverse_transform(data)
-        else:
-            # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-            return data
-
-    # def fit_transform(self, data, targets=None):
-    #     if not self.disabled:
-    #         if hasattr(self.base_element, 'fit_transform'):
-    #             return self.base_element.fit_transform(data, targets)
-    #         elif hasattr(self.base_element, 'transform'):
-    #             self.base_element.fit(data, targets)
-    #             return self.base_element.transform(data)
-    #         # elif hasattr(self.base_element, 'predict'):
-    #         #     self.base_element.fit(data, targets)
-    #         #     return self.base_element.predict(data)
-    #     else:
-    #         return data
-
-    def score(self, X_test, y_test):
-        """
-        Calls the score function on the base element:
-        Returns a goodness of fit measure or a likelihood of unseen data:
-        """
-        return self.base_element.score(X_test, y_test)
-
-    def prettify_config_output(self, config_name: str, config_value, return_dict:bool=False):
-        """Make hyperparameter combinations human readable """
-        if config_name == "disabled" and config_value is False:
-            if return_dict:
-                return {'enabled':True}
-            else:
-                return "enabled = True"
-        else:
-            if return_dict:
-                return {config_name:config_value}
-            else:
-                return config_name + '=' + str(config_value)

Ancestors (in MRO)


Class variables


var ELEMENT_DICTIONARY


Static methods

def __init__(self, name, hyperparameters=None, test_disabled=False, disabled=False, base_element=None, **kwargs)

Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)

Returns

instantiated class object

def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False,
-             disabled: bool =False, base_element=None,
-             **kwargs):
-    """
-    Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)
-    Returns
-    -------
-    instantiated class object
-    """
-    if hyperparameters is None:
-        hyperparameters = {}
-    if not base_element:
-        if name in PipelineElement.ELEMENT_DICTIONARY:
-            try:
-                desired_class_info = PipelineElement.ELEMENT_DICTIONARY[name]
-                desired_class_home = desired_class_info[0]
-                desired_class_name = desired_class_info[1]
-                imported_module = __import__(desired_class_home, globals(), locals(), desired_class_name, 0)
-                desired_class = getattr(imported_module, desired_class_name)
-                base_element = desired_class(**kwargs)
-                obj = PipelineElement(name, hyperparameters, test_disabled, disabled, base_element)
-                self.base_element = obj
-            except AttributeError as ae:
-                Logger().error('ValueError: Could not find according class:'
-                               + str(PipelineElement.ELEMENT_DICTIONARY[name]))
-                raise ValueError('Could not find according class:', PipelineElement.ELEMENT_DICTIONARY[name])
-        else:
-            Logger().error('Element not supported right now:' + name)
-            raise NameError('Element not supported right now:', name)
-    else:
-        self.base_element = base_element
-    # Todo: check if hyperparameters are members of the class
-    # Todo: write method that returns any hyperparameter that could be optimized --> sklearn: get_params.keys
-    # Todo: map any hyperparameter to a possible default list of values to try
-    self.name = name
-    self.test_disabled = test_disabled
-    self._sklearn_disabled = self.name + '__disabled'
-    self._hyperparameters = hyperparameters
-    # check if hyperparameters are already in sklearn style
-    if len(hyperparameters) > 0:
-        key_0 = next(iter(hyperparameters))
-        if self.name not in key_0:
-            self.hyperparameters = hyperparameters
-    self.disabled = disabled

def copy_me(self)

def copy_me(self):
-    return deepcopy(self)

def fit(self, data, targets=None)

Calls the fit function of the base element

Returns

self

def fit(self, data, targets=None):
-    """
-    Calls the fit function of the base element
-    Returns
-    ------
-    self
-    """
-    if not self.disabled:
-        obj = self.base_element
-        obj.fit(data, targets)
-        # self.base_element.fit(data, targets)
-    return self

def generate_config_grid(self)

def generate_config_grid(self):
-    config_dict = create_global_config_dict([self])
-    if len(config_dict) > 0:
-        if self.test_disabled:
-            config_dict.pop(self._sklearn_disabled)
-        config_list = list(ParameterGrid(config_dict))
-        if self.test_disabled:
-            config_list.append({self._sklearn_disabled: True})
-        return config_list
-    else:
-        return []
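As a sketch of what this expands to: for an element named 'svc' (a made-up name) with two hyperparameters, ParameterGrid yields every combination, and test_disabled appends one extra all-disabled configuration:

    # Sketch: the cross product that ParameterGrid builds from the
    # sklearn-style hyperparameter dict of an element named 'svc'.
    from sklearn.model_selection import ParameterGrid
    config_list = list(ParameterGrid({'svc__kernel': ['linear', 'rbf'],
                                      'svc__C': [0.5, 1.0]}))
    print(len(config_list))                      # -> 4 combinations
    config_list.append({'svc__disabled': True})  # added when test_disabled=True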

def generate_sklearn_hyperparameters(self, value)

Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value

def generate_sklearn_hyperparameters(self, value: dict):
-    """
-    Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-    """
-    self._hyperparameters = {}
-    for attribute, value_list in value.items():
-        self._hyperparameters[self.name + '__' + attribute] = value_list
-    if self.test_disabled:
-        self._hyperparameters[self._sklearn_disabled] = [False, True]
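The convention itself is simple enough to show inline; the element name 'svc' is made up:

    # Sketch: 'C' of an element named 'svc' becomes 'svc__C'; test_disabled
    # adds the extra 'svc__disabled' switch.
    hyperparameters = {'C': [0.1, 1.0]}
    sklearn_style = {'svc__' + key: values for key, values in hyperparameters.items()}
    sklearn_style['svc__disabled'] = [False, True]
    print(sklearn_style)  # {'svc__C': [0.1, 1.0], 'svc__disabled': [False, True]}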

def get_params(self, deep=True)

Forwards the get_params request to the wrapped base element

def get_params(self, deep: bool=True):
-    """
-    Forwards the get_params request to the wrapped base element
-    """
-    return self.base_element.get_params(deep)

def inverse_transform(self, data)

Calls inverse_transform on the base element

def inverse_transform(self, data):
-    """
-    Calls inverse_transform on the base element
-    """
-    if hasattr(self.base_element, 'inverse_transform'):
-        return self.base_element.inverse_transform(data)
-    else:
-        # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-        return data

def predict(self, data)

Calls predict function on the base element.

IF PREDICT IS NOT AVAILABLE, CALLS TRANSFORM. This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer. Sklearn usually expects the last element to predict. This is also needed in case we are using an autoencoder, which is first trained by using predict and afterwards only used for transforming.

def predict(self, data):
-    """
-    Calls predict function on the base element.
-    IF PREDICT IS NOT AVAILABLE CALLS TRANSFORM.
-    This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer.
-    Sklearn usually expects the last element to predict.
-    This is also needed in case we are using an autoencoder, which is first trained by using predict and afterwards
-    only used for transforming.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        elif hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        else:
-            Logger().error('BaseException. base Element should have function ' +
-                           'predict, or at least transform.')
-            raise BaseException('base Element should have function predict, or at least transform.')
-    else:
-        return data

def predict_proba(self, data)

Predict probabilities. The base element needs a predict_proba() function; otherwise a base exception is thrown.

def predict_proba(self, data):
-    """
-    Predict probabilities
-    base element needs predict_proba() function, otherwise throw
-    base exception.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict_proba'):
-            return self.base_element.predict_proba(data)
-        else:
-            Logger().error('BaseException. base Element should have "predict_proba" function.')
-        raise BaseException('base Element should have predict_proba function.')
-    return data

def prettify_config_output(self, config_name, config_value, return_dict=False)

Make hyperparameter combinations human readable

def prettify_config_output(self, config_name: str, config_value, return_dict:bool=False):
-    """Make hyperparameter combinations human readable """
-    if config_name == "disabled" and config_value is False:
-        if return_dict:
-            return {'enabled':True}
-        else:
-            return "enabled = True"
-    else:
-        if return_dict:
-            return {config_name:config_value}
-        else:
-            return config_name + '=' + str(config_value)

def score(self, X_test, y_test)

Calls the score function on the base element: returns a goodness-of-fit measure or a likelihood of unseen data.

def score(self, X_test, y_test):
-    """
-    Calls the score function on the base element:
-    Returns a goodness of fit measure or a likelihood of unseen data:
-    """
-    return self.base_element.score(X_test, y_test)

def set_params(self, **kwargs)

Forwards the set_params request to the wrapped base element. Takes care of the disabled parameter, which is additionally attached by the PHOTON wrapper.

def set_params(self, **kwargs):
-    """
-    Forwards the set_params request to the wrapped base element
-    Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
-    """
-    # element disable is a construct used for this container only
-    if self._sklearn_disabled in kwargs:
-        self.disabled = kwargs[self._sklearn_disabled]
-        del kwargs[self._sklearn_disabled]
-    elif 'disabled' in kwargs:
-        self.disabled = kwargs['disabled']
-        del kwargs['disabled']
-    self.base_element.set_params(**kwargs)
-    return self

def transform(self, data)

Calls transform on the base element.

IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT. This is used if we are using an estimator as a preprocessing step.

def transform(self, data):
-    """
-    Calls transform on the base element.
-    IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT.
-    This is used if we are using an estimator as a preprocessing step.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        elif hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        else:
-            Logger().error('BaseException: transform-predict-mess')
-            raise BaseException('transform-predict-mess')
-    else:
-        return data

Instance variables

var disabled

var hyperparameters

var name

var test_disabled

Methods

def create(cls, name, base_element, hyperparameters, test_disabled=False, disabled=False, **kwargs)

Takes an instantiated object and encapsulates it into the PHOTON structure, adds the disabled function, and attaches information about the hyperparameters that should be tested.

@classmethod
-def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-    """
-    Takes an instantiated object and encapsulates it into the PHOTON structure,
-    add the disabled function and attaches information about the hyperparameters that should be tested
-    """
-    return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)

class PipelineStacking


Creates a vertical stacking/parallelization of pipeline items.

The object acts as a single pipeline element while encapsulating several vertically stacked pipeline elements, each child receiving the same input data. The data is iteratively distributed to all children; the results are collected and horizontally concatenated.

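A short usage sketch; the element names are assumed to be registered in PhotonCore.json:

    # Sketch: two estimators receive the same input; their predictions are
    # stacked horizontally and, with voting=True, averaged to one vector.
    stack = PipelineStacking('ensemble',
                             stacking_elements=[PipelineElement('SVC'),
                                                PipelineElement('DecisionTreeClassifier')],
                             voting=True)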
class PipelineStacking(PipelineElement):
-    """
-    Creates a vertical stacking/parallelization of pipeline items.
-
-    The object acts as single pipeline element and encapsulates several vertically stacked other pipeline elements, each
-    child receiving the same input data. The data is iteratively distributed to all children, the results are collected
-    and horizontally concatenated.
-
-    """
-    def __init__(self, name: str, stacking_elements=None, voting: bool=True):
-        """
-        Creates a new PipelineStacking element.
-        Collects all possible hyperparameter combinations of the children
-
-        Parameters
-        ----------
-        * 'name' [str]:
-            Give the pipeline element a name
-        * 'stacking_elements' [list, optional]:
-            List of pipeline elements that should run in parallel
-        * 'voting' [bool]:
-            If true, the predictions of the encapsulated pipeline elements are joined to a single prediction
-        """
-        super(PipelineStacking, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
-                                               base_element=True)
-
-        self._hyperparameters = {}
-        self.pipe_elements = OrderedDict()
-        self.voting = voting
-        if stacking_elements is not None:
-            for item_to_stack in stacking_elements:
-                self.__iadd__(item_to_stack)
-
-    def __iadd__(self, item):
-        """
-        Adds a new element to the stack.
-        Generates sklearn hyperparameter names in order to set the item's hyperparameters in the optimization process.
-
-        * 'item' [PipelineElement or PipelineBranch or Hyperpipe]:
-            The Element that should be stacked and will run in a vertical parallelization in the original pipe.
-        """
-        self.pipe_elements[item.name] = item
-        self._hyperparameters[item.name] = item.hyperparameters
-
-        # for each configuration
-        tmp_dict = dict(item.hyperparameters)
-        for key, element in tmp_dict.items():
-            if isinstance(item, PipelineElement):
-                self._hyperparameters[self.name + '__' + key] = tmp_dict[key]
-            else:
-                self._hyperparameters[self.name + '__' + item.name + '__' + key] = tmp_dict[key]
-        return self
-
-    def add(self, item):
-        self.__iadd__(item)
-
-    @property
-    def hyperparameters(self):
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value):
-        """
-        Setting hyperparameters does not make sense; only the items that were added can be optimized, not the container (self)
-        """
-        pass
-
-    def generate_config_grid(self):
-        return create_global_config_grid(self.pipe_elements.values(), self.name)
-
-    def get_params(self, deep=True):
-        all_params = {}
-        for name, element in self.pipe_elements.items():
-            all_params[name] = element.get_params(deep)
-        return all_params
-
-    def set_params(self, **kwargs):
-        """
-        Find the particular child and distribute the params to it
-        """
-        spread_params_dict = {}
-        for k, val in kwargs.items():
-            splitted_k = k.split('__')
-            item_name = splitted_k[0]
-            if item_name not in spread_params_dict:
-                spread_params_dict[item_name] = {}
-            dict_entry = {'__'.join(splitted_k[1::]): val}
-            spread_params_dict[item_name].update(dict_entry)
-
-        for name, params in spread_params_dict.items():
-            if name in self.pipe_elements:
-                self.pipe_elements[name].set_params(**params)
-            else:
-                Logger().error('NameError: Could not find element ' + name)
-                raise NameError('Could not find element ', name)
-        return self
-
-    def fit(self, data, targets=None):
-        """
-        Calls fit iteratively on every child
-        """
-        for name, element in self.pipe_elements.items():
-            # Todo: parallellize fitting
-            element.fit(data, targets)
-        return self
-
-    def predict(self, data):
-        """
-        Iteratively calls predict on every child.
-        """
-        # Todo: strategy for concatenating data from different pipes
-        # todo: parallelize prediction
-        predicted_data = np.empty((0, 0))
-        for name, element in self.pipe_elements.items():
-            element_transform = element.predict(data)
-            predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-        if self.voting:
-            if hasattr(predicted_data, 'shape'):
-                if len(predicted_data.shape) > 1:
-                    predicted_data = np.mean(predicted_data, axis=1).astype(int)
-        return predicted_data
-
-    def predict_proba(self, data):
-        """
-        Predict probabilities for every pipe element and
-        stack them together. Alternatively, do voting instead.
-        """
-        predicted_data = np.empty((0, 0))
-        for name, element in self.pipe_elements.items():
-            element_transform = element.predict_proba(data)
-            predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-        if self.voting:
-            if hasattr(predicted_data, 'shape'):
-                if len(predicted_data.shape) > 1:
-                    predicted_data = np.mean(predicted_data, axis=1).astype(int)
-        return predicted_data
-
-    def transform(self, data):
-        """
-        Calls transform on every child.
-
-        If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.
-        """
-        transformed_data = np.empty((0, 0))
-        for name, element in self.pipe_elements.items():
-            # if it is a hyperpipe with a final estimator, we want to use predict:
-            if hasattr(element, 'pipe'):
-                if element.overwrite_x is not None:
-                    element_data = element.overwrite_x
-                else:
-                    element_data = data
-                if element.pipe._final_estimator:
-                    element_transform = element.predict(element_data)
-                else:
-                    # if it is just a preprocessing pipe we want to use transform
-                    element_transform = element.transform(element_data)
-            else:
-                raise "I dont know what todo!"
-
-            transformed_data = PipelineStacking.stack_data(transformed_data, element_transform)
-
-        return transformed_data
-
-    # def fit_predict(self, data, targets):
-    #     predicted_data = None
-    #     for name, element in self.pipe_elements.items():
-    #         element_transform = element.fit_predict(data)
-    #         predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-    #     return predicted_data
-    #
-    # def fit_transform(self, data, targets=None):
-    #     transformed_data = np.empty((0, 0))
-    #     for name, element in self.pipe_elements.items():
-    #         # if it is a hyperpipe with a final estimator, we want to use predict:
-    #         if hasattr(element, 'pipe'):
-    #             if element.pipe._final_estimator:
-    #                 element.fit(data, targets)
-    #                 element_transform = element.predict(data)
-    #             else:
-    #                 # if it is just a preprocessing pipe we want to use transform
-    #                 element.fit(data)
-    #                 element_transform = element.transform(data)
-    #             transformed_data = PipelineStacking.stack_data(transformed_data, element_transform)
-    #     return transformed_data
-
-    @classmethod
-    def stack_data(cls, a, b):
-        """
-        Helper method to horizontally join the outcome of each child
-
-        Parameters
-        ----------
-        * 'a' [ndarray]:
-            The existing matrix
-        * 'b' [ndarray]:
-            The matrix that is to be attached horizontally
-
-        Returns
-        -------
-        New matrix, that is a and b horizontally joined
-
-        """
-        if not a.any():
-            a = b
-        else:
-            # Todo: check for right dimensions!
-            if a.ndim == 1 and b.ndim == 1:
-                a = np.column_stack((a, b))
-            else:
-                b = np.reshape(b, (b.shape[0], 1))
-                a = np.concatenate((a, b), 1)
-        return a
-
-    def score(self, X_test, y_test):
-        """
-        Calculate accuracy for predictions made with this object.
-        This function should probably never be called.
-
-        """
-        # Todo: invent strategy for this ?
-        # raise BaseException('PipelineStacking.score should probably never be reached.')
-        # return 16
-        predicted = self.predict(X_test)
-
-        return accuracy_score(y_test, predicted)

Ancestors (in MRO)


Class variables


var ELEMENT_DICTIONARY


Static methods

def __init__(self, name, stacking_elements=None, voting=True)

Creates a new PipelineStacking element. Collects all possible hyperparameter combinations of the children.

Parameters

  • 'name' [str]: Give the pipeline element a name
  • 'stacking_elements' [list, optional]: List of pipeline elements that should run in parallel
  • 'voting' [bool]: If true, the predictions of the encapsulated pipeline elements are joined to a single prediction

def __init__(self, name: str, stacking_elements=None, voting: bool=True):
-    """
-    Creates a new PipelineStacking element.
-    Collects all possible hyperparameter combinations of the children
-    Parameters
-    ----------
-    * 'name' [str]:
-        Give the pipeline element a name
-    * 'stacking_elements' [list, optional]:
-        List of pipeline elements that should run in parallel
-    * 'voting' [bool]:
-        If true, the predictions of the encapsulated pipeline elements are joined to a single prediction
-    """
-    super(PipelineStacking, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
-                                           base_element=True)
-    self._hyperparameters = {}
-    self.pipe_elements = OrderedDict()
-    self.voting = voting
-    if stacking_elements is not None:
-        for item_to_stack in stacking_elements:
-            self.__iadd__(item_to_stack)

def add(self, item)

def add(self, item):
-    self.__iadd__(item)

def copy_me(self)

def copy_me(self):
-    return deepcopy(self)

def fit(self, data, targets=None)

Calls fit iteratively on every child

def fit(self, data, targets=None):
-    """
-    Calls fit iteratively on every child
-    """
-    for name, element in self.pipe_elements.items():
-        # Todo: parallellize fitting
-        element.fit(data, targets)
-    return self

def generate_config_grid(self)

def generate_config_grid(self):
-    return create_global_config_grid(self.pipe_elements.values(), self.name)

def generate_sklearn_hyperparameters(self, value)

Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value

def generate_sklearn_hyperparameters(self, value: dict):
-    """
-    Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-    """
-    self._hyperparameters = {}
-    for attribute, value_list in value.items():
-        self._hyperparameters[self.name + '__' + attribute] = value_list
-    if self.test_disabled:
-        self._hyperparameters[self._sklearn_disabled] = [False, True]

def get_params(self, deep=True)

Collects the parameters of every encapsulated child element.

def get_params(self, deep=True):
-    all_params = {}
-    for name, element in self.pipe_elements.items():
-        all_params[name] = element.get_params(deep)
-    return all_params

def inverse_transform(self, data)

Calls inverse_transform on the base element

def inverse_transform(self, data):
-    """
-    Calls inverse_transform on the base element
-    """
-    if hasattr(self.base_element, 'inverse_transform'):
-        return self.base_element.inverse_transform(data)
-    else:
-        # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-        return data

def predict(self, data)

Iteratively calls predict on every child.

def predict(self, data):
-    """
-    Iteratively calls predict on every child.
-    """
-    # Todo: strategy for concatenating data from different pipes
-    # todo: parallelize prediction
-    predicted_data = np.empty((0, 0))
-    for name, element in self.pipe_elements.items():
-        element_transform = element.predict(data)
-        predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-    if self.voting:
-        if hasattr(predicted_data, 'shape'):
-            if len(predicted_data.shape) > 1:
-                predicted_data = np.mean(predicted_data, axis=1).astype(int)
-    return predicted_data
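The voting step is a plain column-wise mean truncated to int, as this sketch shows:

    # Sketch of the voting step: column-wise mean of the stacked class
    # predictions, truncated to int (so 0.5 becomes 0).
    import numpy as np
    stacked = np.array([[0, 1], [1, 1], [1, 0]])
    print(np.mean(stacked, axis=1).astype(int))  # -> [0 1 0]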

def predict_proba(self, data)

Predict probabilities for every pipe element and stack them together. Alternatively, do voting instead.

def predict_proba(self, data):
-    """
-    Predict probabilities for every pipe element and
-    stack them together. Alternatively, do voting instead.
-    """
-    predicted_data = np.empty((0, 0))
-    for name, element in self.pipe_elements.items():
-        element_transform = element.predict_proba(data)
-        predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-    if self.voting:
-        if hasattr(predicted_data, 'shape'):
-            if len(predicted_data.shape) > 1:
-                predicted_data = np.mean(predicted_data, axis=1).astype(int)
-    return predicted_data

def prettify_config_output(self, config_name, config_value, return_dict=False)

Make hyperparameter combinations human readable

def prettify_config_output(self, config_name: str, config_value, return_dict:bool=False):
-    """Make hyperparameter combinations human readable """
-    if config_name == "disabled" and config_value is False:
-        if return_dict:
-            return {'enabled':True}
-        else:
-            return "enabled = True"
-    else:
-        if return_dict:
-            return {config_name:config_value}
-        else:
-            return config_name + '=' + str(config_value)

def score(self, X_test, y_test)

Calculate accuracy for predictions made with this object. This function should probably never be called.

def score(self, X_test, y_test):
-    """
-    Calculate accuracy for predictions made with this object.
-    This function should probably never be called.
-    """
-    # Todo: invent strategy for this ?
-    # raise BaseException('PipelineStacking.score should probably never be reached.')
-    # return 16
-    predicted = self.predict(X_test)
-    return accuracy_score(y_test, predicted)

def set_params(self, **kwargs)

Find the particular child and distribute the params to it

def set_params(self, **kwargs):
-    """
-    Find the particular child and distribute the params to it
-    """
-    spread_params_dict = {}
-    for k, val in kwargs.items():
-        splitted_k = k.split('__')
-        item_name = splitted_k[0]
-        if item_name not in spread_params_dict:
-            spread_params_dict[item_name] = {}
-        dict_entry = {'__'.join(splitted_k[1::]): val}
-        spread_params_dict[item_name].update(dict_entry)
-    for name, params in spread_params_dict.items():
-        if name in self.pipe_elements:
-            self.pipe_elements[name].set_params(**params)
-        else:
-            Logger().error('NameError: Could not find element ' + name)
-            raise NameError('Could not find element ', name)
-    return self
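The routing logic can be sketched in a few lines; the element names 'svc' and 'tree' are made up:

    # Sketch of the param spreading: 'svc__C' is routed to the child named
    # 'svc' as {'C': ...}.
    kwargs = {'svc__C': 1.0, 'tree__max_depth': 3}
    spread_params_dict = {}
    for k, val in kwargs.items():
        item_name, _, rest = k.partition('__')
        spread_params_dict.setdefault(item_name, {})[rest] = val
    print(spread_params_dict)  # {'svc': {'C': 1.0}, 'tree': {'max_depth': 3}}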

def transform(self, data)

Calls transform on every child.

If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.

def transform(self, data):
-    """
-    Calls transform on every child.
-    If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.
-    """
-    transformed_data = np.empty((0, 0))
-    for name, element in self.pipe_elements.items():
-        # if it is a hyperpipe with a final estimator, we want to use predict:
-        if hasattr(element, 'pipe'):
-            if element.overwrite_x is not None:
-                element_data = element.overwrite_x
-            else:
-                element_data = data
-            if element.pipe._final_estimator:
-                element_transform = element.predict(element_data)
-            else:
-                # if it is just a preprocessing pipe we want to use transform
-                element_transform = element.transform(element_data)
-        else:
-            raise "I dont know what todo!"
-        transformed_data = PipelineStacking.stack_data(transformed_data, element_transform)
-    return transformed_data

Instance variables

var hyperparameters

    Inheritance: PipelineElement.hyperparameters

var pipe_elements

var voting

Methods

def create(cls, name, base_element, hyperparameters, test_disabled=False, disabled=False, **kwargs)

Takes an instantiated object and encapsulates it into the PHOTON structure, adds the disabled function, and attaches information about the hyperparameters that should be tested.

@classmethod
-def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-    """
-    Takes an instantiated object and encapsulates it into the PHOTON structure,
-    add the disabled function and attaches information about the hyperparameters that should be tested
-    """
-    return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)

def stack_data(cls, a, b)

Helper method to horizontally join the outcome of each child


Parameters

  • 'a' [ndarray]: The existing matrix
  • 'b' [ndarray]: The matrix that is to be attached horizontally

Returns

New matrix: a and b horizontally joined

@classmethod
-def stack_data(cls, a, b):
-    """
-    Helper method to horizontally join the outcome of each child
-    Parameters
-    ----------
-    * 'a' [ndarray]:
-        The existing matrix
-    * 'b' [ndarray]:
-        The matrix that is to be attached horizontally
-    Returns
-    -------
-    New matrix, that is a and b horizontally joined
-    """
-    if not a.any():
-        a = b
-    else:
-        # Todo: check for right dimensions!
-        if a.ndim == 1 and b.ndim == 1:
-            a = np.column_stack((a, b))
-        else:
-            b = np.reshape(b, (b.shape[0], 1))
-            a = np.concatenate((a, b), 1)
-    return a
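A numeric sketch of the stacking behaviour:

    # Sketch: two 1-d prediction vectors become the columns of one matrix.
    import numpy as np
    a = np.array([0, 1, 1])
    b = np.array([1, 1, 0])
    print(PipelineStacking.stack_data(a, b))
    # [[0 1]
    #  [1 1]
    #  [1 0]]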

class PipelineSwitch


This class encapsulates several pipeline elements that belong at the same step of the pipeline, competing to be the best choice.

If, for example, you want to find out whether preprocessing A or preprocessing B is better at this position in the pipe, or whether a tree outperforms the good old SVM.

ATTENTION: This class is a construct that may be convenient but is not suitable for complex optimizations. Currently it only works for grid search and the derived optimization strategies. USE THIS ONLY FOR RAPID PROTOTYPING AND PRELIMINARY RESULTS.

The class acts as if it were a single entity. It joins the hyperparameter combinations of each encapsulated element into a single, big combination grid. Each hyperparameter combination from that grid gets a number. The PipelineSwitch object then publishes these numbers as its own hyperparameter. When the optimizer chooses a new number, the switch internally activates the corresponding element and sets that element's parameters to the respective hyperparameter combination. In that way, each of the elements is tested in all its configurations at the same position in the pipeline. From the outside, the process and the optimizer see only one parameter of the PipelineSwitch: an integer indicating which item of the hyperparameter combination grid is currently active.

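A short usage sketch; the element names are assumed to be registered:

    # Sketch: let the optimizer choose between two competing estimators at
    # the same pipeline position.
    switch = PipelineSwitch('estimator_switch',
                            [PipelineElement('SVC', hyperparameters={'C': [0.5, 1.0]}),
                             PipelineElement('DecisionTreeClassifier')])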
class PipelineSwitch(PipelineElement):
-    """
-    This class encapsulates several pipeline elements that belong at the same step of the pipeline,
-    competing for being the best choice.
-
-    If for example you want to find out if preprocessing A or preprocessing B is better at this position in the pipe.
-    Or you want to test if a tree outperforms the good old SVM.
-
-    ATTENTION: This class is a construct that may be convenient but is not suitable for any complex optimizations.
-    Currently it only works for grid_search and the derived optimization strategies.
-    USE THIS ONLY FOR RAPID PROTOTYPING AND PRELIMINARY RESULTS
-
-    The class acts as if it were a single entity. It joins the hyperparameter combinations of each encapsulated
-    element into a single, big combination grid. Each hyperparameter combination from that grid gets a number.
-    The PipelineSwitch object then publishes these numbers as the object's own hyperparameter. When the optimizer
-    chooses a new number, the switch internally activates the corresponding element and sets the element's
-    parameters to the respective hyperparameter combination. In that way, each of the elements is tested in all
-    its configurations at the same position in the pipeline. From the outside, the process and the optimizer see
-    only one parameter of the PipelineSwitch: an integer indicating which item of the combination grid is active.
-
-    """
-
-    def __init__(self, name: str, pipeline_element_list: list = None, _estimator_type='regressor'):
-        """
-        Creates a new PipelineSwitch object and generates the hyperparameter combination grid
-
-        Parameters
-        ----------
-        * 'name' [str]:
-            How the element is called in the pipeline
-        * 'pipeline_element_list' [list, optional]:
-            The competing pipeline elements
-        * '_estimator_type' [str]:
-            Used for validation purposes, either classifier or regressor
-
-        """
-        self.name = name
-        self.sklearn_name = self.name + "__current_element"
-        self._hyperparameters = {}
-        self._current_element = (1, 1)
-        self.disabled = False
-        self.test_disabled = False
-        self.pipeline_element_configurations = []
-        self._estimator_type = _estimator_type
-
-        if pipeline_element_list:
-            self.pipeline_element_list = pipeline_element_list
-            self.generate_private_config_grid()
-        else:
-            self.pipeline_element_list = []
-
-    def __iadd__(self, other):
-        self.pipeline_element_list.append(other)
-        self.generate_private_config_grid()
-        return self
-
-    def add(self, other):
-        self.__iadd__(other)
-
-    @property
-    def hyperparameters(self):
-        # Todo: return actual hyperparameters of all pipeline elements??
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value):
-        pass
-
-    def generate_private_config_grid(self):
-        # reset
-        self.pipeline_element_configurations = []
-
-        # calculate anew
-        hyperparameters = []
-        # generate possible combinations for each item respectively - do not mix hyperparameters across items
-        for i, pipe_element in enumerate(self.pipeline_element_list):
-            # distinct_values_config = create_global_config([pipe_element])
-            # add pipeline switch name in the config so that the hyperparameters can be set from other classes
-            # pipeline switch will give the hyperparameters to the respective child
-            # distinct_values_config_copy = {}
-            # for config_key, config_value in distinct_values_config.items():
-            #     distinct_values_config_copy[self.name + "__" + config_key] = config_value
-
-            element_configurations = pipe_element.generate_config_grid()
-            final_configuration_list = []
-            for dict_item in element_configurations:
-                copy_of_dict_item = {}
-                for key, value in dict_item.items():
-                    copy_of_dict_item[self.name + '__' + key] = value
-                final_configuration_list.append(copy_of_dict_item)
-
-            self.pipeline_element_configurations.append(final_configuration_list)
-            hyperparameters += [(i, nr) for nr in range(len(final_configuration_list))]
-
-        self._hyperparameters = {self.sklearn_name: hyperparameters}
-
-    @property
-    def current_element(self):
-        return self._current_element
-
-    @current_element.setter
-    def current_element(self, value):
-        self._current_element = value
-        # pass the right config to the element
-        # config = self.pipeline_element_configurations[value[0]][value[1]]
-        # self.base_element.set_params(config)
-
-    @property
-    def base_element(self):
-        """
-        Returns the currently active element
-        """
-        obj = self.pipeline_element_list[self.current_element[0]]
-        return obj
-
-    def set_params(self, **kwargs):
-
-        """
-        The optimization process sees the number of possible combinations and chooses one of them.
-        This class then activates the corresponding element and prepares it with the particular chosen configuration.
-
-        """
-
-        config_nr = None
-        if self.sklearn_name in kwargs:
-            config_nr = kwargs[self.sklearn_name]
-        elif 'current_element' in kwargs:
-            config_nr = kwargs['current_element']
-
-        if config_nr is None or not isinstance(config_nr, (tuple, list)):
-            Logger().error('ValueError: current_element must be of type Tuple')
-            raise ValueError('current_element must be of type Tuple')
-        else:
-            self.current_element = config_nr
-            config = self.pipeline_element_configurations[config_nr[0]][config_nr[1]]
-            # remove name
-            unnamed_config = {}
-            for config_key, config_value in config.items():
-                key_split = config_key.split('__')
-                unnamed_config['__'.join(key_split[2::])] = config_value
-            self.base_element.set_params(**unnamed_config)
-        return self
-
-    def prettify_config_output(self, config_name, config_value, return_dict=False):
-
-        """
-        Makes the sklearn configuration dictionary human readable
-
-        Returns
-        -------
-        * 'prettified_configuration_string' [str]:
-            configuration as prettified string or configuration as dict with prettified keys
-        """
-
-        if isinstance(config_value, tuple):
-            output = self.pipeline_element_configurations[config_value[0]][config_value[1]]
-            if not output:
-                if return_dict:
-                    return {self.pipeline_element_list[config_value[0]].name:None}
-                else:
-                    return self.pipeline_element_list[config_value[0]].name
-            else:
-                if return_dict:
-                    return output
-                return str(output)
-        else:
-            return super(PipelineSwitch, self).prettify_config_output(config_name, config_value)
-

Ancestors (in MRO)

• PipelineElement

Class variables


var ELEMENT_DICTIONARY


Static methods


def __init__(self, name, pipeline_element_list=None, _estimator_type='regressor')

Creates a new PipelineSwitch object and generates the hyperparameter combination grid

Parameters

• 'name' [str]: How the element is called in the pipeline
• 'pipeline_element_list' [list, optional]: The competing pipeline elements
• '_estimator_type' [str]: Used for validation purposes, either classifier or regressor
def __init__(self, name: str, pipeline_element_list: list = None, _estimator_type='regressor'):
-    """
-    Creates a new PipelineSwitch object and generates the hyperparameter combination grid
-    Parameters
-    ----------
-    * 'name' [str]:
-        How the element is called in the pipeline
-    * 'pipeline_element_list' [list, optional]:
-        The competing pipeline elements
-    * '_estimator_type' [str]:
-        Used for validation purposes, either classifier or regressor
-    """
-    self.name = name
-    self.sklearn_name = self.name + "__current_element"
-    self._hyperparameters = {}
-    self._current_element = (1, 1)
-    self.disabled = False
-    self.test_disabled = False
-    self.pipeline_element_configurations = []
-    self._estimator_type = _estimator_type
-    if pipeline_element_list:
-        self.pipeline_element_list = pipeline_element_list
-        self.generate_private_config_grid()
-    else:
-        self.pipeline_element_list = []
-

def add(self, other)
def add(self, other):
-    self.__iadd__(other)

def copy_me(self)
def copy_me(self):
-    return deepcopy(self)

def fit(self, data, targets=None)

Calls the fit function of the base element

Returns

self
def fit(self, data, targets=None):
-    """
-    Calls the fit function of the base element
-    Returns
-    -------
-    self
-    """
-    if not self.disabled:
-        obj = self.base_element
-        obj.fit(data, targets)
-        # self.base_element.fit(data, targets)
-    return self

def generate_config_grid(self)
def generate_config_grid(self):
-    config_dict = create_global_config_dict([self])
-    if len(config_dict) > 0:
-        if self.test_disabled:
-            config_dict.pop(self._sklearn_disabled)
-        config_list = list(ParameterGrid(config_dict))
-        if self.test_disabled:
-            config_list.append({self._sklearn_disabled: True})
-        return config_list
-    else:
-        return []

def generate_private_config_grid(self)
def generate_private_config_grid(self):
-    # reset
-    self.pipeline_element_configurations = []
-    # calculate anew
-    hyperparameters = []
-    # generate possible combinations for each item respectively - do not mix hyperparameters across items
-    for i, pipe_element in enumerate(self.pipeline_element_list):
-        # distinct_values_config = create_global_config([pipe_element])
-        # add pipeline switch name in the config so that the hyperparameters can be set from other classes
-        # pipeline switch will give the hyperparameters to the respective child
-        # distinct_values_config_copy = {}
-        # for config_key, config_value in distinct_values_config.items():
-        #     distinct_values_config_copy[self.name + "__" + config_key] = config_value
-        element_configurations = pipe_element.generate_config_grid()
-        final_configuration_list = []
-        for dict_item in element_configurations:
-            copy_of_dict_item = {}
-            for key, value in dict_item.items():
-                copy_of_dict_item[self.name + '__' + key] = value
-            final_configuration_list.append(copy_of_dict_item)
-        self.pipeline_element_configurations.append(final_configuration_list)
-        hyperparameters += [(i, nr) for nr in range(len(final_configuration_list))]
-    self._hyperparameters = {self.sklearn_name: hyperparameters}
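To make the resulting grid concrete, a hedged illustration for a switch named 'switch' wrapping two hypothetical elements, 'svc' with two configurations and 'tree' with one:

# pipeline_element_configurations == [
#     [{'switch__svc__C': 0.1}, {'switch__svc__C': 1.0}],   # element 0
#     [{'switch__tree__n_estimators': 10}]                  # element 1
# ]
# _hyperparameters == {'switch__current_element': [(0, 0), (0, 1), (1, 0)]}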

def generate_sklearn_hyperparameters(self, value)

Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value

def generate_sklearn_hyperparameters(self, value: dict):
-    """
-    Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-    """
-    self._hyperparameters = {}
-    for attribute, value_list in value.items():
-        self._hyperparameters[self.name + '__' + attribute] = value_list
-    if self.test_disabled:
-        self._hyperparameters[self._sklearn_disabled] = [False, True]
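As a hedged illustration (hypothetical element named 'SVC'; the exact constructor signature may differ), the sklearn naming convention produces:

element = PipelineElement('SVC', hyperparameters={'C': [0.1, 1.0]})
element.generate_sklearn_hyperparameters({'C': [0.1, 1.0]})
# element._hyperparameters == {'SVC__C': [0.1, 1.0]}
# with test_disabled=True, the disabled flag (self._sklearn_disabled) is added with the values [False, True]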

def get_params(self, deep=True)

Forwards the get_params request to the wrapped base element

def get_params(self, deep: bool=True):
-    """
-    Forwards the get_params request to the wrapped base element
-    """
-    return self.base_element.get_params(deep)

def inverse_transform(self, data)

Calls inverse_transform on the base element

def inverse_transform(self, data):
-    """
-    Calls inverse_transform on the base element
-    """
-    if hasattr(self.base_element, 'inverse_transform'):
-        return self.base_element.inverse_transform(data)
-    else:
-        # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-        return data

def predict(self, data)

Calls predict function on the base element.

IF PREDICT IS NOT AVAILABLE, CALLS TRANSFORM. This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer. Sklearn usually expects the last element to predict. This is also needed in case we are using an autoencoder, which is first trained by using predict and after training is only used for transforming.
def predict(self, data):
-    """
-    Calls predict function on the base element.
-    IF PREDICT IS NOT AVAILABLE, CALLS TRANSFORM.
-    This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer.
-    Sklearn usually expects the last element to predict.
-    This is also needed in case we are using an autoencoder, which is first trained by using predict and after
-    training is only used for transforming.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        elif hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        else:
-            Logger().error('BaseException. base Element should have function ' +
-                           'predict, or at least transform.')
-            raise BaseException('base Element should have function predict, or at least transform.')
-    else:
-        return data

def predict_proba(self, data)

Predict probabilities. The base element needs a predict_proba() function, otherwise a base exception is thrown.

def predict_proba(self, data):
-    """
-    Predict probabilities.
-    The base element needs a predict_proba() function, otherwise a base exception is thrown.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict_proba'):
-            return self.base_element.predict_proba(data)
-        else:
-            Logger().error('BaseException. base Element should have "predict_proba" function.')
-            raise BaseException('base Element should have predict_proba function.')
-    return data

def prettify_config_output(self, config_name, config_value, return_dict=False)

Makes the sklearn configuration dictionary human readable

Returns

• 'prettified_configuration_string' [str]: configuration as prettified string, or configuration as dict with prettified keys
def prettify_config_output(self, config_name, config_value, return_dict=False):
-    """
-    Makes the sklearn configuration dictionary human readable
-    Returns
-    -------
-    * 'prettified_configuration_string' [str]:
-        configuration as prettified string or configuration as dict with prettified keys
-    """
-    if isinstance(config_value, tuple):
-        output = self.pipeline_element_configurations[config_value[0]][config_value[1]]
-        if not output:
-            if return_dict:
-                return {self.pipeline_element_list[config_value[0]].name:None}
-            else:
-                return self.pipeline_element_list[config_value[0]].name
-        else:
-            if return_dict:
-                return output
-            return str(output)
-    else:
-        return super(PipelineSwitch, self).prettify_config_output(config_name, config_value)

def score(self, X_test, y_test)

Calls the score function on the base element: returns a goodness-of-fit measure or the likelihood of unseen data.

def score(self, X_test, y_test):
-    """
-    Calls the score function on the base element:
-    returns a goodness-of-fit measure or the likelihood of unseen data.
-    """
-    return self.base_element.score(X_test, y_test)

def set_params(self, **kwargs)

The optimization process sees the number of possible combinations and chooses one of them. This class then activates the corresponding element and prepares it with the particular chosen configuration.

def set_params(self, **kwargs):
-    """
-    The optimization process sees the number of possible combinations and chooses one of them.
-    This class then activates the corresponding element and prepares it with the particular chosen configuration.
-    """
-    config_nr = None
-    if self.sklearn_name in kwargs:
-        config_nr = kwargs[self.sklearn_name]
-    elif 'current_element' in kwargs:
-        config_nr = kwargs['current_element']
-    if config_nr is None or not isinstance(config_nr, (tuple, list)):
-        Logger().error('ValueError: current_element must be of type Tuple')
-        raise ValueError('current_element must be of type Tuple')
-    else:
-        self.current_element = config_nr
-        config = self.pipeline_element_configurations[config_nr[0]][config_nr[1]]
-        # remove name
-        unnamed_config = {}
-        for config_key, config_value in config.items():
-            key_split = config_key.split('__')
-            unnamed_config['__'.join(key_split[2::])] = config_value
-        self.base_element.set_params(**unnamed_config)
-    return self
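A hedged usage sketch, continuing the hypothetical two-element switch from above: choosing the tuple (1, 0) activates the second element with its first configuration:

estimator_switch.set_params(current_element=(1, 0))
# internally, keys such as 'estimator_switch__tree__n_estimators' are stripped down to
# 'n_estimators' and forwarded to the now-active element's set_params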

def transform(self, data)

Calls transform on the base element.

IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT. This is used if we are using an estimator as a preprocessing step.
def transform(self, data):
-    """
-    Calls transform on the base element.
-    IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT.
-    This is used if we are using an estimator as a preprocessing step.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        elif hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        else:
-            Logger().error('BaseException: transform-predict-mess')
-            raise BaseException('transform-predict-mess')
-    else:
-        return data
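For example (hedged sketch with a hypothetical X_train): if the currently active element is an SVC, which has no transform method, the fallback lets the switch act as a preprocessing step:

features = estimator_switch.transform(X_train)  # internally falls back to svc.predict(X_train)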

Instance variables

var base_element
    Returns the currently active element

var current_element

var disabled

var hyperparameters
    Inheritance: PipelineElement.hyperparameters

var name

var pipeline_element_configurations

var sklearn_name

var test_disabled

Methods


def create(cls, name, base_element, hyperparameters, test_disabled=False, disabled=False, **kwargs)

Takes an instantiated object and encapsulates it into the PHOTON structure, adds the disabled function, and attaches information about the hyperparameters that should be tested

@classmethod
-def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-    """
-    Takes an instantiated object and encapsulates it into the PHOTON structure,
-    adds the disabled function and attaches information about the hyperparameters that should be tested
-    """
-    return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
diff --git a/photonai/documentation/photonai/configuration/index.html b/photonai/documentation/photonai/configuration/index.html deleted file mode 100644 index 5afa86e0..00000000 --- a/photonai/documentation/photonai/configuration/index.html +++ /dev/null @@ -1,1093 +0,0 @@

photonai.configuration module


PHOTON Classes for registering pipeline elements and submodule functionalities

""" PHOTON Classes for registering pipeline elements and submodule functionalities"""
-
-# from .Register import PhotonRegister
-# __all__ = ("PhotonRegister")
-
diff --git a/photonai/documentation/photonai/index.html b/photonai/documentation/photonai/index.html deleted file mode 100644 index fa56df07..00000000 --- a/photonai/documentation/photonai/index.html +++ /dev/null @@ -1,6965 +0,0 @@

photonai module

PHOTON: A Python-based Hyperparameter Optimization Toolbox for Neural Networks, designed to accelerate and simplify the construction, training, and evaluation of machine learning models.

PHOTON is an object-oriented Python framework for optimizing machine learning pipelines, designed to let you decide the important things while automating the rest.

PHOTON gives you an easy way of setting up a full-stack machine learning pipeline including nested cross-validation and hyperparameter search. After PHOTON has found the best configuration for your model, it offers a convenient way to explore the analyzed hyperparameter space. It also enables you to persist and load your optimal model, including all preprocessing steps, with only one line of code.


Sub-modules

photonai.base
    PHOTON Base Classes enabling the nested cross-validated hyperparameter search.

photonai.configuration
    PHOTON Classes for registering pipeline elements and submodule functionalities

photonai.investigator
    PHOTON Investigator delivers a web-based tool for exploring the hyperparameter search results.

photonai.optimization
    PHOTON Classes for defining the hyperparameter search space and optimization strategies

photonai.validation
    PHOTON classes for testing a specific hyperparameter configuration and calculating the performance metrics.

Classes


class Hyperpipe


Wrapper class for a machine learning pipeline, holding all pipeline elements and managing the optimization of the hyperparameters

class Hyperpipe(BaseEstimator):
-    """
-    Wrapper class for a machine learning pipeline, holding all pipeline elements
-    and managing the optimization of the hyperparameters
-
-    Parameters
-    ----------
-    * 'name' [str]:
-        Name of hyperpipe instance
-
-    * 'inner_cv' [BaseCrossValidator]:
-        Cross validation strategy to test hyperparameter configurations, generates the validation set
-
-    * 'outer_cv' [BaseCrossValidator]:
-        Cross validation strategy to use for the hyperparameter search itself, generates the test set
-
-    * 'optimizer' [str or object, default="grid_search"]:
-        Hyperparameter optimization algorithm
-
-        - In case a string literal is given:
-            - "grid_search": optimizer that iteratively tests all possible hyperparameter combinations
-            - "random_grid_search": a variation of the grid search optimization that randomly picks hyperparameter
-               combinations from all possible hyperparameter combinations
-            - "timeboxed_random_grid_search": randomly chooses hyperparameter combinations from the set of all
-               possible hyperparameter combinations and tests until the given time limit is reached
-               - 'limit_in_minutes': int
-
-        - In case an object is given:
-          expects the object to have the following methods:
-           - 'next_config_generator': returns a hyperparameter configuration in the form of a dictionary containing
-              key->value pairs in the sklearn parameter encoding 'model_name__parameter_name: parameter_value'
-           - 'prepare': takes a list of pipeline elements and their particular hyperparameters to test
-           - 'evaluate_recent_performance': gets a tested config and the respective performance in order to
-              calculate a smart next configuration to process
-
-    * 'metrics' [list of metric names as str]:
-        Metrics that should be calculated for training, validation and test set
-        Use the preimported metrics from sklearn and photonai, or register your own
-
-        - Metrics for 'classification':
-            - 'accuracy': sklearn.metrics.accuracy_score
-            - 'matthews_corrcoef': sklearn.metrics.matthews_corrcoef
-            - 'confusion_matrix': sklearn.metrics.confusion_matrix,
-            - 'f1_score': sklearn.metrics.f1_score
-            - 'hamming_loss': sklearn.metrics.hamming_loss
-            - 'log_loss': sklearn.metrics.log_loss
-            - 'precision': sklearn.metrics.precision_score
-            - 'recall': sklearn.metrics.recall_score
-        - Metrics for 'regression':
-            - 'mean_squared_error': sklearn.metrics.mean_squared_error
-            - 'mean_absolute_error': sklearn.metrics.mean_absolute_error
-            - 'explained_variance': sklearn.metrics.explained_variance_score
-            - 'r2': sklearn.metrics.r2_score
-        - Other metrics
-            - 'pearson_correlation': photon_core.framework.Metrics.pearson_correlation
-            - 'variance_explained':  photon_core.framework.Metrics.variance_explained_score
-            - 'categorical_accuracy': photon_core.framework.Metrics.categorical_accuracy_score
-
-    * 'best_config_metric' [str]:
-        The metric that should be maximized or minimized in order to choose the best hyperparameter configuration
-
-    * 'eval_final_performance' [bool, default=True]:
-        Whether the metrics should be calculated for the test set; otherwise the test set is separated but not used
-
-    * 'test_size' [float, default=0.2]:
-        the fraction of the data that should be left out if no outer_cv is given and
-        eval_final_performance is set to True
-
-    * 'set_random_seed' [bool, default=False]:
-        If True sets the random seed to 42
-
-    * 'verbosity' [int, default=0]:
-        The level of verbosity: 0 is least talkative and gives only warnings and errors, 1 adds info and 2 adds debug
-
-    * 'groups' [array-like, default=None]:
-        Info for advanced cross validation strategies, such as LeaveOneSiteOut-CV, about the affiliation
-        of the rows in the data
-
-    * 'filter_element' [SourceFilter, default=None]:
-        Instance of SourceFilter Class that transforms the input data, e.g. extracts certain columns
-
-    * 'imbalanced_data_strategy_filter' [str, default=None]:
-        Uses the imblearn package to handle imbalanced class distributions in the data
-        A strategy is used to transform the data into more balanced distributions before the hyperparameter search
-        is started.
-        Strategies to choose from are:
-        - imbalance_type = OVERSAMPLING:
-            - RandomOverSampler
-            - SMOTE
-            - ADASYN
-
-        -imbalance_type = UNDERSAMPLING:
-            - ClusterCentroids,
-            - RandomUnderSampler,
-            - NearMiss,
-            - InstanceHardnessThreshold,
-            - CondensedNearestNeighbour,
-            - EditedNearestNeighbours,
-            - RepeatedEditedNearestNeighbours,
-            - AllKNN,
-            - NeighbourhoodCleaningRule,
-            - OneSidedSelection
-
-        - imbalance_type = COMBINE:
-            - SMOTEENN,
-            - SMOTETomek
-
-    Attributes
-    ----------
-    * 'optimum_pipe' [Pipeline]:
-        An sklearn pipeline object that is fitted to the training data according to the best hyperparameter
-        configuration found. Currently, we don't create an ensemble of all best hyperparameter configs over all folds.
-        We find the best config by comparing the test error across outer folds. The hyperparameter config of the best
-        fold is used as the optimal model and is then trained on the complete set.
-
-    * 'best_config' [dict]:
-        Dictionary containing the hyperparameters of the best configuration.
-        Contains the parameters in the sklearn interface of model_name__parameter_name: parameter value
-
-    * 'result_tree' [MDBHyperpipe]:
-        Object containing all information about the performed hyperparameter search.
-        Holds the training and test metrics for all outer folds, inner folds and configurations, as well as
-        additional information.
-
-    * 'pipeline_elements' [list]:
-        Contains all PipelineElement or Hyperpipe objects that are added to the pipeline.
-
-    Example
-    -------
-        manager = Hyperpipe('test_manager',
-                            optimizer='timeboxed_random_grid_search', optimizer_params={'limit_in_minutes': 1},
-                            outer_cv=ShuffleSplit(test_size=0.2, n_splits=1),
-                            inner_cv=KFold(n_splits=10, shuffle=True),
-                            metrics=['accuracy', 'precision', 'recall', "f1_score"],
-                            best_config_metric='accuracy', eval_final_performance=True,
-                            verbosity=2)
-
-   """
-
-    OPTIMIZER_DICTIONARY = {'grid_search': GridSearchOptimizer,
-                            'random_grid_search': RandomGridSearchOptimizer,
-                            'timeboxed_random_grid_search': TimeBoxedRandomGridSearchOptimizer}
-
-    def __init__(self, name, inner_cv: BaseCrossValidator, outer_cv=None,
-                 optimizer='grid_search', optimizer_params: dict = {}, metrics=None,
-                 best_config_metric=None, eval_final_performance=True, test_size: float = 0.2,
-                 calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False,
-                 groups=None, set_random_seed: bool=False,
-                 filter_element=None, imbalanced_data_strategy_filter: str = '',
-                 verbosity=0,
-                 persist_options=None,
-                 performance_constraints=None):
-
-        # Re eval_final_performance:
-        # set eval_final_performance to False because
-        # 1. if no cv-object is given, no split is performed --> seems more logical
-        #    than passing nothing, passing no cv-object but getting
-        #    an 80/20 split by default
-        # 2. if cv-object is given, split is performed but we don't peek
-        #    into the test set --> thus we can evaluate more hp configs
-        #    later without double dipping
-
-        self.name = name
-        self.inner_cv = inner_cv
-        self.outer_cv = outer_cv
-        self.eval_final_performance = eval_final_performance
-        self.test_size = test_size
-        self.cv_iter = None
-        self.data_test_cases = None
-
-        self.calculate_metrics_per_fold = calculate_metrics_per_fold
-        self.calculate_metrics_across_folds = calculate_metrics_across_folds
-
-        # Todo: if self.outer_cv is LeaveOneOut: Set calculate metrics across folds to True -> Print
-
-        self.X = None
-        self.y = None
-
-        self.groups = groups
-        self.filter_element = filter_element
-        if imbalanced_data_strategy_filter:
-            self.imbalanced_data_strategy_filter = ImbalancedDataTransform(imbalanced_data_strategy_filter)
-        else:
-            self.imbalanced_data_strategy_filter = None
-
-        self.fit_duration = 0
-
-        if set_random_seed:
-            import random
-            random.seed(42)
-            print('set random seed to 42')
-
-        # set verbosity level
-        Logger().set_verbosity(verbosity)
-
-        # MongoDBWriter setup
-        if persist_options:
-            self.persist_options = persist_options
-            if self.persist_options.log_file:
-                Logger().set_custom_log_file(self.persist_options.log_file)
-        else:
-            self.persist_options = OutputSettings()
-        self.mongodb_writer = MongoDBWriter(self.persist_options)
-
-        self.pipeline_elements = []
-        self._pipe = None
-        self.optimum_pipe = None
-
-        self.metrics = metrics
-        #  Todo: raise error or warning if metrics and best config_metric is None
-        self.best_config_metric = best_config_metric
-        self.config_optimizer = None
-
-        self.result_tree = None
-        self.best_config = None
-        self.best_children_config = None
-        self.best_performance = None
-        self.is_final_fit = False
-
-        self.__mother_outer_fold_counter = 0
-        self.__mother_inner_fold_counter = 0
-        self.__mother_config_counter = 0
-
-        # containers for optimization history and logging
-        self._performance_history_list = []
-
-        if isinstance(optimizer, str):
-            # instantiate optimizer from string
-            #  Todo: check if optimizer strategy is already implemented
-            optimizer_class = self.OPTIMIZER_DICTIONARY[optimizer]
-            optimizer_instance = optimizer_class(**optimizer_params)
-            self.optimizer = optimizer_instance
-        else:
-            # Todo: check if correct object
-            self.optimizer = optimizer
-
-        self._validation_X = None
-        self._validation_y = None
-        self._test_X = None
-        self._test_y = None
-        self._last_fit_data_hash = None
-        self._current_fold = -1
-        self._num_of_folds = 0
-        self._is_mother_pipe = True
-        self._fold_data_hashes = []
-
-        self.inner_cv_callback_function = performance_constraints
-
-    def _set_verbosity(self, verbosity):
-        """
-        Set verbosity level manually
-        Returns None
-
-        Parameters
-        ----------
-        * 'verbosity' [Integer]:
-            Verbosity level can be 0, 1, or 2.
-
-        """
-        Logger().set_verbosity(verbosity)
-
-    def _set_persist_options(self, persist_options):
-        """
-        Set persist options manually
-        Returns None
-
-        Parameters
-        ----------
-        * 'persist_options' [OutputSettings]:
-
-        """
-        self.persist_options = persist_options
-        if self.persist_options.log_file:
-            Logger().set_custom_log_file(self.persist_options.log_file)
-        self.mongodb_writer = MongoDBWriter(self.persist_options)
-
-    def __iadd__(self, pipe_element):
-        """
-        Add an element to the machine learning pipeline
-        Returns self
-
-        Parameters
-        ----------
-        * 'pipe_element' [PipelineElement or Hyperpipe]:
-            The object to add to the machine learning pipeline, being either a transformer or an estimator.
-
-        """
-        # if isinstance(pipe_element, PipelineElement):
-        self.pipeline_elements.append(pipe_element)
-        # Todo: is repeated each time element is added....
-        self._prepare_pipeline()
-        return self
-        # else:
-        #     Todo: raise error
-        # raise TypeError("Element must be of type Pipeline Element")
-
-    def add(self, pipe_element):
-        self.__iadd__(pipe_element)
-
-    def __yield_all_data(self):
-        """
-        Helper function that iteratively returns the data stored in self.X
-        Returns an iterable version of self.X
-        """
-        if hasattr(self.X, 'shape'):
-            yield list(range(self.X.shape[0])), []
-        else:
-            yield list(range(len(self.X))), []
-
-    def _generate_outer_cv_indices(self):
-        """
-        Generates the training and test set indices for the hyperparameter search
-        and stores them in self.data_test_cases
-
-        - If there is a strategy given for the outer cross validation, the strategy is called to split the data
-        - If no strategy is given and eval_final_performance is False, all data is used for training
-        - If no strategy is given and eval_final_performance is True: a test set is separated from the
-          training and validation set by the parameter test_size with ShuffleSplit
-        """
-        # if there is a CV Object for cross validating the hyperparameter search
-        if self.outer_cv:
-            self.data_test_cases = self.outer_cv.split(self.X, self.y)
-        # in case we do not want to divide between validation and test set
-        elif not self.eval_final_performance:
-            self.data_test_cases = self.__yield_all_data()
-        # the default is dividing one time into a validation and test set
-        else:
-            train_test_cv_object = ShuffleSplit(n_splits=1, test_size=self.test_size)
-            self.data_test_cases = train_test_cv_object.split(self.X, self.y)
-
-    def _distribute_cv_info_to_hyperpipe_children(self, reset: bool =False, reset_final_fit: bool=False,
-                                                  outer_fold_counter: int=None, inner_fold_counter: int =None,
-                                                  num_of_folds: int = None, config_counter: int =None):
-        """
-        Informs all elements of the pipeline that are of type hyperpipe (hyperpipe children)
-        about the mother's configuration or current state
-
-        Parameters
-        ----------
-        * 'num_of_folds' [int]:
-            how many inner folds the mother hyperpipe has
-
-        * 'outer_fold_counter' [int]:
-            in which outer fold the mother hyperpipe currently is
-
-        * 'inner_fold_counter' [int]:
-            in which inner fold the mother hyperpipe currently is
-
-        * 'config_counter' [int]:
-            in which config_nr the mother hyperpipe currently is
-
-        * 'reset' [bool, default = False]:
-            if the hyperparameter search starts anew
-
-        * 'reset_final_fit' [bool, default = False]:
-            reset the is_final_fit parameter so that children hyperpipe train anew for outer fold of mother pipe
-
-        """
-
-        def _distribute_info_to_object(pipe_object, number_of_folds, reset_folds, reset_final_fit,
-                                      outer_fold_counter, inner_fold_counter, config_counter):
-            if pipe_object.local_search:
-                if number_of_folds is not None:
-                    pipe_object.num_of_folds = number_of_folds
-                    pipe_object.is_mother_pipe = False
-                if reset_folds:
-                    pipe_object.current_fold = -1
-                if outer_fold_counter is not None:
-                    pipe_object.mother_outer_fold_counter = outer_fold_counter
-                if inner_fold_counter is not None:
-                    pipe_object.mother_inner_fold_counter = inner_fold_counter
-                if config_counter:
-                    pipe_object.mother_config_counter = config_counter
-                if reset_final_fit:
-                    pipe_object.is_final_fit = False
-
-        # walk through all children of pipeline, if its a hyperpipe distribute the information
-        for element_tuple in self._pipe.steps:
-            element_object = element_tuple[1]
-            if isinstance(element_object, Hyperpipe):
-                _distribute_info_to_object(element_object, num_of_folds, reset, reset_final_fit,
-                                          outer_fold_counter, inner_fold_counter, config_counter)
-            elif isinstance(element_object, PipelineStacking):
-                for child_pipe_name, child_pipe_object in element_object.pipe_elements.items():
-                    if isinstance(child_pipe_object, Hyperpipe):
-                        _distribute_info_to_object(child_pipe_object, num_of_folds, reset, reset_final_fit,
-                                                  outer_fold_counter, inner_fold_counter, config_counter)
-
-    def update_mother_inner_fold_nr(self, new_inner_fold_nr: int):
-        """
-        Function handle so that the TestPipeline class from Photon's validation module can pass the information to hyperpipe children
-
-        Parameters
-        ----------
-        * 'new_inner_fold_nr' [int]:
-            in which inner_fold the mother hyperpipe currently is
-        """
-        self._distribute_cv_info_to_hyperpipe_children(inner_fold_counter=new_inner_fold_nr)
-
-    def fit(self, data, targets, **fit_params):
-        """
-        Starts the hyperparameter search and/or fits the pipeline to the data and targets
-
-        Manages the nested cross validated hyperparameter search:
-
-        1. Filters the data according to the filter strategy and to the imbalanced_data_strategy
-        2. requests new configurations from the hyperparameter search strategy, the optimizer,
-        3. initializes the testing of a specific configuration,
-        4. communicates the result to the optimizer,
-        5. repeats 2-4 until optimizer delivers no more configurations to test
-        6. finally searches for the best config in all tested configs,
-        7. trains the pipeline with the best config and evaluates the performance on the test set
-
-        Parameters
-        ----------
-         * `data` [array-like, shape=[N, D]]:
-            the training and test data, where N is the number of samples and D is the number of features.
-
-         * `targets` [array-like, shape=[N]]:
-            the truth values, where N is the number of samples.
-
-
-        Returns
-        -------
-         * 'self'
-            Returns self
-
-        """
-
-        # in case we want to inject some data from outside the pipeline
-
-        self.X = data
-        self.y = targets
-
-
-        # !!!!!!!!!!!!!!!! FIT ONLY IF DATA CHANGED !!!!!!!!!!!!!!!!!!!
-        # -------------------------------------------------------------
-
-        # in case we need to reduce the dimension of the data due to parallelity of the outer pipe, lets do it.
-        if self.filter_element:
-            self.X = self.filter_element.transform(self.X)
-
-        # if the groups are imbalanced, and a strategy is chosen, apply it here
-        if self.imbalanced_data_strategy_filter:
-            self.imbalanced_data_strategy_filter.fit(self.X, self.y)
-            self.X, self.y = self.imbalanced_data_strategy_filter.transform()
-
-        self._current_fold += 1
-
-        # be compatible to list of (image-) files
-        if isinstance(self.X, list):
-            self.X = np.asarray(self.X)
-        if isinstance(self.y, list):
-            self.y = np.asarray(self.y)
-
-        # handle neuro image paths as data
-        # ToDo: Need to check the DATA, not the img paths for neuro
-        new_data_hash = sha1(np.asarray(self.X, order='C')).hexdigest()
-
-        # fit
-        # 1. if it is first time ever or
-        # 2. the data did change for that fold or
-        # 3. if it is the mother pipe (then number_of_folds = 0)
-        if (len(self._fold_data_hashes) < self._num_of_folds) \
-                or (self._num_of_folds > 0 and self._fold_data_hashes[self._current_fold] != new_data_hash) \
-                or self._num_of_folds == 0:
-
-            # save data hash for that fold
-            if self._num_of_folds > 0:
-                if len(self._fold_data_hashes) < self._num_of_folds:
-                    self._fold_data_hashes.append(new_data_hash)
-                else:
-                    self._fold_data_hashes[self._current_fold] = new_data_hash
-
-            # optimize: iterate through configs and save results
-            if not self.is_final_fit:
-
-                # first check if correct optimizer metric has been chosen
-                # pass pipeline_elements so that OptimizerMetric can look for last
-                # element and use the corresponding score method
-                self.config_optimizer = OptimizerMetric(self.best_config_metric, self.pipeline_elements, self.metrics)
-                self.metrics = self.config_optimizer.check_metrics()
-
-                if 'score' in self.metrics:
-                    Logger().warn('Attention: Scoring with default score function of estimator can slow down calculations!')
-
-                # generate OUTER ! cross validation splits to iterate over
-                self._generate_outer_cv_indices()
-
-                outer_fold_counter = 0
-
-                if not self._is_mother_pipe:
-                    self.result_tree_name = self.name + '_outer_fold_' + str(self.__mother_outer_fold_counter)  \
-                                            + '_inner_fold_' + str(self.__mother_inner_fold_counter)
-                else:
-                    self.result_tree_name = self.name
-
-                # initialize result logging with hyperpipe class
-                self.result_tree = MDBHyperpipe(name=self.result_tree_name)
-                self.result_tree.outer_folds = []
-                self.result_tree.eval_final_performance = self.eval_final_performance
-                self.result_tree.best_config_metric = self.best_config_metric
-
-                # loop over outer cross validation
-                for train_indices, test_indices in self.data_test_cases:
-
-                    # give the optimizer the chance to inform about elements
-                    self.optimizer.prepare(self.pipeline_elements)
-
-                    outer_fold_counter += 1
-                    outer_fold_fit_start_time = time.time()
-
-                    Logger().info('HYPERPARAMETER SEARCH OF {0}, Outer Cross validation Fold {1}'
-                                  .format(self.name, outer_fold_counter))
-
-                    t1 = time.time()
-
-                    # Prepare Train and validation set data
-                    self._validation_X = self.X[train_indices]
-                    self._validation_y = self.y[train_indices]
-                    self._test_X = self.X[test_indices]
-                    self._test_y = self.y[test_indices]
-
-                    # Prepare inner cross validation
-                    cv_iter = list(self.inner_cv.split(self._validation_X, self._validation_y))
-                    num_folds = len(cv_iter)
-                    num_samples_train = len(self._validation_y)
-                    num_samples_test = len(self._test_y)
-
-                    # distribute number of folds to encapsulated child hyperpipes
-                    self._distribute_cv_info_to_hyperpipe_children(num_of_folds=num_folds,
-                                                                   outer_fold_counter=outer_fold_counter)
-
-                    tested_config_counter = 0
-
-                    # add outer fold info object to result tree
-                    outer_fold = MDBOuterFold(fold_nr=outer_fold_counter)
-                    outer_fold.tested_config_list = []
-                    self.result_tree.outer_folds.append(outer_fold)
-
-                    # do the optimizing
-                    for current_config in self.optimizer.next_config:
-                        self._distribute_cv_info_to_hyperpipe_children(reset=True, config_counter=tested_config_counter)
-                        hp = TestPipeline(self._pipe, current_config, self.metrics, self.update_mother_inner_fold_nr,
-                                          mongo_db_settings=self.persist_options,
-                                          callback_function=self.inner_cv_callback_function)
-                        Logger().debug('optimizing of:' + self.name)
-                        Logger().debug(self._optimize_printing(current_config))
-                        Logger().debug('calculating...')
-
-                        # Test the configuration cross validated by inner_cv object
-                        current_config_mdb = hp.calculate_cv_score(self._validation_X, self._validation_y, cv_iter,
-                                                            calculate_metrics_per_fold=self.calculate_metrics_per_fold,
-                                                            calculate_metrics_across_folds=self.calculate_metrics_across_folds)
-
-                        current_config_mdb.config_nr = tested_config_counter
-                        current_config_mdb.config_dict = current_config
-                        current_config_mdb.pipe_name = self.name
-                        tested_config_counter += 1
-                        current_config_mdb.human_readable_config = self.config_to_dict(current_config)
-
-                        # save the configuration of all children pipelines
-                        children_config = {}
-                        children_config_ref_list = []
-                        for pipe_step in self._pipe.steps:
-                            item = pipe_step[1]
-                            if isinstance(item, Hyperpipe):
-                                if item.local_search and item.best_config is not None:
-                                    children_config[item.name] = item.best_config
-                            elif isinstance(item, PipelineStacking):
-                                for subhyperpipe_name, hyperpipe in item.pipe_elements.items():
-                                    if isinstance(hyperpipe, Hyperpipe):
-                                        if hyperpipe.local_search and hyperpipe.best_config is not None:
-                                            # special case: we need to access pipe over pipeline_stacking element
-                                            children_config[item.name + '__' + subhyperpipe_name] = hyperpipe.best_config.config_dict
-                                        # children_config_ref_list.append(hyperpipe.best_config_outer_fold._id)
-                        specific_parameters = self._pipe.get_params()
-                        #current_config_mdb.full_model_spec = specific_parameters
-
-                        current_config_mdb.children_config_dict = children_config
-                        current_config_mdb.children_config_ref = children_config_ref_list
-
-                        Logger().verbose(self._optimize_printing(current_config))
-
-                        if not current_config_mdb.config_failed:
-                            # get optimizer_metric and forward to optimizer
-                            # todo: also pass greater_is_better=True/False to optimizer
-                            metric_train = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric)
-                            metric_test = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric, train=False)
-                            #
-                            # if not metric_train or metric_test:
-                            #     raise Exception("Config did not fail, but did not get any metrics either....!!?")
-                            config_performance = (metric_train, metric_test)
-
-                            # Print Result for config
-                            Logger().debug('...done:')
-                            Logger().verbose(self.config_optimizer.metric + str(config_performance))
-                        else:
-                            config_performance = (-1, -1)
-                            # Print Result for config
-                            Logger().debug('...failed:')
-                            Logger().error(current_config_mdb.config_error)
-
-                        # add config to result tree and do intermediate saving
-                        self.result_tree.outer_folds[-1].tested_config_list.append(current_config_mdb)
-                        # Todo: add try catch in case config cannot be written
-                        self.mongodb_writer.save(self.result_tree)
-
-                        # 3. inform optimizer about performance
-                        self.optimizer.evaluate_recent_performance(current_config, config_performance)
-
-                    if tested_config_counter > 0:
-                        best_config_outer_fold = self.config_optimizer.get_optimum_config(outer_fold.tested_config_list)
-
-                        if not best_config_outer_fold:
-                            raise Exception("No best config was found!")
-                        best_config_outer_fold_mdb = MDBConfig()
-                        best_config_outer_fold_mdb.children_config_dict = best_config_outer_fold.children_config_dict
-                        best_config_outer_fold_mdb.pipe_name = self.name
-                        best_config_outer_fold_mdb.children_config_ref = best_config_outer_fold.children_config_ref
-                        # best_config_outer_fold_mdb.best_config_ref_to_train_item = best_config_outer_fold._id
-                        best_config_outer_fold_mdb.config_dict = best_config_outer_fold.config_dict
-                        best_config_outer_fold_mdb.human_readable_config = best_config_outer_fold.human_readable_config
-
-
-                        # inform user
-                        Logger().info('finished optimization of ' + self.name)
-                        Logger().verbose('Result')
-                        Logger().verbose('Number of tested configurations:' + str(tested_config_counter))
-                        Logger().verbose('Optimizer metric: ' + self.config_optimizer.metric + '\n' +
-                                         '   --> Greater is better: ' + str(self.config_optimizer.greater_is_better))
-                        Logger().info('Best config: ' + self._optimize_printing(best_config_outer_fold_mdb.config_dict) +
-                                      '\n' + '... with children config: '
-                                      + self._optimize_printing(best_config_outer_fold_mdb.children_config_dict))
-
-
-                        # ... and create optimal pipeline
-                        self.optimum_pipe = self._pipe
-                        # set self to best config
-                        self.optimum_pipe.set_params(**best_config_outer_fold_mdb.config_dict)
-
-                        # set all children to best config and inform to NOT optimize again, ONLY fit
-                        for child_name, child_config in best_config_outer_fold_mdb.children_config_dict.items():
-                            if child_config:
-                                # in case we have a pipeline stacking we need to identify the particular subhyperpipe
-                                splitted_name = child_name.split('__')
-                                if len(splitted_name) > 1:
-                                    stacking_element = self.optimum_pipe.named_steps[splitted_name[0]]
-                                    pipe_element = stacking_element.pipe_elements[splitted_name[1]]
-                                else:
-                                    pipe_element = self.optimum_pipe.named_steps[child_name]
-                                pipe_element.set_params(**child_config)
-                                pipe_element.is_final_fit = True
-
-                        self._distribute_cv_info_to_hyperpipe_children(reset=True)
-
-                        Logger().verbose('...now fitting ' + self.name + ' with optimum configuration')
-                        fit_time_start = time.time()
-                        self.optimum_pipe.fit(self._validation_X, self._validation_y)
-                        final_fit_duration = time.time() - fit_time_start
-
-                        #self.best_config_outer_fold.full_model_spec = self.optimum_pipe.get_params()
-                        # store the duration in minutes, as the field name promises
-                        best_config_outer_fold_mdb.fit_duration_minutes = final_fit_duration / 60
-                        self.result_tree.outer_folds[-1].best_config = best_config_outer_fold_mdb
-                        self.result_tree.outer_folds[-1].best_config.inner_folds = []
-
-                        if self.eval_final_performance:
-                            # Todo: generate mean and std over outer folds as well. move these items to the top
-                            Logger().verbose('...now predicting ' + self.name + ' unseen data')
-
-                            test_score_mdb = TestPipeline.score(self.optimum_pipe, self._test_X, self._test_y,
-                                                                self.metrics,
-                                                                save_predictions=self.persist_options.save_predictions,
-                                                                save_feature_importances=self.persist_options.save_feature_importances)
-
-                            Logger().info('.. calculating metrics for test set (' + self.name + ')')
-                            Logger().verbose('...now predicting ' + self.name + ' final model with training data')
-
-                            train_score_mdb = TestPipeline.score(self.optimum_pipe, self._validation_X, self._validation_y,
-                                                                 self.metrics,
-                                                                 save_predictions=self.persist_options.save_predictions,
-                                                                 save_feature_importances=self.persist_options.save_feature_importances)
-
-                            # save test fold
-                            outer_fold_mdb = MDBInnerFold()
-                            outer_fold_mdb.fold_nr = 1
-                            outer_fold_mdb.number_samples_training = num_samples_train
-                            outer_fold_mdb.number_samples_validation = num_samples_test
-                            outer_fold_mdb.training = train_score_mdb
-                            outer_fold_mdb.validation = test_score_mdb
-                            self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-
-                            Logger().info('PERFORMANCE TRAIN:')
-                            for m_key, m_value in train_score_mdb.metrics.items():
-                                Logger().info(str(m_key) + ": " + str(m_value))
-
-                            Logger().info('PERFORMANCE TEST:')
-                            for m_key, m_value in test_score_mdb.metrics.items():
-                                Logger().info(str(m_key) + ": " + str(m_value))
-                        else:
-
-                            # save test fold
-                            outer_fold_mdb = MDBInnerFold()
-                            outer_fold_mdb.fold_nr = 1
-                            outer_fold_mdb.number_samples_training = num_samples_train
-                            outer_fold_mdb.number_samples_validation = num_samples_test
-
-                            def _copy_inner_fold_means(metric_dict):
-                                # We copy all mean values from validation to the best config
-                                # training
-                                train_item_metrics = {}
-                                for m in metric_dict:
-                                    if m.operation == str(FoldOperations.MEAN):
-                                        train_item_metrics[m.metric_name] = m.value
-                                train_item = MDBScoreInformation()
-                                train_item.metrics_copied_from_inner = True
-                                train_item.metrics = train_item_metrics
-                                return train_item
-
-                            # training
-                            outer_fold_mdb.training = _copy_inner_fold_means(best_config_outer_fold.metrics_train)
-                            # validation
-                            outer_fold_mdb.validation = _copy_inner_fold_means(best_config_outer_fold.metrics_test)
-
-                            self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-
-                    Logger().info('This took {} minutes.'.format((time.time() - t1) / 60))
-                    self.result_tree.time_of_results = datetime.datetime.now()
-                    self.mongodb_writer.save(self.result_tree)
-                    self._distribute_cv_info_to_hyperpipe_children(reset_final_fit=True, outer_fold_counter=outer_fold_counter)
-
-                # Compute all final metrics
-                self.result_tree.metrics_train, self.result_tree.metrics_test = MDBHelper.aggregate_metrics(self.result_tree.outer_folds,
-                                                                                                            self.metrics)
-                # save result tree to db or file or both
-                self.mongodb_writer.save(self.result_tree)
-                Logger().info("Saved result tree to database")
-
-                # Find best config across outer folds
-                self.best_config = self.config_optimizer.get_optimum_config_outer_folds(self.result_tree.outer_folds)
-                self.result_tree.best_config = self.best_config
-                Logger().info('OVERALL BEST CONFIGURATION')
-                Logger().info('--------------------------')
-                Logger().info(self._optimize_printing(self.best_config.config_dict) +
-                              '\n' + '... with children config: '
-                              + self._optimize_printing(self.best_config.children_config_dict))
-                # set self to best config
-                self.optimum_pipe = self._pipe
-                self.optimum_pipe.set_params(**self.best_config.config_dict)
-                self.optimum_pipe.fit(self._validation_X, self._validation_y)
-
-                # save results again
-                self.mongodb_writer.save(self.result_tree)
-                Logger().info("Saved overall best config to database")
-            ###############################################################################################
-            else:
-                self._pipe.fit(self.X, self.y, **fit_params)
-
-        else:
-            Logger().verbose("Avoided fitting of " + self.name + " on fold "
-                             + str(self._current_fold) + " because data did not change")
-            Logger().verbose('Best config of ' + self.name + ' : ' + str(self.best_config))
-
-        return self
-
-    def predict(self, data):
-        """
-        Use the optimum pipe to predict the data
-
-        Returns
-        -------
-            predicted targets
-
-        """
-        # Todo: if local_search = true then use optimized pipe here?
-        if self._pipe:
-            if self.filter_element:
-                data = self.filter_element.transform(data)
-            return self.optimum_pipe.predict(data)
-
-    def predict_proba(self, data):
-        """
-        Predict probabilities
-
-        Returns
-        -------
-        predicted probabilities
-
-        """
-        if self._pipe:
-            if self.filter_element:
-                data = self.filter_element.transform(data)
-            return self.optimum_pipe.predict_proba(data)
-
-    def transform(self, data):
-        """
-        Use the optimum pipe to transform the data
-        """
-        if self._pipe:
-            if self.filter_element:
-                data = self.filter_element.transform(data)
-            return self.optimum_pipe.transform(data)
-
-    def get_params(self, deep=True):
-        """
-        Retrieve parameters from sklearn pipeline
-        """
-        if self._pipe is not None:
-            return self._pipe.get_params(deep)
-        else:
-            return None
-
-    def set_params(self, **params):
-        """
-        Give parameter values to the pipeline elements
-        """
-        if self._pipe is not None:
-            self._pipe.set_params(**params)
-        return self
-
-    def _prepare_pipeline(self):
-        """
-        build sklearn pipeline from PipelineElements and
-        calculate parameter grid for all combinations of pipeline element hyperparameters
-        """
-        # prepare pipeline
-        pipeline_steps = []
-        for item in self.pipeline_elements:
-            # pipeline_steps.append((item.name, item.base_element))
-            pipeline_steps.append((item.name, item))
-
-        # build pipeline...
-        self._pipe = Pipeline(pipeline_steps)
-
-    def copy_me(self):
-        """
-        Helper function to copy all pipeline elements
-        """
-        item_list = []
-        for item in self.pipeline_elements:
-            item_list.append(item.copy_me())
-        return item_list
-
-    def _copy_pipeline(self):
-        """
-        Copy Pipeline by building a new sklearn Pipeline with Pipeline Elements
-
-        Returns
-        -------
-        new sklearn Pipeline object
-        """
-        pipeline_steps = []
-        for item in self.pipeline_elements:
-            cpy = item.copy_me()
-            if isinstance(cpy, list):
-                for new_step in cpy:
-                    pipeline_steps.append((new_step.name, new_step))
-            else:
-                pipeline_steps.append((cpy.name, cpy))
-        return Pipeline(pipeline_steps)
-
-    def save_optimum_pipe(self, file):
-        """
-        Save optimal pipeline only. The complete hyperpipe will not be saved.
-
-        Parameters
-        ----------
-        * 'file' [str]:
-            File path as string specifying file to save pipeline to
-        """
-        element_number = 0
-        element_identifier = list()
-        folder = os.path.splitext(file)[0]
-        file = os.path.splitext(file)[0] + '.photon'
-
-        if os.path.exists(folder):
-            raise FileExistsError('Trying to save optimum pipe: The file you specified already exists as a folder.')
-        else:
-            os.mkdir(folder)
-            folder = folder + '/'
-        wrapper_files = list()
-
-        for element_name, element in self.optimum_pipe.named_steps.items():
-            filename = '_optimum_pipe_' + str(element_number) + '_' + element_name
-            element_identifier.append({'element_name': element_name,
-                                       'filename': filename})
-            if hasattr(element.base_element, 'save'):
-                element.base_element.save(folder + filename)
-                element_identifier[-1]['mode'] = 'custom'
-                element_identifier[-1]['wrapper_script'] = os.path.basename(inspect.getfile(element.base_element.__class__))
-                wrapper_files.append(inspect.getfile(element.base_element.__class__))
-                element_identifier[-1]['test_disabled'] = element.test_disabled
-                element_identifier[-1]['disabled'] = element.disabled
-                element_identifier[-1]['hyperparameters'] = element.hyperparameters
-
-            else:
-                try:
-                    joblib.dump(element, folder + filename + '.pkl', compress=1)
-                    element_identifier[-1]['mode'] = 'pickle'
-                except Exception:
-                    raise NotImplementedError("Custom pipeline element must implement .save() method or "
-                                              "allow pickle.")
-            element_number += 1
-        # save pipeline blueprint to make loading of pipeline easier
-        with open(folder + '_optimum_pipe_blueprint.pkl', 'wb') as f:
-            pickle.dump(element_identifier, f)
-
-        # get all files
-        files = glob.glob(folder + '_optimum_pipe_*')
-        with zipfile.ZipFile(file, 'w') as myzip:
-            for f in files:
-                myzip.write(f, os.path.basename(f))
-                os.remove(f)
-            for f in wrapper_files:
-                myzip.write(f, os.path.splitext(os.path.basename(f))[0] + '.py')
-        os.removedirs(folder)
-
-    @staticmethod
-    def load_optimum_pipe(file):
-        """
-        Load optimal pipeline.
-
-
-        Parameters
-        ----------
-        * 'file' [str]:
-            File path specifying .photon file to load optimal pipeline from
-
-        Returns
-        -------
-        sklearn Pipeline with all trained photon_pipelines
-        """
-        if file.endswith('.photon'):
-            archive_name = os.path.splitext(file)[0]
-            folder = archive_name + '/'
-            zf = zipfile.ZipFile(file)
-            zf.extractall(folder)
-        else:
-            raise FileNotFoundError('Specify .photon file that holds PHOTON optimum pipe.')
-
-        with open(folder + '_optimum_pipe_blueprint.pkl', 'rb') as blueprint_file:
-            setup_info = pickle.load(blueprint_file)
-        element_list = list()
-        for element_info in setup_info:
-            if element_info['mode'] == 'custom':
-                spec = importlib.util.spec_from_file_location(element_info['element_name'],
-                                                              folder + element_info['wrapper_script'])
-                imported_module = importlib.util.module_from_spec(spec)
-                spec.loader.exec_module(imported_module)
-                base_element = getattr(imported_module, element_info['element_name'])
-                custom_element = PipelineElement(name=element_info['element_name'], base_element=base_element(),
-                                                 hyperparameters=element_info['hyperparameters'],
-                                                 test_disabled=element_info['test_disabled'],
-                                                 disabled=element_info['disabled'])
-                custom_element.base_element.load(folder + element_info['filename'])
-                element_list.append((element_info['element_name'], custom_element))
-            else:
-                element_list.append((element_info['element_name'], joblib.load(folder + element_info['filename'] + '.pkl')))
-
-        return Pipeline(element_list)
-
-
-    def inverse_transform_pipeline(self, hyperparameters: dict, data, targets, data_to_inverse):
-        """
-        Inverse transform data for a pipeline with specific hyperparameter configuration
-
-        1. Copy Sklearn Pipeline,
-        2. Set Parameters
-        3. Fit Pipeline to data and targets
-        4. Inverse transform data with that pipeline
-
-        Parameters
-        ----------
-        * 'hyperparameters' [dict]:
-            The concrete configuration settings for the pipeline elements
-        * 'data' [array-like]:
-            The training data to which the pipeline is fitted
-        * 'targets' [array-like]:
-            The truth values for training
-        * 'data_to_inverse' [array-like]:
-            The data that should be inverse-transformed after training
-
-        Returns
-        -------
-        Inverse-transformed data as array
-        """
-        copied_pipe = self._copy_pipeline()
-        copied_pipe.set_params(**hyperparameters)
-        copied_pipe.fit(data, targets)
-        return copied_pipe.inverse_transform(data_to_inverse)
-
-    def _optimize_printing(self, config: dict):
-        """
-        make the sklearn config syntax nicely human-readable
-        """
-        prettified_config = [self.name + '\n']
-        for el_key, el_value in config.items():
-            items = el_key.split('__')
-            name = items[0]
-            rest = '__'.join(items[1::])
-            if name in self._pipe.named_steps:
-                new_pretty_key = '    ' + name + '->'
-                prettified_config.append(new_pretty_key +
-                                         self._pipe.named_steps[name].prettify_config_output(rest, el_value) + '\n')
-            else:
-                Logger().error('ValueError: Item is not contained in pipeline:' + name)
-                raise ValueError('Item is not contained in pipeline:' + name)
-        return ''.join(prettified_config)
-
-    @staticmethod
-    def prettify_config_output(config_name: str, config_value):
-        """
-        Print disabled = False as enabled = True for better readability
-        """
-        if config_name == "disabled" and config_value is False:
-            return "enabled = True"
-        else:
-            return config_name + '=' + str(config_value)
-
-
-    def config_to_dict(self, specific_config):
-        """
-        Translate a configuration into a human-readable dictionary, element by element
-        """
-        config = {}
-        for key, value in specific_config.items():
-            items = key.split('__')
-            name = items[0]
-            rest = '__'.join(items[1::])
-            if name in self._pipe.named_steps:
-                config.update(self._pipe.named_steps[name].prettify_config_output(rest, value, return_dict=True))
-                #config[name] = value
-        return config
-

Ancestors (in MRO)

  • Hyperpipe
  • sklearn.base.BaseEstimator
  • builtins.object

Class variables


var OPTIMIZER_DICTIONARY
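Maps optimizer names to their implementing classes. As the __init__ source below shows, a string such as 'grid_search' is looked up in this dictionary and instantiated with optimizer_params; a minimal sketch, assuming a registered 'grid_search' entry:

    optimizer_class = Hyperpipe.OPTIMIZER_DICTIONARY['grid_search']
    optimizer_instance = optimizer_class()  # __init__ calls optimizer_class(**optimizer_params)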


Static methods


def __init__(

self, name, inner_cv, outer_cv=None, optimizer='grid_search', optimizer_params={}, metrics=None, best_config_metric=None, eval_final_performance=True, test_size=0.2, calculate_metrics_per_fold=True, calculate_metrics_across_folds=False, groups=None, set_random_seed=False, filter_element=None, imbalanced_data_strategy_filter='', verbosity=0, persist_options=None, performance_constraints=None)


Initialize self. See help(type(self)) for accurate signature.

def __init__(self, name, inner_cv: BaseCrossValidator, outer_cv=None,
-             optimizer='grid_search', optimizer_params: dict = {}, metrics=None,
-             best_config_metric=None, eval_final_performance=True, test_size: float = 0.2,
-             calculate_metrics_per_fold: bool = True, calculate_metrics_across_folds: bool = False,
-             groups=None, set_random_seed: bool=False,
-             filter_element=None, imbalanced_data_strategy_filter: str = '',
-             verbosity=0,
-             persist_options=None,
-             performance_constraints=None):
-    # Re eval_final_performance:
-    # set eval_final_performance to False because
-    # 1. if no cv-object is given, no split is performed --> seems more logical
-    #    than passing nothing, passing no cv-object but getting
-    #    an 80/20 split by default
-    # 2. if cv-object is given, split is performed but we don't peek
-    #    into the test set --> thus we can evaluate more hp configs
-    #    later without double dipping
-    self.name = name
-    self.inner_cv = inner_cv
-    self.outer_cv = outer_cv
-    self.eval_final_performance = eval_final_performance
-    self.test_size = test_size
-    self.cv_iter = None
-    self.data_test_cases = None
-    self.calculate_metrics_per_fold = calculate_metrics_per_fold
-    self.calculate_metrics_across_folds = calculate_metrics_across_folds
-    # Todo: if self.outer_cv is LeaveOneOut: Set calculate metrics across folds to True -> Print
-    self.X = None
-    self.y = None
-    self.groups = groups
-    self.filter_element = filter_element
-    if imbalanced_data_strategy_filter:
-        self.imbalanced_data_strategy_filter = ImbalancedDataTransform(imbalanced_data_strategy_filter)
-    else:
-        self.imbalanced_data_strategy_filter = None
-    self.fit_duration = 0
-    if set_random_seed:
-        import random
-        random.seed(42)
-        print('set random seed to 42')
-    # set verbosity level
-    Logger().set_verbosity(verbosity)
-    # MongoDBWriter setup
-    if persist_options:
-        self.persist_options = persist_options
-        if self.persist_options.log_file:
-            Logger().set_custom_log_file(self.persist_options.log_file)
-    else:
-        self.persist_options = OutputSettings()
-    self.mongodb_writer = MongoDBWriter(self.persist_options)
-    self.pipeline_elements = []
-    self._pipe = None
-    self.optimum_pipe = None
-    self.metrics = metrics
-    #  Todo: raise error or warning if metrics and best config_metric is None
-    self.best_config_metric = best_config_metric
-    self.config_optimizer = None
-    self.result_tree = None
-    self.best_config = None
-    self.best_children_config = None
-    self.best_performance = None
-    self.is_final_fit = False
-    self.__mother_outer_fold_counter = 0
-    self.__mother_inner_fold_counter = 0
-    self.__mother_config_counter = 0
-    # containers for optimization history and logging
-    self._performance_history_list = []
-    if isinstance(optimizer, str):
-        # instantiate optimizer from string
-        #  Todo: check if optimizer strategy is already implemented
-        optimizer_class = self.OPTIMIZER_DICTIONARY[optimizer]
-        optimizer_instance = optimizer_class(**optimizer_params)
-        self.optimizer = optimizer_instance
-    else:
-        # Todo: check if correct object
-        self.optimizer = optimizer
-    self._validation_X = None
-    self._validation_y = None
-    self._test_X = None
-    self._test_y = None
-    self._last_fit_data_hash = None
-    self._current_fold = -1
-    self._num_of_folds = 0
-    self._is_mother_pipe = True
-    self._fold_data_hashes = []
-    self.inner_cv_callback_function = performance_constraints
-
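For orientation, a minimal construction sketch following the signature above; KFold is sklearn's cross-validation splitter, and 'grid_search' is assumed to be registered in OPTIMIZER_DICTIONARY:

    from sklearn.model_selection import KFold

    pipe = Hyperpipe('my_pipe',
                     inner_cv=KFold(n_splits=3),
                     outer_cv=KFold(n_splits=3),
                     optimizer='grid_search',
                     metrics=['accuracy'],
                     best_config_metric='accuracy')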

def add(

self, pipe_element)

def add(self, pipe_element):
-    self.__iadd__(pipe_element)
-
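A one-line usage sketch; 'StandardScaler' is a hypothetical element name that must exist in the element registry:

    pipe.add(PipelineElement('StandardScaler'))  # sugar for: pipe += PipelineElement('StandardScaler')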

def config_to_dict(

self, specific_config)

def config_to_dict(self, specific_config):
-    """
-    Translate a configuration into a human-readable dictionary, element by element
-    """
-    config = {}
-    for key, value in specific_config.items():
-        items = key.split('__')
-        name = items[0]
-        rest = '__'.join(items[1::])
-        if name in self._pipe.named_steps:
-            config.update(self._pipe.named_steps[name].prettify_config_output(rest, value, return_dict=True))
-            #config[name] = value
-    return config
-

def copy_me(

self)


Helper function to copy all pipeline elements

def copy_me(self):
-    """
-    Helper function to copy all pipeline elements
-    """
-    item_list = []
-    for item in self.pipeline_elements:
-        item_list.append(item.copy_me())
-    return item_list
-

def fit(

self, data, targets, **fit_params)


Starts the hyperparameter search and/or fits the pipeline to the data and targets

Manages the nested cross-validated hyperparameter search:

  1. Filters the data according to the filter strategy (1) and the imbalanced_data_strategy (2),
  2. requests new configurations from the hyperparameter search strategy, the optimizer,
  3. initializes the testing of a specific configuration,
  4. communicates the result to the optimizer,
  5. repeats 2-4 until the optimizer delivers no more configurations to test,
  6. finally searches for the best config among all tested configs,
  7. trains the pipeline with the best config and evaluates the performance on the test set

Parameters

  • data [array-like, shape=[N, D]]: the training and test data, where N is the number of samples and D is the number of features.
  • targets [array-like, shape=[N]]: the truth values, where N is the number of samples.

Returns

  • self: returns self
def fit(self, data, targets, **fit_params):
-    """
-    Starts the hyperparameter search and/or fits the pipeline to the data and targets
-    Manages the nested cross validated hyperparameter search:
-    1. Filters the data according to filter strategy (1) and according to the imbalanced_data_strategy (2)
-    2. requests new configurations from the hyperparameter search strategy, the optimizer,
-    3. initializes the testing of a specific configuration,
-    4. communicates the result to the optimizer,
-    5. repeats 2-4 until optimizer delivers no more configurations to test
-    6. finally searches for the best config in all tested configs,
-    7. trains the pipeline with the best config and evaluates the performance on the test set
-    Parameters
-    ----------
-     * `data` [array-like, shape=[N, D]]:
-        the training and test data, where N is the number of samples and D is the number of features.
-     * `targets` [array-like, shape=[N]]:
-        the truth values, where N is the number of samples.
-    Returns
-    -------
-     * 'self'
-        Returns self
-    """
-    # in case we want to inject some data from outside the pipeline
-    self.X = data
-    self.y = targets
-    # !!!!!!!!!!!!!!!! FIT ONLY IF DATA CHANGED !!!!!!!!!!!!!!!!!!!
-    # -------------------------------------------------------------
-    # in case we need to reduce the dimension of the data due to parallelism of the outer pipe, let's do it.
-    if self.filter_element:
-        self.X = self.filter_element.transform(self.X)
-    # if the groups are imbalanced, and a strategy is chosen, apply it here
-    if self.imbalanced_data_strategy_filter:
-        self.imbalanced_data_strategy_filter.fit(self.X, self.y)
-        self.X, self.y = self.imbalanced_data_strategy_filter.transform()
-    self._current_fold += 1
-    # be compatible with lists of (image) files
-    if isinstance(self.X, list):
-        self.X = np.asarray(self.X)
-    if isinstance(self.y, list):
-        self.y = np.asarray(self.y)
-    # handle neuro image paths as data
-    # ToDo: Need to check the DATA, not the img paths for neuro
-    new_data_hash = sha1(np.asarray(self.X, order='C')).hexdigest()
-    # fit
-    # 1. if it is first time ever or
-    # 2. the data did change for that fold or
-    # 3. if it is the mother pipe (then number_of_folds = 0)
-    if (len(self._fold_data_hashes) < self._num_of_folds) \
-            or (self._num_of_folds > 0 and self._fold_data_hashes[self._current_fold] != new_data_hash) \
-            or self._num_of_folds == 0:
-        # save data hash for that fold
-        if self._num_of_folds > 0:
-            if len(self._fold_data_hashes) < self._num_of_folds:
-                self._fold_data_hashes.append(new_data_hash)
-            else:
-                self._fold_data_hashes[self._current_fold] = new_data_hash
-        # optimize: iterate through configs and save results
-        if not self.is_final_fit:
-            # first check if correct optimizer metric has been chosen
-            # pass pipeline_elements so that OptimizerMetric can look for last
-            # element and use the corresponding score method
-            self.config_optimizer = OptimizerMetric(self.best_config_metric, self.pipeline_elements, self.metrics)
-            self.metrics = self.config_optimizer.check_metrics()
-            if 'score' in self.metrics:
-                Logger().warn('Attention: Scoring with default score function of estimator can slow down calculations!')
-            # generate OUTER ! cross validation splits to iterate over
-            self._generate_outer_cv_indices()
-            outer_fold_counter = 0
-            if not self._is_mother_pipe:
-                self.result_tree_name = self.name + '_outer_fold_' + str(self.__mother_outer_fold_counter)  \
-                                        + '_inner_fold_' + str(self.__mother_inner_fold_counter)
-            else:
-                self.result_tree_name = self.name
-            # initialize result logging with hyperpipe class
-            self.result_tree = MDBHyperpipe(name=self.result_tree_name)
-            self.result_tree.outer_folds = []
-            self.result_tree.eval_final_performance = self.eval_final_performance
-            self.result_tree.best_config_metric = self.best_config_metric
-            # loop over outer cross validation
-            for train_indices, test_indices in self.data_test_cases:
-                # give the optimizer the chance to inform about elements
-                self.optimizer.prepare(self.pipeline_elements)
-                outer_fold_counter += 1
-                outer_fold_fit_start_time = time.time()
-                Logger().info('HYPERPARAMETER SEARCH OF {0}, Outer Cross validation Fold {1}'
-                              .format(self.name, outer_fold_counter))
-                t1 = time.time()
-                # Prepare Train and validation set data
-                self._validation_X = self.X[train_indices]
-                self._validation_y = self.y[train_indices]
-                self._test_X = self.X[test_indices]
-                self._test_y = self.y[test_indices]
-                # Prepare inner cross validation
-                cv_iter = list(self.inner_cv.split(self._validation_X, self._validation_y))
-                num_folds = len(cv_iter)
-                num_samples_train = len(self._validation_y)
-                num_samples_test = len(self._test_y)
-                # distribute number of folds to encapsulated child hyperpipes
-                self._distribute_cv_info_to_hyperpipe_children(num_of_folds=num_folds,
-                                                               outer_fold_counter=outer_fold_counter)
-                tested_config_counter = 0
-                # add outer fold info object to result tree
-                outer_fold = MDBOuterFold(fold_nr=outer_fold_counter)
-                outer_fold.tested_config_list = []
-                self.result_tree.outer_folds.append(outer_fold)
-                # do the optimizing
-                for current_config in self.optimizer.next_config:
-                    self._distribute_cv_info_to_hyperpipe_children(reset=True, config_counter=tested_config_counter)
-                    hp = TestPipeline(self._pipe, current_config, self.metrics, self.update_mother_inner_fold_nr,
-                                      mongo_db_settings=self.persist_options,
-                                      callback_function=self.inner_cv_callback_function)
-                    Logger().debug('optimizing: ' + self.name)
-                    Logger().debug(self._optimize_printing(current_config))
-                    Logger().debug('calculating...')
-                    # Test the configuration cross validated by inner_cv object
-                    current_config_mdb = hp.calculate_cv_score(self._validation_X, self._validation_y, cv_iter,
-                                                        calculate_metrics_per_fold=self.calculate_metrics_per_fold,
-                                                        calculate_metrics_across_folds=self.calculate_metrics_across_folds)
-                    current_config_mdb.config_nr = tested_config_counter
-                    current_config_mdb.config_dict = current_config
-                    current_config_mdb.pipe_name = self.name
-                    tested_config_counter += 1
-                    current_config_mdb.human_readable_config = self.config_to_dict(current_config)
-                    # save the configuration of all children pipelines
-                    children_config = {}
-                    children_config_ref_list = []
-                    for pipe_step in self._pipe.steps:
-                        item = pipe_step[1]
-                        if isinstance(item, Hyperpipe):
-                            if item.local_search and item.best_config is not None:
-                                children_config[item.name] = item.best_config
-                        elif isinstance(item, PipelineStacking):
-                            for subhyperpipe_name, hyperpipe in item.pipe_elements.items():
-                                if isinstance(hyperpipe, Hyperpipe):
-                                    if hyperpipe.local_search and hyperpipe.best_config is not None:
-                                        # special case: we need to access pipe over pipeline_stacking element
-                                        children_config[item.name + '__' + subhyperpipe_name] = hyperpipe.best_config.config_dict
-                                    # children_config_ref_list.append(hyperpipe.best_config_outer_fold._id)
-                    specific_parameters = self._pipe.get_params()
-                    #current_config_mdb.full_model_spec = specific_parameters
-                    current_config_mdb.children_config_dict = children_config
-                    current_config_mdb.children_config_ref = children_config_ref_list
-                    Logger().verbose(self._optimize_printing(current_config))
-                    if not current_config_mdb.config_failed:
-                        # get optimizer_metric and forward to optimizer
-                        # todo: also pass greater_is_better=True/False to optimizer
-                        metric_train = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric)
-                        metric_test = MDBHelper.get_metric(current_config_mdb, FoldOperations.MEAN, self.config_optimizer.metric, train=False)
-                        #
-                        # if not metric_train or metric_test:
-                        #     raise Exception("Config did not fail, but did not get any metrics either....!!?")
-                        config_performance = (metric_train, metric_test)
-                        # Print Result for config
-                        Logger().debug('...done:')
-                        Logger().verbose(self.config_optimizer.metric + str(config_performance))
-                    else:
-                        config_performance = (-1, -1)
-                        # Print Result for config
-                        Logger().debug('...failed:')
-                        Logger().error(current_config_mdb.config_error)
-                    # add config to result tree and do intermediate saving
-                    self.result_tree.outer_folds[-1].tested_config_list.append(current_config_mdb)
-                    # Todo: add try catch in case config cannot be written
-                    self.mongodb_writer.save(self.result_tree)
-                    # 3. inform optimizer about performance
-                    self.optimizer.evaluate_recent_performance(current_config, config_performance)
-                if tested_config_counter > 0:
-                    best_config_outer_fold = self.config_optimizer.get_optimum_config(outer_fold.tested_config_list)
-                    if not best_config_outer_fold:
-                        raise Exception("No best config was found!")
-                    best_config_outer_fold_mdb = MDBConfig()
-                    best_config_outer_fold_mdb.children_config_dict = best_config_outer_fold.children_config_dict
-                    best_config_outer_fold_mdb.pipe_name = self.name
-                    best_config_outer_fold_mdb.children_config_ref = best_config_outer_fold.children_config_ref
-                    # best_config_outer_fold_mdb.best_config_ref_to_train_item = best_config_outer_fold._id
-                    best_config_outer_fold_mdb.config_dict = best_config_outer_fold.config_dict
-                    best_config_outer_fold_mdb.human_readable_config = best_config_outer_fold.human_readable_config
-                    # inform user
-                    Logger().info('finished optimization of ' + self.name)
-                    Logger().verbose('Result')
-                    Logger().verbose('Number of tested configurations:' + str(tested_config_counter))
-                    Logger().verbose('Optimizer metric: ' + self.config_optimizer.metric + '\n' +
-                                     '   --> Greater is better: ' + str(self.config_optimizer.greater_is_better))
-                    Logger().info('Best config: ' + self._optimize_printing(best_config_outer_fold_mdb.config_dict) +
-                                  '\n' + '... with children config: '
-                                  + self._optimize_printing(best_config_outer_fold_mdb.children_config_dict))
-                    # ... and create optimal pipeline
-                    self.optimum_pipe = self._pipe
-                    # set self to best config
-                    self.optimum_pipe.set_params(**best_config_outer_fold_mdb.config_dict)
-                    # set all children to best config and inform to NOT optimize again, ONLY fit
-                    for child_name, child_config in best_config_outer_fold_mdb.children_config_dict.items():
-                        if child_config:
-                            # in case we have a pipeline stacking we need to identify the particular subhyperpipe
-                            splitted_name = child_name.split('__')
-                            if len(splitted_name) > 1:
-                                stacking_element = self.optimum_pipe.named_steps[splitted_name[0]]
-                                pipe_element = stacking_element.pipe_elements[splitted_name[1]]
-                            else:
-                                pipe_element = self.optimum_pipe.named_steps[child_name]
-                            pipe_element.set_params(**child_config)
-                            pipe_element.is_final_fit = True
-                    self._distribute_cv_info_to_hyperpipe_children(reset=True)
-                    Logger().verbose('...now fitting ' + self.name + ' with optimum configuration')
-                    fit_time_start = time.time()
-                    self.optimum_pipe.fit(self._validation_X, self._validation_y)
-                    final_fit_duration = time.time() - fit_time_start
-                    #self.best_config_outer_fold.full_model_spec = self.optimum_pipe.get_params()
-                    # store the duration in minutes, as the field name promises
-                    best_config_outer_fold_mdb.fit_duration_minutes = final_fit_duration / 60
-                    self.result_tree.outer_folds[-1].best_config = best_config_outer_fold_mdb
-                    self.result_tree.outer_folds[-1].best_config.inner_folds = []
-                    if self.eval_final_performance:
-                        # Todo: generate mean and std over outer folds as well. move these items to the top
-                        Logger().verbose('...now predicting ' + self.name + ' unseen data')
-                        test_score_mdb = TestPipeline.score(self.optimum_pipe, self._test_X, self._test_y,
-                                                            self.metrics,
-                                                            save_predictions=self.persist_options.save_predictions,
-                                                            save_feature_importances=self.persist_options.save_feature_importances)
-                        Logger().info('.. calculating metrics for test set (' + self.name + ')')
-                        Logger().verbose('...now predicting ' + self.name + ' final model with training data')
-                        train_score_mdb = TestPipeline.score(self.optimum_pipe, self._validation_X, self._validation_y,
-                                                             self.metrics,
-                                                             save_predictions=self.persist_options.save_predictions,
-                                                             save_feature_importances=self.persist_options.save_feature_importances)
-                        # save test fold
-                        outer_fold_mdb = MDBInnerFold()
-                        outer_fold_mdb.fold_nr = 1
-                        outer_fold_mdb.number_samples_training = num_samples_train
-                        outer_fold_mdb.number_samples_validation = num_samples_test
-                        outer_fold_mdb.training = train_score_mdb
-                        outer_fold_mdb.validation = test_score_mdb
-                        self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-                        Logger().info('PERFORMANCE TRAIN:')
-                        for m_key, m_value in train_score_mdb.metrics.items():
-                            Logger().info(str(m_key) + ": " + str(m_value))
-                        Logger().info('PERFORMANCE TEST:')
-                        for m_key, m_value in test_score_mdb.metrics.items():
-                            Logger().info(str(m_key) + ": " + str(m_value))
-                    else:
-                        # save test fold
-                        outer_fold_mdb = MDBInnerFold()
-                        outer_fold_mdb.fold_nr = 1
-                        outer_fold_mdb.number_samples_training = num_samples_train
-                        outer_fold_mdb.number_samples_validation = num_samples_test
-                        def _copy_inner_fold_means(metric_dict):
-                            # We copy all mean values from validation to the best config
-                            # training
-                            train_item_metrics = {}
-                            for m in metric_dict:
-                                if m.operation == str(FoldOperations.MEAN):
-                                    train_item_metrics[m.metric_name] = m.value
-                            train_item = MDBScoreInformation()
-                            train_item.metrics_copied_from_inner = True
-                            train_item.metrics = train_item_metrics
-                            return train_item
-                        # training
-                        outer_fold_mdb.training = _copy_inner_fold_means(best_config_outer_fold.metrics_train)
-                        # validation
-                        outer_fold_mdb.validation = _copy_inner_fold_means(best_config_outer_fold.metrics_test)
-                        self.result_tree.outer_folds[-1].best_config.inner_folds = [outer_fold_mdb]
-                Logger().info('This took {} minutes.'.format((time.time() - t1) / 60))
-                self.result_tree.time_of_results = datetime.datetime.now()
-                self.mongodb_writer.save(self.result_tree)
-                self._distribute_cv_info_to_hyperpipe_children(reset_final_fit=True, outer_fold_counter=outer_fold_counter)
-            # Compute all final metrics
-            self.result_tree.metrics_train, self.result_tree.metrics_test = MDBHelper.aggregate_metrics(self.result_tree.outer_folds,
-                                                                                                        self.metrics)
-            # save result tree to db or file or both
-            self.mongodb_writer.save(self.result_tree)
-            Logger().info("Saved result tree to database")
-            # Find best config across outer folds
-            self.best_config = self.config_optimizer.get_optimum_config_outer_folds(self.result_tree.outer_folds)
-            self.result_tree.best_config = self.best_config
-            Logger().info('OVERALL BEST CONFIGURATION')
-            Logger().info('--------------------------')
-            Logger().info(self._optimize_printing(self.best_config.config_dict) +
-                          '\n' + '... with children config: '
-                          + self._optimize_printing(self.best_config.children_config_dict))
-            # set self to best config
-            self.optimum_pipe = self._pipe
-            self.optimum_pipe.set_params(**self.best_config.config_dict)
-            self.optimum_pipe.fit(self._validation_X, self._validation_y)
-            # save results again
-            self.mongodb_writer.save(self.result_tree)
-            Logger().info("Saved overall best config to database")
-        ###############################################################################################
-        else:
-            self._pipe.fit(self.X, self.y, **fit_params)
-    else:
-        Logger().verbose("Avoided fitting of " + self.name + " on fold "
-                         + str(self._current_fold) + " because data did not change")
-        Logger().verbose('Best config of ' + self.name + ' : ' + str(self.best_config))
-    return self
-
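A hypothetical end-to-end sketch of the search documented above, using the Hyperpipe construction sketched under __init__ and toy numpy data:

    import numpy as np

    X = np.random.rand(100, 10)         # N=100 samples, D=10 features
    y = np.random.randint(2, size=100)  # binary targets
    pipe.fit(X, y)                      # runs the nested CV search above
    y_pred = pipe.predict(X)            # delegates to the fitted optimum pipe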

def get_params(

self, deep=True)


Retrieve parameters from sklearn pipeline

def get_params(self, deep=True):
-    """
-    Retrieve parameters from sklearn pipeline
-    """
-    if self._pipe is not None:
-        return self._pipe.get_params(deep)
-    else:
-        return None
-

def inverse_transform_pipeline(

self, hyperparameters, data, targets, data_to_inverse)


Inverse transform data for a pipeline with specific hyperparameter configuration

  1. Copy Sklearn Pipeline,
  2. Set Parameters,
  3. Fit Pipeline to data and targets,
  4. Inverse transform data with that pipeline

Parameters

  • 'hyperparameters' [dict]: the concrete configuration settings for the pipeline elements
  • 'data' [array-like]: the training data to which the pipeline is fitted
  • 'targets' [array-like]: the truth values for training
  • 'data_to_inverse' [array-like]: the data that should be inverse-transformed after training

Returns

Inverse-transformed data as array

def inverse_transform_pipeline(self, hyperparameters: dict, data, targets, data_to_inverse):
-    """
-    Inverse transform data for a pipeline with specific hyperparameter configuration
-    1. Copy Sklearn Pipeline,
-    2. Set Parameters
-    3. Fit Pipeline to data and targets
-    4. Inverse transform data with that pipeline
-    Parameters
-    ----------
-    * 'hyperparameters' [dict]:
-        The concrete configuration settings for the pipeline elements
-    * 'data' [array-like]:
-        The training data to which the pipeline is fitted
-    * 'targets' [array-like]:
-        The truth values for training
-    * 'data_to_inverse' [array-like]:
-        The data that should be inverse-transformed after training
-    Returns
-    -------
-    Inverse-transformed data as array
-    """
-    copied_pipe = self._copy_pipeline()
-    copied_pipe.set_params(**hyperparameters)
-    copied_pipe.fit(data, targets)
-    return copied_pipe.inverse_transform(data_to_inverse)
-
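A usage sketch; the element and hyperparameter name ('my_pca__n_components') is hypothetical and must match your pipeline:

    inverted = pipe.inverse_transform_pipeline({'my_pca__n_components': 3},
                                               data=X, targets=y,
                                               data_to_inverse=X[:5])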

def load_optimum_pipe(

file)


Load optimal pipeline.


Parameters

  • 'file' [str]: file path specifying the .photon file to load the optimal pipeline from

Returns


sklearn Pipeline with all trained photon_pipelines

@staticmethod
-def load_optimum_pipe(file):
-    """
-    Load optimal pipeline.
-    Parameters
-    ----------
-    * 'file' [str]:
-        File path specifying .photon file to load optimal pipeline from
-    Returns
-    -------
-    sklearn Pipeline with all trained photon_pipelines
-    """
-    if file.endswith('.photon'):
-        archive_name = os.path.splitext(file)[0]
-        folder = archive_name + '/'
-        zf = zipfile.ZipFile(file)
-        zf.extractall(folder)
-    else:
-        raise FileNotFoundError('Specify .photon file that holds PHOTON optimum pipe.')
-    with open(folder + '_optimum_pipe_blueprint.pkl', 'rb') as blueprint_file:
-        setup_info = pickle.load(blueprint_file)
-    element_list = list()
-    for element_info in setup_info:
-        if element_info['mode'] == 'custom':
-            spec = importlib.util.spec_from_file_location(element_info['element_name'],
-                                                          folder + element_info['wrapper_script'])
-            imported_module = importlib.util.module_from_spec(spec)
-            spec.loader.exec_module(imported_module)
-            base_element = getattr(imported_module, element_info['element_name'])
-            custom_element = PipelineElement(name=element_info['element_name'], base_element=base_element(),
-                                             hyperparameters=element_info['hyperparameters'],
-                                             test_disabled=element_info['test_disabled'],
-                                             disabled=element_info['disabled'])
-            custom_element.base_element.load(folder + element_info['filename'])
-            element_list.append((element_info['element_name'], custom_element))
-        else:
-            element_list.append((element_info['element_name'], joblib.load(folder + element_info['filename'] + '.pkl')))
-    return Pipeline(element_list)
-
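A usage sketch; 'best_model.photon' is a hypothetical archive previously written by save_optimum_pipe:

    trained_pipe = Hyperpipe.load_optimum_pipe('best_model.photon')
    predictions = trained_pipe.predict(X)  # a plain, fully trained sklearn Pipeline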

def predict(

self, data)


Use the optimum pipe to predict the data


Returns

predicted targets

def predict(self, data):
-    """
-    Use the optimum pipe to predict the data
-    Returns
-    -------
-        predicted targets
-    """
-    # Todo: if local_search = true then use optimized pipe here?
-    if self._pipe:
-        if self.filter_element:
-            data = self.filter_element.transform(data)
-        return self.optimum_pipe.predict(data)
-
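A short sketch covering predict and predict_proba below; X_new stands for hypothetical held-out data:

    y_hat = pipe.predict(X_new)        # targets from the optimum pipe
    proba = pipe.predict_proba(X_new)  # only if the final estimator supports it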

def predict_proba(

self, data)


Predict probabilities


Returns


predicted probabilities

def predict_proba(self, data):
-    """
-    Predict probabilities
-    Returns
-    -------
-    predicted probabilities
-    """
-    if self._pipe:
-        if self.filter_element:
-            data = self.filter_element.transform(data)
-        return self.optimum_pipe.predict_proba(data)
-

def prettify_config_output(

config_name, config_value)


Print disabled = False as enabled = True for better readability

@staticmethod
-def prettify_config_output(config_name: str, config_value):
-    """
-    Print disabled = False as enabled = True for better readability
-    """
-    if config_name == "disabled" and config_value is False:
-        return "enabled = True"
-    else:
-        return config_name + '=' + str(config_value)
-
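Two illustrative calls with their expected return values:

    Hyperpipe.prettify_config_output('disabled', False)  # -> 'enabled = True'
    Hyperpipe.prettify_config_output('C', 1.0)           # -> 'C=1.0'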

def save_optimum_pipe(

self, file)


Save optimal pipeline only. The complete hyperpipe will not be saved.


Parameters

  • 'file' [str]: file path as string specifying the file to save the pipeline to
def save_optimum_pipe(self, file):
-    """
-    Save optimal pipeline only. The complete hyperpipe will not be saved.
-    Parameters
-    ----------
-    * 'file' [str]:
-        File path as string specifying file to save pipeline to
-    """
-    element_number = 0
-    element_identifier = list()
-    folder = os.path.splitext(file)[0]
-    file = os.path.splitext(file)[0] + '.photon'
-    if os.path.exists(folder):
-        raise FileExistsError('Trying to save optimum pipe: The file you specified already exists as a folder.')
-    else:
-        os.mkdir(folder)
-        folder = folder + '/'
-    wrapper_files = list()
-    for element_name, element in self.optimum_pipe.named_steps.items():
-        filename = '_optimum_pipe_' + str(element_number) + '_' + element_name
-        element_identifier.append({'element_name': element_name,
-                                   'filename': filename})
-        if hasattr(element.base_element, 'save'):
-            element.base_element.save(folder + filename)
-            element_identifier[-1]['mode'] = 'custom'
-            element_identifier[-1]['wrapper_script'] = os.path.basename(inspect.getfile(element.base_element.__class__))
-            wrapper_files.append(inspect.getfile(element.base_element.__class__))
-            element_identifier[-1]['test_disabled'] = element.test_disabled
-            element_identifier[-1]['disabled'] = element.disabled
-            element_identifier[-1]['hyperparameters'] = element.hyperparameters
-        else:
-            try:
-                joblib.dump(element, folder + filename + '.pkl', compress=1)
-                element_identifier[-1]['mode'] = 'pickle'
-            except:
-                raise NotImplementedError("Custom pipeline element must implement .save() method or "
-                                          "allow pickle.")
-        element_number += 1
-    # save pipeline blueprint to make loading of pipeline easier
-    with open(folder + '_optimum_pipe_blueprint.pkl', 'wb') as f:
-        pickle.dump(element_identifier, f)
-    # get all files
-    files = glob.glob(folder + '_optimum_pipe_*')
-    with zipfile.ZipFile(file, 'w') as myzip:
-        for f in files:
-            myzip.write(f, os.path.basename(f))
-            os.remove(f)
-        for f in wrapper_files:
-            myzip.write(f, os.path.splitext(os.path.basename(f))[0] + '.py')
-    os.removedirs(folder)
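
A usage sketch (the '.photon' extension is appended automatically, and restoring reuses the load logic excerpted at the top of this page; the exact loader entry point is assumed here):

    my_pipe.save_optimum_pipe('my_analysis')   # writes my_analysis.photon
    # hypothetical call shape for restoring the zipped sklearn pipeline:
    best_sklearn_pipe = Hyperpipe.load_optimum_pipe('my_analysis.photon')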

def set_params(self, **params)

Give parameter values to the pipeline elements

def set_params(self, **params):
-    """
-    Give parameter values to the pipeline elements
-    """
-    if self._pipe is not None:
-        self._pipe.set_params(**params)
-    return self

def transform(self, data)

Use the optimum pipe to transform the data

def transform(self, data):
-    """
-    Use the optimum pipe to transform the data
-    """
-    if self._pipe:
-        if self.filter_element:
-            data = self.filter_element.transform(data)
-        return self.optimum_pipe.transform(data)

def update_mother_inner_fold_nr(self, new_inner_fold_nr)

Function handle so that the TestPipeline class from Photon's validation module can pass the information to hyperpipe children

Parameters
----------
• 'new_inner_fold_nr' [int]: in which inner_fold the mother hyperpipe currently is

def update_mother_inner_fold_nr(self, new_inner_fold_nr: int):
-    """
-    Function handle so that the TestPipeline class from Photon's validation module can pass the information to hyperpipe children
-    Parameters
-    ----------
-    * 'new_inner_fold_nr' [int]:
-        in which inner_fold the mother hyperpipe currently is
-    """
-    self._distribute_cv_info_to_hyperpipe_children(inner_fold_counter=new_inner_fold_nr)

class Investigator

Instantiates a Flask website that shows you the results of the hyperparameter search, the best configuration, all of its performances etc.

class Investigator:
-    """
-    Instantiates a Flask website that shows you the results of the hyperparameter search, the best configuration,
-    all of its performances etc.
-    """
-
-    @staticmethod
-    def __build_url(storage: str, name: str):
-        """
-        creates a localhost url for displaying a pipeline according to its source (working memory, file or mongodb)
-        """
-        url = "http://localhost:7273/pipeline/" + storage + "/" + name
-        return url
-
-    @staticmethod
-    def show(pipe: Hyperpipe):
-        """
-        Opens the PHOTON investigator and shows the hyperpipe's hyperparameter search performance from working space
-
-        Parameters
-        ----------
-        * 'pipe' [Hyperpipe]:
-            The Hyperpipe object that has performed hyperparameter search
-
-        """
-
-        assert pipe is not None, "Investigator.show needs an object of Hyperpipe, is None"
-        assert isinstance(pipe, Hyperpipe), "Investigator.show needs an object of type Hyperpipe"
-        assert pipe.result_tree is not None, "Investigator.show needs an Hyperpipe that is already optimized, so it can show the result tree"
-        # make sure that Flask is running
-        FlaskManager().set_pipe_object(pipe.name, pipe.result_tree)
-        url = Investigator.__build_url("a", pipe.name)
-        Investigator.__delayed_browser(url)
-        FlaskManager().run_app()
-
-    @staticmethod
-    def load_from_db(mongo_connect_url: str, pipe_name: str):
-        """
-        Opens the PHOTON investigator and
-        loads a hyperpipe's performance results from a MongoDB instance
-
-        Parameters
-        ---------
-        * 'mongo_connect_url' [str]:
-            The MongoDB connection string including the database name
-        * 'pipe_name' [str]:
-            The name of the pipeline to load
-        """
-        FlaskManager().set_mongo_db_url(mongo_connect_url)
-        url = Investigator.__build_url("m", pipe_name)
-        Logger().info("Your url is: " + url)
-        Investigator.__delayed_browser(url)
-        FlaskManager().run_app()
-
-
-    @staticmethod
-    def load_many_from_db(mongo_connect_url: str, pipe_names: list):
-        """
-        Opens the PHOTON investigator and
-        loads several hyperpipes' performance results from a MongoDB instance
-
-        Parameters
-        ---------
-        * 'mongo_connect_url' [str]:
-            The MongoDB connection string including the database name
-        * 'pipe_names' [list]:
-            A list of the hyperpipe names to load
-        """
-
-        FlaskManager().set_mongo_db_url(mongo_connect_url)
-        for pipe in pipe_names:
-            url = Investigator.__build_url("m", pipe)
-            Logger().info("Your url is: " + url)
-        FlaskManager().run_app()
-
-    @staticmethod
-    def load_from_file(name: str, file_url: str):
-        """
-        Opens the PHOTON investigator and loads the hyperpipe search results from the file path
-
-        Parameters
-        ----------
-        * 'name' [str]:
-            The name of the hyperpipe object that you want to load
-        * 'file_url' [str]:
-            The path to the file in which the hyperparameter search results are encoded.
-        """
-        assert os.path.isfile(file_url), "File" + file_url + " does not exist or is not a file. Please give absolute path."
-        FlaskManager().set_pipe_file(name, file_url)
-        url = Investigator.__build_url("f", name)
-        Investigator.__delayed_browser(url)
-        FlaskManager().run_app()
-
-    # @staticmethod
-    # def load_files(file_list: list):
-    #     """
-    #        Opens the PHOTON investigator and loads the hyperpipe search results from the file path
-    #
-    #        Parameters
-    #        ----------
-    #        * 'file_url' [str]:
-    #            The path to the file in which the hyperparameter search results are encoded.
-    #     """
-    #     for file_url in file_list:
-    #         Investigator.load_from_file("" file_url)
-
-    @staticmethod
-    def __open_browser(url):
-        # we delay the browser opening for 2 seconds so that flask server can start in the meanwhile
-        slp(2)
-        webbrowser.open(url)
-
-    @staticmethod
-    def __delayed_browser(url):
-        Investigator.__open_browser(url)
-
Ancestors (in MRO)

Static methods

def load_from_db(mongo_connect_url, pipe_name)

Opens the PHOTON investigator and loads a hyperpipe's performance results from a MongoDB instance

Parameters
----------
• 'mongo_connect_url' [str]: The MongoDB connection string including the database name
• 'pipe_name' [str]: The name of the pipeline to load

@staticmethod
-def load_from_db(mongo_connect_url: str, pipe_name: str):
-    """
-    Opens the PHOTON investigator and
-    loads a hyperpipe's performance results from a MongoDB instance
-    Parameters
-    ---------
-    * 'mongo_connect_url' [str]:
-        The MongoDB connection string including the database name
-    * 'pipe_name' [str]:
-        The name of the pipeline to load
-    """
-    FlaskManager().set_mongo_db_url(mongo_connect_url)
-    url = Investigator.__build_url("m", pipe_name)
-    Logger().info("Your url is: " + url)
-    Investigator.__delayed_browser(url)
-    FlaskManager().run_app()

def load_from_file(name, file_url)

Opens the PHOTON investigator and loads the hyperpipe search results from the file path

Parameters
----------
• 'name' [str]: The name of the hyperpipe object that you want to load
• 'file_url' [str]: The path to the file in which the hyperparameter search results are encoded.

@staticmethod
-def load_from_file(name: str, file_url: str):
-    """
-    Opens the PHOTON investigator and loads the hyperpipe search results from the file path
-    Parameters
-    ----------
-    * 'name' [str]:
-        The name of the hyperpipe object that you want to load
-    * 'file_url' [str]:
-        The path to the file in which the hyperparameter search results are encoded.
-    """
-    assert os.path.isfile(file_url), "File" + file_url + " does not exist or is not a file. Please give absolute path."
-    FlaskManager().set_pipe_file(name, file_url)
-    url = Investigator.__build_url("f", name)
-    Investigator.__delayed_browser(url)
-    FlaskManager().run_app()

def load_many_from_db(mongo_connect_url, pipe_names)

Opens the PHOTON investigator and loads several hyperpipes' performance results from a MongoDB instance

Parameters
----------
• 'mongo_connect_url' [str]: The MongoDB connection string including the database name
• 'pipe_names' [list]: A list of the hyperpipe names to load

@staticmethod
-def load_many_from_db(mongo_connect_url: str, pipe_names: list):
-    """
-    Opens the PHOTON investigator and
-    loads several hyperpipes' performance results from a MongoDB instance
-    Parameters
-    ---------
-    * 'mongo_connect_url' [str]:
-        The MongoDB connection string including the database name
-    * 'pipe_names' [list]:
-        A list of the hyperpipe names to load
-    """
-    FlaskManager().set_mongo_db_url(mongo_connect_url)
-    for pipe in pipe_names:
-        url = Investigator.__build_url("m", pipe)
-        Logger().info("Your url is: " + url)
-    FlaskManager().run_app()

def show(pipe)

Opens the PHOTON investigator and shows the hyperpipe's hyperparameter search performance from working space

Parameters
----------
• 'pipe' [Hyperpipe]: The Hyperpipe object that has performed hyperparameter search

@staticmethod
-def show(pipe: Hyperpipe):
-    """
-    Opens the PHOTON investigator and shows the hyperpipe's hyperparameter search performance from working space
-    Parameters
-    ----------
-    * 'pipe' [Hyperpipe]:
-        The Hyperpipe object that has performed hyperparameter search
-    """
-    assert pipe is not None, "Investigator.show needs an object of Hyperpipe, is None"
-    assert isinstance(pipe, Hyperpipe), "Investigator.show needs an object of type Hyperpipe"
-    assert pipe.result_tree is not None, "Investigator.show needs an Hyperpipe that is already optimized, so it can show the result tree"
-    # make sure that Flask is running
-    FlaskManager().set_pipe_object(pipe.name, pipe.result_tree)
-    url = Investigator.__build_url("a", pipe.name)
-    Investigator.__delayed_browser(url)
-    FlaskManager().run_app()
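
Putting the three entry points together, a minimal sketch (connection string, names, and path are illustrative):

    Investigator.show(my_pipe)   # my_pipe: an already optimized Hyperpipe
    Investigator.load_from_db('mongodb://localhost:27017/photon_results', 'my_pipe')
    Investigator.load_from_file('my_pipe', '/absolute/path/to/results_file')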

class PhotonRegister

Helper class to manage the PHOTON Element Register.

Use it to add and remove items into the register. You can also retrieve information about items and their hyperparameters.

Every item in the register is encoded by a string literal that points to a python class and its namespace. You can access the python class via the string literal. The class PhotonElement imports and instantiates the class for you.

There is a distinct json file with the elements registered for each photon package (core, neuro, genetics, ...). There is also a json file for the user's custom elements.

Example:

# get info about object, name, namespace and possible hyperparameters
PhotonRegister.info("SVC")

# show all items that are registered
PhotonRegister.list()

# register new object
PhotonRegister.save("ABC1", "namespace.filename.ABC1", "Transformer")

# delete it again
PhotonRegister.delete("ABC1")

class PhotonRegister:
-    """
-    Helper class to manage the PHOTON Element Register.
-
-    Use it to add and remove items into the register.
-    You can also retrieve information about items and its hyperparameters.
-
-    Every item in the register is encoded by a string literal that points to a python class and its namespace.
-    You can access the python class via the string literal.
-    The class PhotonElement imports and instantiates the class for you.
-
-    There is a distinct json file with the elements registered for each photon package (core, neuro, genetics, ..)
-    There is also a json file for the user's custom elements.
-
-    Example:
-    --------
-    # get info about object, name, namespace and possible hyperparameters
-    PhotonRegister.info("SVC")
-
-    # show all items that are registered
-    PhotonRegister.list()
-
-    # register new object
-    PhotonRegister.save("ABC1", "namespace.filename.ABC1", "Transformer")
-
-    # delete it again.
-    PhotonRegister.delete("ABC1")
-
-    """
-
-    PHOTON_REGISTERS = ['PhotonCore', 'PhotonNeuro', 'CustomElements']
-
-    def __init__(self):
-        pass
-
-    @staticmethod
-    def save(photon_name: str, class_str: str, element_type: str, photon_package: str = "CustomElements"):
-        """
-        Save Element information to the JSON file
-
-        Parameters:
-        -----------
-        * 'photon_name' [str]:
-          The string literal with which you want to access the class
-        * 'class_str' [str]:
-          The namespace of the class, like in the import statement
-        * 'element_type' [str]:
-          Can be 'Estimator' or 'Transformer'
-        * 'photon_package' [str]:
-          The photonai module name e.g. photon_core, photon_neuro
-
-        """
-        # register_element and jsonify
-
-        if element_type != "Estimator" or element_type != "Transformer":
-            Logger().error("Variable element_type must be 'Estimator' or 'Transformer'")
-
-        duplicate = PhotonRegister.check_duplicate(photon_name, class_str)
-        if not duplicate:
-            content, _ = PhotonRegister.get_json(photon_package)  # load existing json
-            # add new element
-            content[photon_name] = class_str, element_type
-
-            # write back to file
-            PhotonRegister.write2json(content, photon_package)
-
-            Logger().info('Adding PipelineElement ' + class_str + ' to ' +
-                          photon_package + ' as "' + photon_name + '".')
-
-
-        else:
-            Logger().error('Could not register pipeline element due to duplicates.')
-
-    @staticmethod
-    def info(photon_name):
-        """
-        Show information for object that is encoded by this name.
-
-        Parameters:
-        -----------
-        * 'photon_name' [str]:
-          The string literal which accesses the class
-        """
-        content = PhotonRegister.get_package_info(PhotonRegister.PHOTON_REGISTERS)  # load existing json
-
-        if photon_name in content:
-            element_namespace, element_name = content[photon_name]
-
-            print("----------------------------------")
-            print("Name: " + element_name)
-            print("Namespace: " + element_namespace)
-            print("----------------------------------")
-
-            try:
-                imported_module = __import__(element_namespace, globals(), locals(), element_name, 0)
-                desired_class = getattr(imported_module, element_name)
-                base_element = desired_class()
-                print("Possible Hyperparameters as derived from constructor:")
-                class_args = inspect.signature(base_element.__init__)
-                for item, more_info in class_args.parameters.items():
-                    print("{:<35} {:<75}".format(item, str(more_info)))
-                print("----------------------------------")
-            except Exception as e:
-                Logger().error(e)
-                Logger().error("Could not instantiate class " + element_namespace + "." + element_name)
-        else:
-            Logger().error("Could not find element " + photon_name)
-
-
-
-    @staticmethod
-    def delete(photon_name, photon_package="CustomElements"):
-        """
-        Delete Element from JSON file
-
-        Parameters:
-        -----------
-        * 'photon_name' [str]:
-          The string literal encoding the class
-        """
-        content, _ = PhotonRegister.get_json(photon_package)  # load existing json
-
-        if photon_name in content:
-            del content[photon_name]
-
-        PhotonRegister.write2json(content, photon_package)
-        Logger().info('Removing the PipelineElement named "{0}" from {1}.'
-                      .format(photon_name, photon_package))
-
-    @staticmethod
-    def check_duplicate(photon_name, class_str):
-        """
-        Helper function to check if the entry is either registered by a different name or if the name is already given
-        to another class
-
-         Parameters:
-        -----------
-        * 'photon_name' [str]:
-          The name with which the class should be accessed
-        * 'class_str' [str]:
-          The namespace.Classname, where the class lives, from where it should be imported.
-
-        Returns:
-        --------
-        Bool, False if there is no key with this name and the class is not already registered with another key
-        """
-
-        content = PhotonRegister.get_package_info(PhotonRegister.PHOTON_REGISTERS)  # load existing json
-
-        # check for duplicate name (dict key)
-        flag = 0
-        if photon_name in content:
-            flag += 1
-            Logger().info('A PipelineElement named ' + photon_name + ' has already been registered.')
-
-        # check for duplicate class_str
-        if any(class_str in '.'.join([s[0], s[1]]) for s in content.values()):
-            flag += 1
-            Logger().info('The Class named ' + class_str + ' has already been registered.')
-
-        return flag > 0
-
-    # one json file per Photon Package (Core, Neuro, Genetics, Designer (if necessary)
-    @staticmethod
-    def get_json(photon_package: str):
-        """
-        Load JSON file in which the elements for the PHOTON submodule are stored.
-
-        The JSON files are stored in the framework folder by the name convention 'photon_package.json'
-
-        Parameters:
-        -----------
-        * 'photon_package' [str]:
-          The name of the photonai submodule
-
-        Returns:
-        --------
-        JSON file as dict, file path as str
-        """
-        file_name = os.path.dirname(inspect.getfile(PhotonRegister)) + '/' + photon_package + '.json'
-        file_content = {}
-        if os.path.isfile(file_name):
-            # Reading json
-            with open(file_name, 'r') as f:
-                try:
-                    file_content = json.load(f)
-                except json.JSONDecodeError as jde:
-                    # handle empty file
-                    if jde.msg == 'Expecting value':
-                        Logger().error("Package File " + file_name + " was empty.")
-                    else:
-                        raise jde
-            file_path = file_name
-        else:
-            file_content = dict()
-            file_path = None
-            print(file_name + ' not found. Creating file.')
-
-        return file_content, file_path
-
-    @staticmethod
-    def write2json(content2write: dict, photon_package: str):
-        """
-        Write json content to file
-
-        Parameters:
-        -----------
-        * 'content2write' [dict]:
-          The new information to attach to the file
-        * 'photon_package' [str]:
-          The PHOTON submodule name to which the new class belongs, so it is written to the correct json file
-        """
-        file_name = os.path.dirname(inspect.getfile(PhotonRegister)) + '/' + photon_package + '.json'
-        # Writing JSON data
-        with open(file_name, 'w') as f:
-            json.dump(content2write, f)
-
-    @staticmethod
-    def get_package_info(photon_package: list = PHOTON_REGISTERS) -> dict:
-        """
-        Collect all registered elements from JSON file
-
-        Parameters:
-        -----------
-        * 'photon_package' [list]:
-          The names of the PHOTON submodules for which the elements should be retrieved
-
-        Returns
-        -------
-        Dict of registered elements
-        """
-        class_info = dict()
-        for package in photon_package:
-
-            content, _ = PhotonRegister.get_json(package)
-
-            for key in content:
-                class_path, class_name = os.path.splitext(content[key][0])
-                class_info[key] = class_path, class_name[1:]
-        return class_info
-
-    @staticmethod
-    def list(photon_package: list = PHOTON_REGISTERS):
-        """
-        Print info about all items that are registered for the PHOTON submodule to the console.
-
-        Parameters:
-        -----------
-        * 'photon_package' [list]:
-          The names of the PHOTON submodules for which the elements should be retrieved
-        """
-        for package in photon_package:
-            content, file_name = PhotonRegister.get_json(package)
-            print('\n' + package + ' (' + file_name + ')')
-            for k, v in sorted(content.items()):
-                class_info, package_type = v
-                print("{:<35} {:<75} {:<5}".format(k, class_info, package_type))
Ancestors (in MRO)

Class variables

var PHOTON_REGISTERS

Static methods

def __init__(self)

Initialize self. See help(type(self)) for accurate signature.

def __init__(self):
-    pass

def check_duplicate(photon_name, class_str)

Helper function to check if the entry is either registered by a different name or if the name is already given to another class

Parameters:
-----------
• 'photon_name' [str]: The name with which the class should be accessed
• 'class_str' [str]: The namespace.Classname, where the class lives, from where it should be imported.

Returns:
--------
Bool, False if there is no key with this name and the class is not already registered with another key

@staticmethod
-def check_duplicate(photon_name, class_str):
-    """
-    Helper function to check if the entry is either registered by a different name or if the name is already given
-    to another class
-     Parameters:
-    -----------
-    * 'photon_name' [str]:
-      The name with which the class should be accessed
-    * 'class_str' [str]:
-      The namespace.Classname, where the class lives, from where it should be imported.
-    Returns:
-    --------
-    Bool, False if there is no key with this name and the class is not already registered with another key
-    """
-    content = PhotonRegister.get_package_info(PhotonRegister.PHOTON_REGISTERS)  # load existing json
-    # check for duplicate name (dict key)
-    flag = 0
-    if photon_name in content:
-        flag += 1
-        Logger().info('A PipelineElement named ' + photon_name + ' has already been registered.')
-    # check for duplicate class_str
-    if any(class_str in '.'.join([s[0], s[1]]) for s in content.values()):
-        flag += 1
-        Logger().info('The Class named ' + class_str + ' has already been registered.')
-    return flag > 0

def delete(photon_name, photon_package='CustomElements')

Delete Element from JSON file

Parameters:
-----------
• 'photon_name' [str]: The string literal encoding the class

@staticmethod
-def delete(photon_name, photon_package="CustomElements"):
-    """
-    Delete Element from JSON file
-    Parameters:
-    -----------
-    * 'photon_name' [str]:
-      The string literal encoding the class
-    """
-    content, _ = PhotonRegister.get_json(photon_package)  # load existing json
-    if photon_name in content:
-        del content[photon_name]
-    PhotonRegister.write2json(content, photon_package)
-    Logger().info('Removing the PipelineElement named "{0}" from {1}.'
-                  .format(photon_name, photon_package))

def get_json(photon_package)

Load JSON file in which the elements for the PHOTON submodule are stored.


The JSON files are stored in the framework folder by the name convention 'photon_package.json'

Parameters:
-----------
• 'photon_package' [str]: The name of the photonai submodule

Returns:
--------
JSON file as dict, file path as str

@staticmethod
-def get_json(photon_package: str):
-    """
-    Load JSON file in which the elements for the PHOTON submodule are stored.
-    The JSON files are stored in the framework folder by the name convention 'photon_package.json'
-    Parameters:
-    -----------
-    * 'photon_package' [str]:
-      The name of the photonai submodule
-    Returns:
-    --------
-    JSON file as dict, file path as str
-    """
-    file_name = os.path.dirname(inspect.getfile(PhotonRegister)) + '/' + photon_package + '.json'
-    file_content = {}
-    if os.path.isfile(file_name):
-        # Reading json
-        with open(file_name, 'r') as f:
-            try:
-                file_content = json.load(f)
-            except json.JSONDecodeError as jde:
-                # handle empty file
-                if jde.msg == 'Expecting value':
-                    Logger().error("Package File " + file_name + " was empty.")
-                else:
-                    raise jde
-        file_path = file_name
-    else:
-        file_content = dict()
-        file_path = None
-        print(file_name + ' not found. Creating file.')
-    return file_content, file_path

def get_package_info(photon_package=['PhotonCore', 'PhotonNeuro', 'CustomElements'])

Collect all registered elements from JSON file

Parameters:
-----------
• 'photon_package' [list]: The names of the PHOTON submodules for which the elements should be retrieved

Returns
-------
Dict of registered elements

@staticmethod
-def get_package_info(photon_package: list = PHOTON_REGISTERS) -> dict:
-    """
-    Collect all registered elements from JSON file
-    Parameters:
-    -----------
-    * 'photon_package' [list]:
-      The names of the PHOTON submodules for which the elements should be retrieved
-    Returns
-    -------
-    Dict of registered elements
-    """
-    class_info = dict()
-    for package in photon_package:
-        content, _ = PhotonRegister.get_json(package)
-        for key in content:
-            class_path, class_name = os.path.splitext(content[key][0])
-            class_info[key] = class_path, class_name[1:]
-    return class_info
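
Because `os.path.splitext` splits off the class name, the returned dict maps each element name to a (module path, class name) tuple; a sketch of the shape (entries illustrative):

    elements = PhotonRegister.get_package_info()
    # e.g. {'SVC': ('sklearn.svm', 'SVC'),
    #       'PCA': ('sklearn.decomposition', 'PCA'), ...}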

def info(photon_name)

Show information for object that is encoded by this name.

Parameters:
-----------
• 'photon_name' [str]: The string literal which accesses the class

@staticmethod
-def info(photon_name):
-    """
-    Show information for object that is encoded by this name.
-    Parameters:
-    -----------
-    * 'photon_name' [str]:
-      The string literal which accesses the class
-    """
-    content = PhotonRegister.get_package_info(PhotonRegister.PHOTON_REGISTERS)  # load existing json
-    if photon_name in content:
-        element_namespace, element_name = content[photon_name]
-        print("----------------------------------")
-        print("Name: " + element_name)
-        print("Namespace: " + element_namespace)
-        print("----------------------------------")
-        try:
-            imported_module = __import__(element_namespace, globals(), locals(), element_name, 0)
-            desired_class = getattr(imported_module, element_name)
-            base_element = desired_class()
-            print("Possible Hyperparameters as derived from constructor:")
-            class_args = inspect.signature(base_element.__init__)
-            for item, more_info in class_args.parameters.items():
-                print("{:<35} {:<75}".format(item, str(more_info)))
-            print("----------------------------------")
-        except Exception as e:
-            Logger().error(e)
-            Logger().error("Could not instantiate class " + element_namespace + "." + element_name)
-    else:
-        Logger().error("Could not find element " + photon_name)

def list(photon_package=['PhotonCore', 'PhotonNeuro', 'CustomElements'])

Print info about all items that are registered for the PHOTON submodule to the console.

Parameters:
-----------
• 'photon_package' [list]: The names of the PHOTON submodules for which the elements should be retrieved

@staticmethod
-def list(photon_package: list = PHOTON_REGISTERS):
-    """
-    Print info about all items that are registered for the PHOTON submodule to the console.
-    Parameters:
-    -----------
-    * 'photon_package' [list]:
-      The names of the PHOTON submodules for which the elements should be retrieved
-    """
-    for package in photon_package:
-        content, file_name = PhotonRegister.get_json(package)
-        print('\n' + package + ' (' + file_name + ')')
-        for k, v in sorted(content.items()):
-            class_info, package_type = v
-            print("{:<35} {:<75} {:<5}".format(k, class_info, package_type))

def save(photon_name, class_str, element_type, photon_package='CustomElements')

Save Element information to the JSON file

Parameters:
-----------
• 'photon_name' [str]: The string literal with which you want to access the class
• 'class_str' [str]: The namespace of the class, like in the import statement
• 'element_type' [str]: Can be 'Estimator' or 'Transformer'
• 'photon_package' [str]: The photonai module name e.g. photon_core, photon_neuro

@staticmethod
-def save(photon_name: str, class_str: str, element_type: str, photon_package: str = "CustomElements"):
-    """
-    Save Element information to the JSON file
-    Parameters:
-    -----------
-    * 'photon_name' [str]:
-      The string literal with which you want to access the class
-    * 'class_str' [str]:
-      The namespace of the class, like in the import statement
-    * 'element_type' [str]:
-      Can be 'Estimator' or 'Transformer'
-    * 'photon_package' [str]:
-      The photonai module name e.g. photon_core, photon_neuro
-    """
-    # register_element and jsonify
-    if element_type != "Estimator" or element_type != "Transformer":
-        Logger().error("Variable element_type must be 'Estimator' or 'Transformer'")
-    duplicate = PhotonRegister.check_duplicate(photon_name, class_str)
-    if not duplicate:
-        content, _ = PhotonRegister.get_json(photon_package)  # load existing json
-        # add new element
-        content[photon_name] = class_str, element_type
-        # write back to file
-        PhotonRegister.write2json(content, photon_package)
-        Logger().info('Adding PipelineElement ' + class_str + ' to ' +
-                      photon_package + ' as "' + photon_name + '".')
-    else:
-        Logger().error('Could not register pipeline element due to duplicates.')

def write2json(content2write, photon_package)

Write json content to file

Parameters:
-----------
• 'content2write' [dict]: The new information to attach to the file
• 'photon_package' [str]: The PHOTON submodule name to which the new class belongs, so it is written to the correct json file

@staticmethod
-def write2json(content2write: dict, photon_package: str):
-    """
-    Write json content to file
-    Parameters:
-    -----------
-    * 'content2write' [dict]:
-      The new information to attach to the file
-    * 'photon_package' [str]:
-      The PHOTON submodule name to which the new class belongs, so it is written to the correct json file
-    """
-    file_name = os.path.dirname(inspect.getfile(PhotonRegister)) + '/' + photon_package + '.json'
-    # Writing JSON data
-    with open(file_name, 'w') as f:
-        json.dump(content2write, f)
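
Since save() stores the pair (class_str, element_type) under the element name, the json file is simply a name-to-pair mapping; a sketch using the docstring's example element:

    PhotonRegister.save("ABC1", "namespace.filename.ABC1", "Transformer")
    # CustomElements.json now contains:
    # {"ABC1": ["namespace.filename.ABC1", "Transformer"]}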

class PipelineBranch


A substream of pipeline elements that is encapsulated e.g. for parallelization

Parameters
----------
• 'name' [str]: Name of the encapsulated item and/or summary of the encapsulated element's functions

class PipelineBranch(PipelineElement):
-    """
-     A substream of pipeline elements that is encapsulated e.g. for parallelization
-
-     Parameters
-     ----------
-        * 'name' [str]:
-            Name of the encapsulated item and/or summary of the encapsulated element's functions
-
-        """
-
-    def __init__(self, name):
-
-        super().__init__(name, {}, test_disabled=False, disabled=False, base_element=True)
-
-        self.pipeline_elements = []
-
-    def __iadd__(self, pipe_element):
-        """
-        Add an element to the sub pipeline
-        Returns self
-
-        Parameters
-        ----------
-        * 'pipe_element' [PipelineElement or Hyperpipe]:
-            The object to add, being either a transformer or an estimator.
-
-        """
-        self.pipeline_elements.append(pipe_element)
-        self._prepare_pipeline()
-        return self
-
-    def add(self, pipe_element):
-        self.__iadd__(pipe_element)
-
-    def _prepare_pipeline(self):
-        """ Generates sklearn pipeline with all underlying steps """
-        pipeline_steps = []
-
-        for item in self.pipeline_elements:
-            # pipeline_steps.append((item.name, item.base_element))
-            pipeline_steps.append((item.name, item))
-            self._hyperparameters[item.name] = item.hyperparameters
-
-        self.generate_sklearn_hyperparameters()
-        self.base_element = Pipeline(pipeline_steps)
-
-    @property
-    def hyperparameters(self):
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value):
-        """
-        Setting hyperparameters does not make sense, only the items that were added can be optimized, not the container (self)
-        """
-        return None
-
-    def generate_config_grid(self):
-        return create_global_config_grid(self.pipeline_elements, self.name)
-
-    def generate_sklearn_hyperparameters(self):
-        """
-        Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-        """
-        self._hyperparameters = {}
-        for element in self.pipeline_elements:
-            for attribute, value_list in element.hyperparameters.items():
-                self._hyperparameters[self.name + '__' + attribute] = value_list
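
A minimal sketch of assembling a branch via the `+=` operator defined above (the element names are assumed to be registered in PhotonCore.json):

    branch = PipelineBranch('preprocessing')
    branch += PipelineElement('StandardScaler')
    branch += PipelineElement('PCA', hyperparameters={'n_components': [5, 10]})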
Ancestors (in MRO)

Class variables

var ELEMENT_DICTIONARY

Static methods

def __init__(self, name)

Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)

Returns
-------
instantiated class object

def __init__(self, name):
-    super().__init__(name, {}, test_disabled=False, disabled=False, base_element=True)
-    self.pipeline_elements = []

def add(self, pipe_element)
def add(self, pipe_element):
-    self.__iadd__(pipe_element)

def copy_me(self)
def copy_me(self):
-    return deepcopy(self)

def fit(self, data, targets=None)

Calls the fit function of the base element

Returns
-------
self

def fit(self, data, targets=None):
-    """
-    Calls the fit function of the base element
-    Returns
-    ------
-    self
-    """
-    if not self.disabled:
-        obj = self.base_element
-        obj.fit(data, targets)
-        # self.base_element.fit(data, targets)
-    return self

def generate_config_grid(self)
def generate_config_grid(self):
-    return create_global_config_grid(self.pipeline_elements, self.name)

def generate_sklearn_hyperparameters(self)

Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value

def generate_sklearn_hyperparameters(self):
-    """
-    Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-    """
-    self._hyperparameters = {}
-    for element in self.pipeline_elements:
-        for attribute, value_list in element.hyperparameters.items():
-            self._hyperparameters[self.name + '__' + attribute] = value_list
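
Each contained element already carries sklearn-style names (e.g. `PCA__n_components`), and the branch prefixes them once more with its own name. Continuing the branch sketch from above:

    branch.generate_sklearn_hyperparameters()
    # branch.hyperparameters is now e.g.
    # {'preprocessing__PCA__n_components': [5, 10]}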

def get_params(self, deep=True)

Forwards the get_params request to the wrapped base element

def get_params(self, deep: bool=True):
-    """
-    Forwards the get_params request to the wrapped base element
-    """
-    return self.base_element.get_params(deep)

def inverse_transform(self, data)

Calls inverse_transform on the base element

def inverse_transform(self, data):
-    """
-    Calls inverse_transform on the base element
-    """
-    if hasattr(self.base_element, 'inverse_transform'):
-        return self.base_element.inverse_transform(data)
-    else:
-        # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-        return data

def predict(self, data)

Calls predict function on the base element.

IF PREDICT IS NOT AVAILABLE, CALLS TRANSFORM. This is for the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer. Sklearn usually expects the last element to predict. This is also needed in case we are using an autoencoder, which is first trained by using predict and after training only used for transforming.

def predict(self, data):
-    """
-    Calls predict function on the base element.
-    IF PREDICT IS NOT AVAILABLE CALLS TRANSFORM.
-    This is for the case that the encapsulated hyperpipe is only part of another hyperpipe, and works as a transformer.
-    Sklearn usually expects the last element to predict.
-    Also this is needed in case we are using an autoencoder which is firstly trained by using predict, and after
-    training only used for transforming.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        elif hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        else:
-            Logger().error('BaseException. base Element should have function ' +
-                           'predict, or at least transform.')
-            raise BaseException('base Element should have function predict, or at least transform.')
-    else:
-        return data

def predict_proba(self, data)

Predict probabilities. The base element needs a predict_proba() function; otherwise a base exception is thrown.

def predict_proba(self, data):
-    """
-    Predict probabilities
-    base element needs predict_proba() function, otherwise throw
-    base exception.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict_proba'):
-            return self.base_element.predict_proba(data)
-        else:
-            Logger().error('BaseException. base Element should have "predict_proba" function.')
-            raise BaseException('base Element should have predict_proba function.')
-    return data

def prettify_config_output(self, config_name, config_value, return_dict=False)

Make hyperparameter combinations human readable

def prettify_config_output(self, config_name: str, config_value, return_dict:bool=False):
-    """Make hyperparameter combinations human readable """
-    if config_name == "disabled" and config_value is False:
-        if return_dict:
-            return {'enabled':True}
-        else:
-            return "enabled = True"
-    else:
-        if return_dict:
-            return {config_name:config_value}
-        else:
-            return config_name + '=' + str(config_value)

def score(self, X_test, y_test)

Calls the score function on the base element: returns a goodness of fit measure or a likelihood of unseen data.

def score(self, X_test, y_test):
-    """
-    Calls the score function on the base element:
-    Returns a goodness of fit measure or a likelihood of unseen data:
-    """
-    return self.base_element.score(X_test, y_test)

def set_params(self, **kwargs)

Forwards the set_params request to the wrapped base element. Takes care of the disabled parameter, which is additionally attached by the PHOTON wrapper.

def set_params(self, **kwargs):
-    """
-    Forwards the set_params request to the wrapped base element
-    Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
-    """
-    # element disable is a construct used for this container only
-    if self._sklearn_disabled in kwargs:
-        self.disabled = kwargs[self._sklearn_disabled]
-        del kwargs[self._sklearn_disabled]
-    elif 'disabled' in kwargs:
-        self.disabled = kwargs['disabled']
-        del kwargs['disabled']
-    self.base_element.set_params(**kwargs)
-    return self

def transform(self, data)

Calls transform on the base element.

IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT. This is used if we are using an estimator as a preprocessing step.

def transform(self, data):
-    """
-    Calls transform on the base element.
-    IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT.
-    This is used if we are using an estimator as a preprocessing step.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        elif hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        else:
-            Logger().error('BaseException: transform-predict-mess')
-            raise BaseException('transform-predict-mess')
-    else:
-        return data
Instance variables

var hyperparameters

var pipeline_elements

Methods

def create(cls, name, base_element, hyperparameters, test_disabled=False, disabled=False, **kwargs)

Takes an instantiated object and encapsulates it into the PHOTON structure, adds the disabled function and attaches information about the hyperparameters that should be tested.

@classmethod
-def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-    """
-    Takes an instantiated object and encapsulates it into the PHOTON structure,
-    adds the disabled function and attaches information about the hyperparameters that should be tested
-    """
-    return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
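
A sketch of wrapping an already instantiated estimator instead of a registered string literal (the sklearn SVC here is purely illustrative):

    from sklearn.svm import SVC
    custom_svc = PipelineElement.create('my_svc', SVC(),
                                        hyperparameters={'C': [0.1, 1, 10]},
                                        test_disabled=True)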

class PipelineElement

Photon wrapper class for any transformer or predictor element in the pipeline.

1. Saves the hyperparameters that are to be tested and creates a grid of all hyperparameter configurations
2. Enables fast and rapid instantiation of pipeline elements per string identifier, e.g. 'svc' creates an sklearn.svm.SVC object.
3. Attaches a "disable" switch to every element in the pipeline in order to test a complete disable

Parameters

----------
• 'name' [str]: A string literal encoding the class to be instantiated
• 'hyperparameters' [dict]: Which values/value range should be tested for the hyperparameter, in form of "Hyperparameter_name: [array of parameter values to be tested]"
• 'test_disabled' [bool]: If the hyperparameter search should evaluate a complete disabling of the element
• 'disabled' [bool]: If true, the element is currently disabled and does nothing except return the data it received
• 'kwargs' [dict]: Any parameters that should be passed to the object to be instantiated, default parameters

class PipelineElement(BaseEstimator):
-    """
-    Photon wrapper class for any transformer or predictor element in the pipeline.
-
-    1. Saves the hyperparameters that are to be tested and creates a grid of all hyperparameter configurations
-    2. Enables fast and rapid instantiation of pipeline elements per string identifier,
-         e.g 'svc' creates an sklearn.svm.SVC object.
-    3. Attaches a "disable" switch to every element in the pipeline in order to test a complete disable
-
-
-    Parameters
-    ----------
-    * 'name' [str]:
-       A string literal encoding the class to be instantiated
-    * 'hyperparameters' [dict]:
-       Which values/value range should be tested for the hyperparameter.
-       In form of "Hyperparameter_name: [array of parameter values to be tested]"
-    * 'test_disabled' [bool]:
-        If the hyperparameter search should evaluate a complete disabling of the element
-    * 'disabled' [bool]:
-        If true, the element is currently disabled and does nothing except return the data it received
-    * 'kwargs' [dict]:
-        Any parameters that should be passed to the object to be instantiated, default parameters
-
-    """
-    # Registering Pipeline Elements
-    ELEMENT_DICTIONARY = PhotonRegister.get_package_info()
-
-    def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False,
-                 disabled: bool =False, base_element=None,
-                 **kwargs):
-        """
-        Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)
-
-        Returns
-        -------
-        instantiated class object
-        """
-        if hyperparameters is None:
-            hyperparameters = {}
-
-        if not base_element:
-            if name in PipelineElement.ELEMENT_DICTIONARY:
-                try:
-                    desired_class_info = PipelineElement.ELEMENT_DICTIONARY[name]
-                    desired_class_home = desired_class_info[0]
-                    desired_class_name = desired_class_info[1]
-                    imported_module = __import__(desired_class_home, globals(), locals(), desired_class_name, 0)
-                    desired_class = getattr(imported_module, desired_class_name)
-                    base_element = desired_class(**kwargs)
-                    obj = PipelineElement(name, hyperparameters, test_disabled, disabled, base_element)
-                    self.base_element = obj
-                except AttributeError as ae:
-                    Logger().error('ValueError: Could not find according class:'
-                                   + str(PipelineElement.ELEMENT_DICTIONARY[name]))
-                    raise ValueError('Could not find according class:', PipelineElement.ELEMENT_DICTIONARY[name])
-            else:
-                Logger().error('Element not supported right now:' + name)
-                raise NameError('Element not supported right now:', name)
-        else:
-            self.base_element = base_element
-
-
-        # Todo: check if hyperparameters are members of the class
-        # Todo: write method that returns any hyperparameter that could be optimized --> sklearn: get_params.keys
-        # Todo: map any hyperparameter to a possible default list of values to try
-        self.name = name
-        self.test_disabled = test_disabled
-        self._sklearn_disabled = self.name + '__disabled'
-        self._hyperparameters = hyperparameters
-        # check if hyperparameters are already in sklearn style
-        if len(hyperparameters) > 0:
-            key_0 = next(iter(hyperparameters))
-            if self.name not in key_0:
-                self.hyperparameters = hyperparameters
-        self.disabled = disabled
-
-    def copy_me(self):
-        return deepcopy(self)
-
-    @classmethod
-    def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-        """
-        Takes an instantiated object and encapsulates it into the PHOTON structure,
-        add the disabled function and attaches information about the hyperparameters that should be tested
-        """
-        return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
-
-    @property
-    def hyperparameters(self):
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value: dict):
-        self.generate_sklearn_hyperparameters(value)
-
-    def generate_config_grid(self):
-        config_dict = create_global_config_dict([self])
-        if len(config_dict) > 0:
-            if self.test_disabled:
-                config_dict.pop(self._sklearn_disabled)
-            config_list = list(ParameterGrid(config_dict))
-            if self.test_disabled:
-                config_list.append({self._sklearn_disabled: True})
-            return config_list
-        else:
-            return []
-
-    def generate_sklearn_hyperparameters(self, value: dict):
-        """
-        Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-        """
-        self._hyperparameters = {}
-        for attribute, value_list in value.items():
-            self._hyperparameters[self.name + '__' + attribute] = value_list
-        if self.test_disabled:
-            self._hyperparameters[self._sklearn_disabled] = [False, True]
-
-    def get_params(self, deep: bool=True):
-        """
-        Forwards the get_params request to the wrapped base element
-        """
-        return self.base_element.get_params(deep)
-
-
-    def set_params(self, **kwargs):
-        """
-        Forwards the set_params request to the wrapped base element
-        Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
-        """
-        # element disable is a construct used for this container only
-        if self._sklearn_disabled in kwargs:
-            self.disabled = kwargs[self._sklearn_disabled]
-            del kwargs[self._sklearn_disabled]
-        elif 'disabled' in kwargs:
-            self.disabled = kwargs['disabled']
-            del kwargs['disabled']
-        self.base_element.set_params(**kwargs)
-        return self
-
-    def fit(self, data, targets=None):
-        """
-        Calls the fit function of the base element
-
-        Returns
-        ------
-        self
-        """
-        if not self.disabled:
-            obj = self.base_element
-            obj.fit(data, targets)
-            # self.base_element.fit(data, targets)
-        return self
-
-    def predict(self, data):
-        """
-        Calls predict function on the base element.
-
-        IF PREDICT IS NOT AVAILABLE CALLS TRANSFORM.
-        This is for the case that the encapsulated hyperpipe is only part of another hyperpipe, and works as a transformer.
-        Sklearn usually expects the last element to predict.
-        Also this is needed in case we are using an autoencoder which is firstly trained by using predict, and after
-        training only used for transforming.
-        """
-        if not self.disabled:
-            if hasattr(self.base_element, 'predict'):
-                return self.base_element.predict(data)
-            elif hasattr(self.base_element, 'transform'):
-                return self.base_element.transform(data)
-            else:
-                Logger().error('BaseException. base Element should have function ' +
-                               'predict, or at least transform.')
-                raise BaseException('base Element should have function predict, or at least transform.')
-        else:
-            return data
-
-    def predict_proba(self, data):
-        """
-        Predict probabilities
-        base element needs predict_proba() function, otherwise throw
-        base exception.
-        """
-        if not self.disabled:
-            if hasattr(self.base_element, 'predict_proba'):
-                return self.base_element.predict_proba(data)
-            else:
-                Logger().error('BaseException. base Element should have "predict_proba" function.')
-                raise BaseException('base Element should have predict_proba function.')
-        return data
-
-    # def fit_predict(self, data, targets):
-    #     if not self.disabled:
-    #         return self.base_element.fit_predict(data, targets)
-    #     else:
-    #         return data
-
-    def transform(self, data):
-        """
-        Calls transform on the base element.
-
-        IN CASE THERE IS NO TRANSFORM METHOD, CALLS PREDICT.
-        This is used if we are using an estimator as a preprocessing step.
-        """
-        if not self.disabled:
-            if hasattr(self.base_element, 'transform'):
-                return self.base_element.transform(data)
-            elif hasattr(self.base_element, 'predict'):
-                return self.base_element.predict(data)
-            else:
-                Logger().error('BaseException: transform-predict-mess')
-                raise BaseException('transform-predict-mess')
-        else:
-            return data
-
-    def inverse_transform(self, data):
-        """
-        Calls inverse_transform on the base element
-        """
-        if hasattr(self.base_element, 'inverse_transform'):
-            return self.base_element.inverse_transform(data)
-        else:
-            # raise Warning('Element ' + self.name + ' has no method inverse_transform')
-            return data
-
-    # def fit_transform(self, data, targets=None):
-    #     if not self.disabled:
-    #         if hasattr(self.base_element, 'fit_transform'):
-    #             return self.base_element.fit_transform(data, targets)
-    #         elif hasattr(self.base_element, 'transform'):
-    #             self.base_element.fit(data, targets)
-    #             return self.base_element.transform(data)
-    #         # elif hasattr(self.base_element, 'predict'):
-    #         #     self.base_element.fit(data, targets)
-    #         #     return self.base_element.predict(data)
-    #     else:
-    #         return data
-
-    def score(self, X_test, y_test):
-        """
-        Calls the score function on the base element:
-        Returns a goodness of fit measure or a likelihood of unseen data:
-        """
-        return self.base_element.score(X_test, y_test)
-
-    def prettify_config_output(self, config_name: str, config_value, return_dict:bool=False):
-        """Make hyperparameter combinations human readable """
-        if config_name == "disabled" and config_value is False:
-            if return_dict:
-                return {'enabled':True}
-            else:
-                return "enabled = True"
-        else:
-            if return_dict:
-                return {config_name:config_value}
-            else:
-                return config_name + '=' + str(config_value)
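
A minimal usage sketch of the string-identifier mechanism described above ('SVC' is assumed to be registered, as in the PhotonRegister example; default parameters are passed through **kwargs):

    svc = PipelineElement('SVC', hyperparameters={'kernel': ['linear', 'rbf']},
                          test_disabled=True, C=1.0)
    print(svc.generate_config_grid())
    # grid over the kernel values, plus one extra config that disables the element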
Ancestors (in MRO)

Class variables

var ELEMENT_DICTIONARY

Static methods

def __init__(self, name, hyperparameters=None, test_disabled=False, disabled=False, base_element=None, **kwargs)

Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)

Returns
-------
instantiated class object

def __init__(self, name, hyperparameters: dict=None, test_disabled: bool=False,
-             disabled: bool =False, base_element=None,
-             **kwargs):
-    """
-    Takes a string literal and transforms it into an object of the associated class (see PhotonCore.JSON)
-    Returns
-    -------
-    instantiated class object
-    """
-    if hyperparameters is None:
-        hyperparameters = {}
-    if not base_element:
-        if name in PipelineElement.ELEMENT_DICTIONARY:
-            try:
-                desired_class_info = PipelineElement.ELEMENT_DICTIONARY[name]
-                desired_class_home = desired_class_info[0]
-                desired_class_name = desired_class_info[1]
-                imported_module = __import__(desired_class_home, globals(), locals(), desired_class_name, 0)
-                desired_class = getattr(imported_module, desired_class_name)
-                base_element = desired_class(**kwargs)
-                obj = PipelineElement(name, hyperparameters, test_disabled, disabled, base_element)
-                self.base_element = obj
-            except AttributeError as ae:
-                Logger().error('ValueError: Could not find the corresponding class: '
-                               + str(PipelineElement.ELEMENT_DICTIONARY[name]))
-                raise ValueError('Could not find the corresponding class:', PipelineElement.ELEMENT_DICTIONARY[name])
-        else:
-            Logger().error('Element not supported right now: ' + name)
-            raise NameError('Element not supported right now:', name)
-    else:
-        self.base_element = base_element
-    # Todo: check if hyperparameters are members of the class
-    # Todo: write method that returns any hyperparameter that could be optimized --> sklearn: get_params.keys
-    # Todo: map any hyperparameter to a possible default list of values to try
-    self.name = name
-    self.test_disabled = test_disabled
-    self._sklearn_disabled = self.name + '__disabled'
-    self._hyperparameters = hyperparameters
-    # check if hyperparameters are already in sklearn style
-    if len(hyperparameters) > 0:
-        key_0 = next(iter(hyperparameters))
-        if self.name not in key_0:
-            self.hyperparameters = hyperparameters
-    self.disabled = disabled
-
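A minimal construction sketch; the name 'PCA' is assumed to be registered in PhotonCore.json:

    # kwargs after the PHOTON arguments are forwarded to the wrapped class
    pca = PipelineElement('PCA', hyperparameters={'n_components': [3, 5]}, test_disabled=True)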
def copy_me(self):
-    return deepcopy(self)
-
def fit(self, data, targets=None):
-    """
-    Calls the fit function of the base element
-    Returns
-    ------
-    self
-    """
-    if not self.disabled:
-        obj = self.base_element
-        obj.fit(data, targets)
-        # self.base_element.fit(data, targets)
-    return self
-
def generate_config_grid(self):
-    config_dict = create_global_config_dict([self])
-    if len(config_dict) > 0:
-        if self.test_disabled:
-            config_dict.pop(self._sklearn_disabled)
-        config_list = list(ParameterGrid(config_dict))
-        if self.test_disabled:
-            config_list.append({self._sklearn_disabled: True})
-        return config_list
-    else:
-        return []
-
def generate_sklearn_hyperparameters(self, value: dict):
-    """
-    Generates a dictionary according to the sklearn convention of element_name__parameter_name: parameter_value
-    """
-    self._hyperparameters = {}
-    for attribute, value_list in value.items():
-        self._hyperparameters[self.name + '__' + attribute] = value_list
-    if self.test_disabled:
-        self._hyperparameters[self._sklearn_disabled] = [False, True]
-
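A sketch of the resulting names, assuming an element named 'my_svc' with test_disabled=True:

    element.generate_sklearn_hyperparameters({'C': [0.1, 1.0]})
    # element._hyperparameters is now:
    # {'my_svc__C': [0.1, 1.0], 'my_svc__disabled': [False, True]}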
def get_params(self, deep: bool=True):
-    """
-    Forwards the get_params request to the wrapped base element
-    """
-    return self.base_element.get_params(deep)
-
def predict(self, data):
-    """
-    Calls predict function on the base element.
-    IF PREDICT IS NOT AVAILABLE CALLS TRANSFORM.
-    This covers the case that the encapsulated hyperpipe is only part of another hyperpipe and works as a transformer;
-    sklearn usually expects only the last element to predict.
-    It is also needed when an autoencoder is first trained via predict and afterwards
-    only used for transforming.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict'):
-            return self.base_element.predict(data)
-        elif hasattr(self.base_element, 'transform'):
-            return self.base_element.transform(data)
-        else:
-            Logger().error('BaseException. base Element should have function ' +
-                           'predict, or at least transform.')
-            raise BaseException('base Element should have function predict, or at least transform.')
-    else:
-        return data
-
def predict_proba(self, data):
-    """
-    Predict probabilities.
-    The base element needs a predict_proba() function, otherwise a BaseException is raised.
-    """
-    if not self.disabled:
-        if hasattr(self.base_element, 'predict_proba'):
-            return self.base_element.predict_proba(data)
-        else:
-            Logger().error('BaseException. base Element should have "predict_proba" function.')
-            raise BaseException('base Element should have predict_proba function.')
-    return data
-
def set_params(self, **kwargs):
-    """
-    Forwards the set_params request to the wrapped base element
-    Takes care of the disabled parameter which is additionally attached by the PHOTON wrapper
-    """
-    # element disable is a construct used for this container only
-    if self._sklearn_disabled in kwargs:
-        self.disabled = kwargs[self._sklearn_disabled]
-        del kwargs[self._sklearn_disabled]
-    elif 'disabled' in kwargs:
-        self.disabled = kwargs['disabled']
-        del kwargs['disabled']
-    self.base_element.set_params(**kwargs)
-    return self
-
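A sketch of how the PHOTON-only disabled switch travels through set_params, reusing the 'my_svc' element from above:

    element.set_params(**{'my_svc__disabled': True})  # stripped here, toggles element.disabled
    element.set_params(C=10.0)                        # everything else is forwarded to base_element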
@classmethod
-def create(cls, name, base_element, hyperparameters: dict, test_disabled=False, disabled=False, **kwargs):
-    """
-    Takes an instantiated object and encapsulates it into the PHOTON structure,
-    adds the disabled functionality and attaches information about the hyperparameters that should be tested
-    """
-    return PipelineElement(name, hyperparameters, test_disabled, disabled, base_element=base_element, **kwargs)
-
class PipelineStacking

class PipelineStacking(PipelineElement):
-    """
-    Creates a vertical stacking/parallelization of pipeline items.
-
-    The object acts as a single pipeline element and encapsulates several vertically stacked pipeline elements,
-    each child receiving the same input data. The data is iteratively distributed to all children, and the results
-    are collected and horizontally concatenated.
-
-    """
-    def __init__(self, name: str, stacking_elements=None, voting: bool=True):
-        """
-        Creates a new PipelineStacking element.
-        Collects all possible hyperparameter combinations of the children
-
-        Parameters
-        ----------
-        * 'name' [str]:
-            Give the pipeline element a name
-        * 'stacking_elements' [list, optional]:
-            List of pipeline elements that should run in parallel
-        * 'voting' [bool]:
-            If true, the predictions of the encapsulated pipeline elements are joined to a single prediction
-        """
-        super(PipelineStacking, self).__init__(name, hyperparameters={}, test_disabled=False, disabled=False,
-                                               base_element=True)
-
-        self._hyperparameters = {}
-        self.pipe_elements = OrderedDict()
-        self.voting = voting
-        if stacking_elements is not None:
-            for item_to_stack in stacking_elements:
-                self.__iadd__(item_to_stack)
-
-    def __iadd__(self, item):
-        """
-        Adds a new element to the stack.
-        Generates sklearn hyperparameter names in order to set the item's hyperparameters in the optimization process.
-
-        * 'item' [PipelineElement or PipelineBranch or Hyperpipe]:
-            The Element that should be stacked and will run in a vertical parallelization in the original pipe.
-        """
-        self.pipe_elements[item.name] = item
-        self._hyperparameters[item.name] = item.hyperparameters
-
-        # for each configuration
-        tmp_dict = dict(item.hyperparameters)
-        for key, element in tmp_dict.items():
-            if isinstance(item, PipelineElement):
-                self._hyperparameters[self.name + '__' + key] = tmp_dict[key]
-            else:
-                self._hyperparameters[self.name + '__' + item.name + '__' + key] = tmp_dict[key]
-        return self
-
-    def add(self, item):
-        self.__iadd__(item)
-
-    @property
-    def hyperparameters(self):
-        return self._hyperparameters
-
-    @hyperparameters.setter
-    def hyperparameters(self, value):
-        """
-        Setting hyperparameters directly does not make sense: only the items that were added can be optimized, not the container (self)
-        """
-        pass
-
-    def generate_config_grid(self):
-        return create_global_config_grid(self.pipe_elements.values(), self.name)
-
-    def get_params(self, deep=True):
-        all_params = {}
-        for name, element in self.pipe_elements.items():
-            all_params[name] = element.get_params(deep)
-        return all_params
-
-    def set_params(self, **kwargs):
-        """
-        Find the particular child and distribute the params to it
-        """
-        spread_params_dict = {}
-        for k, val in kwargs.items():
-            splitted_k = k.split('__')
-            item_name = splitted_k[0]
-            if item_name not in spread_params_dict:
-                spread_params_dict[item_name] = {}
-            dict_entry = {'__'.join(splitted_k[1::]): val}
-            spread_params_dict[item_name].update(dict_entry)
-
-        for name, params in spread_params_dict.items():
-            if name in self.pipe_elements:
-                self.pipe_elements[name].set_params(**params)
-            else:
-                Logger().error('NameError: Could not find element ' + name)
-                raise NameError('Could not find element ', name)
-        return self
-
-    def fit(self, data, targets=None):
-        """
-        Calls fit iteratively on every child
-        """
-        for name, element in self.pipe_elements.items():
-            # Todo: parallelize fitting
-            element.fit(data, targets)
-        return self
-
-    def predict(self, data):
-        """
-        Iteratively calls predict on every child.
-        """
-        # Todo: strategy for concatenating data from different pipes
-        # todo: parallelize prediction
-        predicted_data = np.empty((0, 0))
-        for name, element in self.pipe_elements.items():
-            element_transform = element.predict(data)
-            predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-        if self.voting:
-            if hasattr(predicted_data, 'shape'):
-                if len(predicted_data.shape) > 1:
-                    predicted_data = np.mean(predicted_data, axis=1).astype(int)
-        return predicted_data
-
-    def predict_proba(self, data):
-        """
-        Predict probabilities for every pipe element and
-        stack them together. Alternatively, do voting instead.
-        """
-        predicted_data = np.empty((0, 0))
-        for name, element in self.pipe_elements.items():
-            element_transform = element.predict_proba(data)
-            predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-        if self.voting:
-            if hasattr(predicted_data, 'shape'):
-                if len(predicted_data.shape) > 1:
-                    predicted_data = np.mean(predicted_data, axis=1).astype(int)
-        return predicted_data
-
-    def transform(self, data):
-        """
-        Calls transform on every child.
-
-        If the encapsulated child is a hyperpipe, also calls predict on the last element in the pipeline.
-        """
-        transformed_data = np.empty((0, 0))
-        for name, element in self.pipe_elements.items():
-            # if it is a hyperpipe with a final estimator, we want to use predict:
-            if hasattr(element, 'pipe'):
-                if element.overwrite_x is not None:
-                    element_data = element.overwrite_x
-                else:
-                    element_data = data
-                if element.pipe._final_estimator:
-                    element_transform = element.predict(element_data)
-                else:
-                    # if it is just a preprocessing pipe we want to use transform
-                    element_transform = element.transform(element_data)
-            else:
-                raise "I dont know what todo!"
-
-            transformed_data = PipelineStacking.stack_data(transformed_data, element_transform)
-
-        return transformed_data
-
-    # def fit_predict(self, data, targets):
-    #     predicted_data = None
-    #     for name, element in self.pipe_elements.items():
-    #         element_transform = element.fit_predict(data)
-    #         predicted_data = PipelineStacking.stack_data(predicted_data, element_transform)
-    #     return predicted_data
-    #
-    # def fit_transform(self, data, targets=None):
-    #     transformed_data = np.empty((0, 0))
-    #     for name, element in self.pipe_elements.items():
-    #         # if it is a hyperpipe with a final estimator, we want to use predict:
-    #         if hasattr(element, 'pipe'):
-    #             if element.pipe._final_estimator:
-    #                 element.fit(data, targets)
-    #                 element_transform = element.predict(data)
-    #             else:
-    #                 # if it is just a preprocessing pipe we want to use transform
-    #                 element.fit(data)
-    #                 element_transform = element.transform(data)
-    #             transformed_data = PipelineStacking.stack_data(transformed_data, element_transform)
-    #     return transformed_data
-
-    @classmethod
-    def stack_data(cls, a, b):
-        """
-        Helper method to horizontally join the outcome of each child
-
-        Parameters
-        ----------
-        * 'a' [ndarray]:
-            The existing matrix
-        * 'b' [ndarray]:
-            The matrix that is to be attached horizontally
-
-        Returns
-        -------
-        New matrix, that is a and b horizontally joined
-
-        """
-        if not a.any():
-            a = b
-        else:
-            # Todo: check for right dimensions!
-            if a.ndim == 1 and b.ndim == 1:
-                a = np.column_stack((a, b))
-            else:
-                b = np.reshape(b, (b.shape[0], 1))
-                a = np.concatenate((a, b), 1)
-        return a
-
-    def score(self, X_test, y_test):
-        """
-        Calculate accuracy for predictions made with this object.
-        This function should probably never be called.
-
-        """
-        # Todo: invent strategy for this ?
-        # raise BaseException('PipelineStacking.score should probably never be reached.')
-        # return 16
-        predicted = self.predict(X_test)
-
-        return accuracy_score(y_test, predicted)
-
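A minimal stacking sketch, reusing X, y and the imports from the earlier sketches:

    from sklearn.tree import DecisionTreeClassifier

    stack = PipelineStacking('stack', [
        PipelineElement.create('svc', SVC(), hyperparameters={}),
        PipelineElement.create('tree', DecisionTreeClassifier(), hyperparameters={})])
    stack.fit(X, y)
    y_hat = stack.predict(X)  # children's predictions are column-stacked, then averaged (voting=True)

    # stack_data joins 1-D child outputs column-wise:
    PipelineStacking.stack_data(np.array([1, 2]), np.array([3, 4]))  # -> [[1, 3], [2, 4]]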

class RandomGridSearchOptimizer

class RandomGridSearchOptimizer(GridSearchOptimizer):
-
-    def __init__(self, k=None):
-        super(RandomGridSearchOptimizer, self).__init__()
-        self.k = k
-
-    def prepare(self, pipeline_elements):
-        super(RandomGridSearchOptimizer, self).prepare(pipeline_elements)
-        self.param_grid = list(self.param_grid)
-        # create random chaos in list
-        np.random.shuffle(self.param_grid)
-        if self.k is not None:
-            self.param_grid = self.param_grid[0:self.k]
-
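A sketch of the optimizer protocol, assuming pipeline_elements is the list of PipelineElement objects the hyperpipe hands over:

    opt = RandomGridSearchOptimizer(k=10)
    opt.prepare(pipeline_elements)
    configs = list(opt.next_config)  # at most 10 configurations, in shuffled order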

class TimeBoxedRandomGridSearchOptimizer

class TimeBoxedRandomGridSearchOptimizer(RandomGridSearchOptimizer):
-
-    def __init__(self, limit_in_minutes=60):
-        super(TimeBoxedRandomGridSearchOptimizer, self).__init__()
-        self.limit_in_minutes = limit_in_minutes
-        self.start_time = None
-        self.end_time = None
-
-    def prepare(self, pipeline_elements):
-        super(TimeBoxedRandomGridSearchOptimizer, self).prepare(pipeline_elements)
-        self.start_time = None
-
-    def next_config_generator(self):
-        if self.start_time is None:
-            self.start_time = datetime.datetime.now()
-            self.end_time = self.start_time + datetime.timedelta(minutes=self.limit_in_minutes)
-        for parameters in super(TimeBoxedRandomGridSearchOptimizer, self).next_config_generator():
-            if datetime.datetime.now() < self.end_time:
-                yield parameters
-
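The same protocol, but configurations stop being yielded once the time budget is spent. A sketch, with pipeline_elements as above and evaluate as a hypothetical callback:

    opt = TimeBoxedRandomGridSearchOptimizer(limit_in_minutes=5)
    opt.prepare(pipeline_elements)
    for config in opt.next_config:
        evaluate(config)  # iteration dries up roughly 5 minutes after the first config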
diff --git a/photonai/documentation/photonai/investigator/index.html b/photonai/documentation/photonai/investigator/index.html
deleted file mode 100644
index f1ebaea6..00000000
--- a/photonai/documentation/photonai/investigator/index.html
+++ /dev/null
@@ -1,1428 +0,0 @@
photonai.investigator module

"""
-PHOTON Investigator delivers a web-based tool for exploring the hyperparameter search results.
-"""
-
-from .Investigator import Investigator
-
-__all__ = ("Investigator")
class Investigator

class Investigator:
-    """
-    Instantiates a Flask website that shows you the results of the hyperparameter search, the best configuration,
-    all of its performances etc.
-    """
-
-    @staticmethod
-    def __build_url(storage: str, name: str):
-        """
-        creates a localhost url for displaying a pipeline according to its source (working memory, file or mongodb)
-        """
-        url = "http://localhost:7273/pipeline/" + storage + "/" + name
-        return url
-
-    @staticmethod
-    def show(pipe: Hyperpipe):
-        """
-        Opens the PHOTON investigator and shows the hyperpipe's hyperparameter search performance from working space
-
-        Parameters
-        ----------
-        * 'pipe' [Hyperpipe]:
-            The Hyperpipe object that has performed hyperparameter search
-
-        """
-
-        assert pipe is not None, "Investigator.show needs a Hyperpipe object, got None"
-        assert isinstance(pipe, Hyperpipe), "Investigator.show needs an object of type Hyperpipe"
-        assert pipe.result_tree is not None, "Investigator.show needs a Hyperpipe that has already been optimized, so it can show the result tree"
-        # make sure that Flask is running
-        FlaskManager().set_pipe_object(pipe.name, pipe.result_tree)
-        url = Investigator.__build_url("a", pipe.name)
-        Investigator.__delayed_browser(url)
-        FlaskManager().run_app()
-
-    @staticmethod
-    def load_from_db(mongo_connect_url: str, pipe_name: str):
-        """
-        Opens the PHOTON investigator and
-        loads a hyperpipe's performance results from a MongoDB instance
-
-        Parameters
-        ---------
-        * 'mongo_connect_url' [str]:
-            The MongoDB connection string including the database name
-        * 'pipe_name' [str]:
-            The name of the pipeline to load
-        """
-        FlaskManager().set_mongo_db_url(mongo_connect_url)
-        url = Investigator.__build_url("m", pipe_name)
-        Logger().info("Your url is: " + url)
-        Investigator.__delayed_browser(url)
-        FlaskManager().run_app()
-
-
-    @staticmethod
-    def load_many_from_db(mongo_connect_url: str, pipe_names: list):
-        """
-        Opens the PHOTON investigator and
-        loads several hyperpipes' performance results from a MongoDB instance
-
-        Parameters
-        ---------
-        * 'mongo_connect_url' [str]:
-            The MongoDB connection string including the database name
-        * 'pipe_names' [list]:
-            A list of the hyperpipe objects to load
-        """
-
-        FlaskManager().set_mongo_db_url(mongo_connect_url)
-        for pipe in pipe_names:
-            url = Investigator.__build_url("m", pipe)
-            Logger().info("Your url is: " + url)
-        FlaskManager().run_app()
-
-    @staticmethod
-    def load_from_file(name: str, file_url: str):
-        """
-        Opens the PHOTON investigator and loads the hyperpipe search results from the file path
-
-        Parameters
-        ----------
-        * 'name' [str]:
-            The name of the hyperpipe object that you want to load
-        * 'file_url' [str]:
-            The path to the file in which the hyperparameter search results are encoded.
-        """
-        assert os.path.isfile(file_url), "File" + file_url + " does not exist or is not a file. Please give absolute path."
-        FlaskManager().set_pipe_file(name, file_url)
-        url = Investigator.__build_url("f", name)
-        Investigator.__delayed_browser(url)
-        FlaskManager().run_app()
-
-    # @staticmethod
-    # def load_files(file_list: list):
-    #     """
-    #        Opens the PHOTON investigator and loads the hyperpipe search results from the file path
-    #
-    #        Parameters
-    #        ----------
-    #        * 'file_url' [str]:
-    #            The path to the file in which the hyperparameter search results are encoded.
-    #     """
-    #     for file_url in file_list:
-    #         Investigator.load_from_file("" file_url)
-
-    @staticmethod
-    def __open_browser(url):
-        # delay opening the browser for 2 seconds so that the Flask server can start in the meantime
-        slp(2)
-        webbrowser.open(url)
-
-    @staticmethod
-    def __delayed_browser(url):
-        Investigator.__open_browser(url)
-
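A usage sketch; my_pipe, the file path and the connection string are placeholders:

    Investigator.show(my_pipe)  # my_pipe: an already optimized Hyperpipe
    Investigator.load_from_file('my_pipe', '/absolute/path/to/results.p')
    Investigator.load_from_db('mongodb://localhost:27017/photon_results', 'my_pipe')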
diff --git a/photonai/documentation/photonai/optimization/index.html b/photonai/documentation/photonai/optimization/index.html
deleted file mode 100644
index 9a535867..00000000
--- a/photonai/documentation/photonai/optimization/index.html
+++ /dev/null
@@ -1,2172 +0,0 @@
photonai.optimization module

""" PHOTON Classes for defining the hyperparameter search space and optimization strategies"""
-
-from .OptimizationStrategies import GridSearchOptimizer, RandomGridSearchOptimizer, TimeBoxedRandomGridSearchOptimizer
-from .Hyperparameters import BooleanSwitch, FloatRange, IntegerRange, Categorical
-
-__all__ = ("GridSearchOptimizer",
-           "RandomGridSearchOptimizer",
-           "TimeBoxedRandomGridSearchOptimizer",
-           "BooleanSwitch",
-           "FloatRange",
-           "IntegerRange",
-           "Categorical")
-
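A sketch of defining a search space with these classes, assuming an element wrapping an SVC:

    from photonai.optimization import FloatRange, Categorical
    hyperparameters = {'C': FloatRange(0.1, 10, range_type='linspace', num=5),
                       'kernel': Categorical(['rbf', 'linear'])}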
class BooleanSwitch

class BooleanSwitch(PhotonHyperparam):
-    """
-      Class for defining a boolean hyperparameter, when both options should be tested in hyperparameter optimization.
-
-      Parameters
-      ----------
-      * 'values' [bool]:
-         will return both True, and False
-
-    """
-
-    def __init__(self):
-        self.values = [True, False]
-
class Categorical

class Categorical(PhotonHyperparam):
-    """
-      Class for defining a definite list of hyperparameter values.
-      Can be used for categorical values, but also for numbers.
-
-      Parameters
-      ----------
-      * 'values' [list]:
-         definite list of hyperparameter values
-
-    """
-
-    def __init__(self, values: list):
-        self.values = values
-
class FloatRange

class FloatRange(NumberRange):
-    """
-          Class for easily creating a range of float numbers to be tested in hyperparameter optimization.
-
-          Parameters
-          ----------
-          * 'start' [number]:
-             The start value for generating the number interval.
-             The resulting interval includes the value, default is 0.
-
-          * 'stop' [number]:
-             The stop value for generating the number interval.
-
-             - if range_type == "range":
-               the end value is not included in the interval (see documentation of numpy.arange).
-             - if range_type == "linspace"
-               the end value is included in the interval, unless endpoint is set to False (see documentation of numpy.linspace).
-            - if range_type == "logspace"
-               the end value is included in the interval, unless endpoint is set to False (see documentation of numpy.logspace).
-
-          * 'range_type' [str]:
-             Which method to use for generating the number interval.
-             Possible options:
-
-             - "range": numpy.arange is used to generate a list of values separated by the same step width.
-             - "linspace": numpy.linspace is used to generate a certain number of values between start and stop.
-             - "logspace": numpy.logspace is used to generate a logarithmically distributed range of a certain length.
-
-          * 'step' [number, default=None, optional]:
-            if range_type == 'range', the spacing between values.
-
-          * 'num' [int, default=None, optional]:
-            if range_type == 'linspace' or range_type == 'logspace', the number of samples to generate.
-
-          * 'kwargs' [dict, optional]:
-            Further parameters that should be passed to the numpy function chosen with range_type.
-        """
-
-    def __init__(self, start, stop, range_type='range', step=None, num=None, **kwargs):
-        super().__init__(start, stop, range_type, step, num, np.float32, **kwargs)
-
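A sketch of the generated values (transform() is inherited from NumberRange, shown below):

    fr = FloatRange(0.0, 1.0, range_type='linspace', num=5)
    fr.transform()
    fr.values  # [0.0, 0.25, 0.5, 0.75, 1.0]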
def transform(self):
-    values = []
-    if self.range_type == "range":
-        if not self.step:
-            values = np.arange(self.start, self.stop, dtype=self.num_type, **self.range_params)
-        else:
-            # assign the local 'values' (not self.values) so the type conversion below is applied
-            values = np.arange(self.start, self.stop, self.step, dtype=self.num_type, **self.range_params)
-    elif self.range_type == "linspace":
-        if self.num:
-            values = np.linspace(self.start, self.stop, num=self.num, dtype=self.num_type, **self.range_params)
-        else:
-            values = np.linspace(self.start, self.stop, dtype=self.num_type, **self.range_params)
-    elif self.range_type == "logspace":
-        if self.num:
-            values = np.logspace(self.start, self.stop, num=self.num, dtype=self.num_type, **self.range_params)
-        else:
-            values = np.logspace(self.start, self.stop, dtype=self.num_type, **self.range_params)
-    # convert to python datatype because mongodb needs it
-    if self.num_type == np.int32:
-        self.values = [int(i) for i in values]
-    elif self.num_type == np.float32:
-        self.values = [float(i) for i in values]
-
- -
-
- -
- -
-
- -
-

class GridSearchOptimizer

class GridSearchOptimizer(object):
-    def __init__(self):
-        self.param_grid = []
-        self.pipeline_elements = None
-        self.parameter_iterable = None
-        self.next_config = self.next_config_generator()
-
-    def prepare(self, pipeline_elements):
-        self.pipeline_elements = pipeline_elements
-        self.next_config = self.next_config_generator()
-        self.param_grid = create_global_config_grid(self.pipeline_elements)
-        Logger().info("Grid Search generated " + str(len(self.param_grid)) + " configurations")
-
-    def next_config_generator(self):
-        for parameters in self.param_grid:
-            yield parameters
-
-    def evaluate_recent_performance(self, config, performance):
-        # influence return value of next_config
-        pass
-
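A sketch of the loop the hyperpipe drives against this interface, with pipeline_elements as above and evaluate as a hypothetical callback:

    opt = GridSearchOptimizer()
    opt.prepare(pipeline_elements)
    for config in opt.next_config:
        performance = evaluate(config)
        opt.evaluate_recent_performance(config, performance)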
class IntegerRange

-
class IntegerRange(NumberRange):
-    """
-         Class for easily creating a range of integer numbers to be tested in hyperparameter optimization.
-
-         Parameters
-         ----------
-         * 'start' [number]:
-            The start value for generating the number interval.
-            The resulting interval includes the value, default is 0.
-
-         * 'stop' [number]:
-            The stop value for generating the number interval.
-
-            - if range_type == "range":
-              the end value is not included in the interval (see documentation of numpy.arange).
-            - if range_type == "linspace"
-              the end value is included in the interval, unless endpoint is set to False (see documentation of numpy.linspace).
-            - if range_type == "logspace"
-              the end value is included in the interval, unless endpoint is set to False (see documentation of numpy.logspace).
-
-         * 'range_type' [str]:
-            Which method to use for generating the number interval.
-            Possible options:
-
-            - "range": numpy.arange is used to generate a list of values separated by the same step width.
-            - "linspace": numpy.linspace is used to generate a certain number of values between start and stop.
-            - "logspace": numpy.logspace is used to generate a logarithmically distributed range of a certain length.
-
-         * 'step' [number, default=None, optional]:
-           if range_type == 'range', the spacing between values.
-
-         * 'num' [int, default=None, optional]:
-           if range_type == 'linspace' or range_type == 'logspace', the number of samples to generate.
-
-         * 'kwargs' [dict, optional]:
-           Further parameters that should be passed to the numpy function chosen with range_type.
-       """
-
-    def __init__(self, start, stop, range_type='range', step=None, num=None, **kwargs):
-        super().__init__(start, stop, range_type, step, num, np.int32, **kwargs)
-

Ancestors (in MRO)

  • IntegerRange
  • photonai.optimization.Hyperparameters.NumberRange
  • photonai.optimization.Hyperparameters.PhotonHyperparam
  • builtins.object

Static methods


def __init__(self, start, stop, range_type='range', step=None, num=None, **kwargs)

Initialize self. See help(type(self)) for accurate signature.
-
def __init__(self, start, stop, range_type='range', step=None, num=None, **kwargs):
-    super().__init__(start, stop, range_type, step, num, np.int32, **kwargs)
-

def transform(self)
-
def transform(self):
-    values = []
-    if self.range_type == "range":
-        if not self.step:
-            values = np.arange(self.start, self.stop, dtype=self.num_type, **self.range_params)
-        else:
-            # assign to the local 'values' so the dtype conversion below
-            # sees the generated numbers
-            values = np.arange(self.start, self.stop, self.step, dtype=self.num_type, **self.range_params)
-    elif self.range_type == "linspace":
-        if self.num:
-            values = np.linspace(self.start, self.stop, num=self.num, dtype=self.num_type, **self.range_params)
-        else:
-            values = np.linspace(self.start, self.stop, dtype=self.num_type, **self.range_params)
-    elif self.range_type == "logspace":
-        if self.num:
-            values = np.logspace(self.start, self.stop, num=self.num, dtype=self.num_type, **self.range_params)
-        else:
-            values = np.logspace(self.start, self.stop, dtype=self.num_type, **self.range_params)
-    # convert to python datatype because mongodb needs it
-    if self.num_type == np.int32:
-        self.values = [int(i) for i in values]
-    elif self.num_type == np.float32:
-        self.values = [float(i) for i in values]
-
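Since transform defers entirely to numpy, the three range types behave exactly like their numpy counterparts; a quick illustration in plain numpy (values shown for orientation only):

    import numpy as np

    np.arange(2, 10, 2)         # array([2, 4, 6, 8])                     -> 'range'
    np.linspace(0, 10, num=5)   # array([ 0. ,  2.5,  5. ,  7.5, 10. ])   -> 'linspace'
    np.logspace(0, 3, num=4)    # array([   1.,   10.,  100., 1000.])     -> 'logspace'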

class RandomGridSearchOptimizer

class RandomGridSearchOptimizer(GridSearchOptimizer):
-
-    def __init__(self, k=None):
-        super(RandomGridSearchOptimizer, self).__init__()
-        self.k = k
-
-    def prepare(self, pipeline_elements):
-        super(RandomGridSearchOptimizer, self).prepare(pipeline_elements)
-        self.param_grid = list(self.param_grid)
-        # create random chaos in list
-        np.random.shuffle(self.param_grid)
-        if self.k is not None:
-            self.param_grid = self.param_grid[0:self.k]
-
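In other words, this optimizer walks the same grid as GridSearchOptimizer but in shuffled order, and k caps how many configurations are tried at all. A minimal standalone sketch, assuming pipeline_elements from a hyperpipe; inside PHOTON the hyperpipe calls prepare for you:

    optimizer = RandomGridSearchOptimizer(k=10)
    optimizer.prepare(pipeline_elements)    # full grid, shuffled, truncated to 10
    configs = list(optimizer.next_config)   # at most 10 random configurations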

Ancestors (in MRO)

Static methods

def __init__(self, k=None)

Initialize self. See help(type(self)) for accurate signature.
-
def __init__(self, k=None):
-    super(RandomGridSearchOptimizer, self).__init__()
-    self.k = k
-

def evaluate_recent_performance(self, config, performance)
-
def evaluate_recent_performance(self, config, performance):
-    # influence return value of next_config
-    pass
-

def next_config_generator(self)
-
def next_config_generator(self):
-    for parameters in self.param_grid:
-        yield parameters
-

def prepare(self, pipeline_elements)
-
def prepare(self, pipeline_elements):
-    super(RandomGridSearchOptimizer, self).prepare(pipeline_elements)
-    self.param_grid = list(self.param_grid)
-    # create random chaos in list
-    np.random.shuffle(self.param_grid)
-    if self.k is not None:
-        self.param_grid = self.param_grid[0:self.k]
-

Instance variables

var k

class TimeBoxedRandomGridSearchOptimizer

class TimeBoxedRandomGridSearchOptimizer(RandomGridSearchOptimizer):
-
-    def __init__(self, limit_in_minutes=60):
-        super(TimeBoxedRandomGridSearchOptimizer, self).__init__()
-        self.limit_in_minutes = limit_in_minutes
-        self.start_time = None
-        self.end_time = None
-
-    def prepare(self, pipeline_elements):
-        super(TimeBoxedRandomGridSearchOptimizer, self).prepare(pipeline_elements)
-        self.start_time = None
-
-    def next_config_generator(self):
-        if self.start_time is None:
-            self.start_time = datetime.datetime.now()
-            self.end_time = self.start_time + datetime.timedelta(minutes=self.limit_in_minutes)
-        for parameters in super(TimeBoxedRandomGridSearchOptimizer, self).next_config_generator():
-            if datetime.datetime.now() < self.end_time:
-                yield parameters
-
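The time box works lazily: the clock starts on the first configuration request, and once the limit has passed the generator simply stops yielding, so an evaluation that is already running finishes but no new one starts. A hedged sketch, with pipeline_elements and evaluate as hypothetical stand-ins:

    optimizer = TimeBoxedRandomGridSearchOptimizer(limit_in_minutes=30)
    optimizer.prepare(pipeline_elements)
    for config in optimizer.next_config_generator():
        evaluate(config)   # hypothetical scoring call; the loop ends when time is up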

Ancestors (in MRO)

Static methods

def __init__(self, limit_in_minutes=60)

Initialize self. See help(type(self)) for accurate signature.
-
def __init__(self, limit_in_minutes=60):
-    super(TimeBoxedRandomGridSearchOptimizer, self).__init__()
-    self.limit_in_minutes = limit_in_minutes
-    self.start_time = None
-    self.end_time = None
-

def evaluate_recent_performance(self, config, performance)
-
def evaluate_recent_performance(self, config, performance):
-    # influence return value of next_config
-    pass
-

def next_config_generator(self)
-
def next_config_generator(self):
-    if self.start_time is None:
-        self.start_time = datetime.datetime.now()
-        self.end_time = self.start_time + datetime.timedelta(minutes=self.limit_in_minutes)
-    for parameters in super(TimeBoxedRandomGridSearchOptimizer, self).next_config_generator():
-        if datetime.datetime.now() < self.end_time:
-            yield parameters
-

def prepare(self, pipeline_elements)
-
def prepare(self, pipeline_elements):
-    super(TimeBoxedRandomGridSearchOptimizer, self).prepare(pipeline_elements)
-    self.start_time = None
-

Instance variables

var end_time

var limit_in_minutes

var start_time
diff --git a/photonai/documentation/photonai/validation/index.html b/photonai/documentation/photonai/validation/index.html
deleted file mode 100644
index a92ac0f7..00000000
--- a/photonai/documentation/photonai/validation/index.html
+++ /dev/null
@@ -1,2419 +0,0 @@
-photonai.validation API documentation
-

photonai.validation module

PHOTON classes for testing a specific hyperparameter configuration and calculating the performance metrics.

"""
-PHOTON classes for testing a specific hyperparameter configuration and calculating the performance metrics.
-"""
-
-from .Validate import TestPipeline, Scorer, OptimizerMetric
-# from .PermutationTest import PermutationTest
-
-__all__ = ("TestPipeline",
-           "Scorer",
-           "OptimizerMetric")
-
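The public surface of the module is therefore exactly the three names re-exported above; a typical import looks like:

    from photonai.validation import TestPipeline, Scorer, OptimizerMetric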

Classes


class OptimizerMetric


Manages the metric that is chosen to pick the best hyperparameter configuration. Automatically detects if the metric is better when the value increases or decreases.

class OptimizerMetric(object):
-    """
-    Manages the metric that is chosen to pick the best hyperparameter configuration.
-    Automatically detects if the metric is better when the value increases or decreases.
-    """
-
-    def __init__(self, metric, pipeline_elements, other_metrics):
-        self.metric = metric
-        self.greater_is_better = None
-        self.other_metrics = other_metrics
-        self.set_optimizer_metric(pipeline_elements)
-
-    def check_metrics(self):
-        """
-        Checks the metric settings for convenience.
-
-        Check if the best config metric is included in the list of metrics to be calculated.
-        Check if the best config metric is set but the list of metrics is empty.
-        :return: validated list of metrics
-        """
-        if self.other_metrics:
-            if self.metric not in self.other_metrics:
-                self.other_metrics.append(self.metric)
-        # maybe there's a better solution to this
-        else:
-            self.other_metrics = [self.metric]
-        return self.other_metrics
-
-    def get_optimum_config(self, tested_configs):
-        """
-        Looks for the best configuration according to the metric with which the configurations are compared -> best config metric
-        :param tested_configs: the list of tested configurations and their performances
-        :return: MDBConfiguration that has performed best
-        """
-
-        list_of_config_vals = []
-        list_of_non_failed_configs = [conf for conf in tested_configs if not conf.config_failed]
-
-        if len(list_of_non_failed_configs) == 0:
-            raise Warning("No Configs found which did not fail.")
-        try:
-            for config in list_of_non_failed_configs:
-                list_of_config_vals.append(MDBHelper.get_metric(config, FoldOperations.MEAN, self.metric, train=False))
-
-            if self.greater_is_better:
-                # max metric
-                best_config_metric_nr = np.argmax(list_of_config_vals)
-            else:
-                # min metric
-                best_config_metric_nr = np.argmin(list_of_config_vals)
-            return list_of_non_failed_configs[best_config_metric_nr]
-        except BaseException as e:
-            Logger().error(str(e))
-
-    def get_optimum_config_outer_folds(self, outer_folds):
-
-        list_of_scores = list()
-        for outer_fold in outer_folds:
-            metrics = outer_fold.best_config.inner_folds[0].validation.metrics
-            list_of_scores.append(metrics[self.metric])
-
-        if self.greater_is_better:
-            # max metric
-            best_config_metric_nr = np.argmax(list_of_scores)
-        else:
-            # min metric
-            best_config_metric_nr = np.argmin(list_of_scores)
-
-        best_config = outer_folds[best_config_metric_nr].best_config
-        best_config_mdb = MDBConfig()
-        best_config_mdb.config_dict = best_config.config_dict
-        best_config_mdb.children_config_ref = best_config.children_config_ref
-        best_config_mdb.children_config_dict = best_config.children_config_dict
-        best_config_mdb.human_readable_config = best_config.human_readable_config
-        return best_config_mdb
-
-
-    def set_optimizer_metric(self, pipeline_elements):
-        """
-        Analyse and prepare the best config metric.
-        Derive if it is better when the value increases or decreases.
-        :param pipeline_elements: the items of the pipeline
-        """
-        if isinstance(self.metric, str):
-            if self.metric in Scorer.ELEMENT_DICTIONARY:
-                # for now do a simple hack and set greater_is_better
-                # by looking at error/score in metric name
-                metric_name = Scorer.ELEMENT_DICTIONARY[self.metric][1]
-                specifier = Scorer.ELEMENT_DICTIONARY[self.metric][2]
-                if specifier == 'score':
-                    self.greater_is_better = True
-                elif specifier == 'error':
-                    self.greater_is_better = False
-                else:
-                    # Todo: better error checking?
-                    error_msg = "Metric not suitable for optimizer."
-                    Logger().error(error_msg)
-                    raise NameError(error_msg)
-            else:
-                Logger().error('NameError: Specify valid metric.')
-                raise NameError('Specify valid metric.')
-        else:
-            # if no optimizer metric was chosen, use default scoring method
-            self.metric = 'score'
-
-            last_element = pipeline_elements[-1]
-            if hasattr(last_element.base_element, '_estimator_type'):
-                self.greater_is_better = True
-            else:
-                # Todo: better error checking?
-                Logger().error('NotImplementedError: ' +
-                               'Last pipeline element does not specify '+
-                               'whether it is a classifier, regressor, transformer or '+
-                               'clusterer.')
-                raise NotImplementedError('Last pipeline element does not specify '
-                                          'whether it is a classifier, regressor, transformer or '
-                                          'clusterer.')
-
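For orientation: with the metric names registered in the Scorer class below, OptimizerMetric('mean_absolute_error', ...) sets greater_is_better to False (an 'error' metric), while 'accuracy' would set it to True (a 'score' metric). A hedged sketch of standalone use, with pipeline_elements assumed to come from a hyperpipe:

    opt_metric = OptimizerMetric(metric='mean_absolute_error',
                                 pipeline_elements=pipeline_elements,
                                 other_metrics=['mean_squared_error'])
    opt_metric.check_metrics()     # ['mean_squared_error', 'mean_absolute_error']
    opt_metric.greater_is_better   # False: a lower error is better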

Ancestors (in MRO)

Static methods

def __init__(self, metric, pipeline_elements, other_metrics)

Initialize self. See help(type(self)) for accurate signature.
-
def __init__(self, metric, pipeline_elements, other_metrics):
-    self.metric = metric
-    self.greater_is_better = None
-    self.other_metrics = other_metrics
-    self.set_optimizer_metric(pipeline_elements)
-

def check_metrics(self)

Checks the metric settings for convenience.

Check if the best config metric is included in the list of metrics to be calculated.
Check if the best config metric is set but the list of metrics is empty.
:return: validated list of metrics
-
def check_metrics(self):
-    """
-    Checks the metric settings for convenience.
-    Check if the best config metric is included in the list of metrics to be calculated.
-    Check if the best config metric is set but the list of metrics is empty.
-    :return: validated list of metrics
-    """
-    if self.other_metrics:
-        if self.metric not in self.other_metrics:
-            self.other_metrics.append(self.metric)
-    # maybe there's a better solution to this
-    else:
-        self.other_metrics = [self.metric]
-    return self.other_metrics
-

def get_optimum_config(self, tested_configs)

Looks for the best configuration according to the metric with which the configurations are compared -> best config metric
:param tested_configs: the list of tested configurations and their performances
:return: MDBConfiguration that has performed best
-
def get_optimum_config(self, tested_configs):
-    """
-    Looks for the best configuration according to the metric with which the configurations are compared -> best config metric
-    :param tested_configs: the list of tested configurations and their performances
-    :return: MDBConfiguration that has performed best
-    """
-    list_of_config_vals = []
-    list_of_non_failed_configs = [conf for conf in tested_configs if not conf.config_failed]
-    if len(list_of_non_failed_configs) == 0:
-        raise Warning("No Configs found which did not fail.")
-    try:
-        for config in list_of_non_failed_configs:
-            list_of_config_vals.append(MDBHelper.get_metric(config, FoldOperations.MEAN, self.metric, train=False))
-        if self.greater_is_better:
-            # max metric
-            best_config_metric_nr = np.argmax(list_of_config_vals)
-        else:
-            # min metric
-            best_config_metric_nr = np.argmin(list_of_config_vals)
-        return list_of_non_failed_configs[best_config_metric_nr]
-    except BaseException as e:
-        Logger().error(str(e))
-

def get_optimum_config_outer_folds(self, outer_folds)
-
def get_optimum_config_outer_folds(self, outer_folds):
-    list_of_scores = list()
-    for outer_fold in outer_folds:
-        metrics = outer_fold.best_config.inner_folds[0].validation.metrics
-        list_of_scores.append(metrics[self.metric])
-    if self.greater_is_better:
-        # max metric
-        best_config_metric_nr = np.argmax(list_of_scores)
-    else:
-        # min metric
-        best_config_metric_nr = np.argmin(list_of_scores)
-    best_config = outer_folds[best_config_metric_nr].best_config
-    best_config_mdb = MDBConfig()
-    best_config_mdb.config_dict = best_config.config_dict
-    best_config_mdb.children_config_ref = best_config.children_config_ref
-    best_config_mdb.children_config_dict = best_config.children_config_dict
-    best_config_mdb.human_readable_config = best_config.human_readable_config
-    return best_config_mdb
-

def set_optimizer_metric(self, pipeline_elements)

Analyse and prepare the best config metric.
Derive if it is better when the value increases or decreases.
:param pipeline_elements: the items of the pipeline
-
def set_optimizer_metric(self, pipeline_elements):
-    """
-    Analyse and prepare the best config metric.
-    Derive if it is better when the value increases or decreases.
-    :param pipeline_elements: the items of the pipeline
-    """
-    if isinstance(self.metric, str):
-        if self.metric in Scorer.ELEMENT_DICTIONARY:
-            # for now do a simple hack and set greater_is_better
-            # by looking at error/score in metric name
-            metric_name = Scorer.ELEMENT_DICTIONARY[self.metric][1]
-            specifier = Scorer.ELEMENT_DICTIONARY[self.metric][2]
-            if specifier == 'score':
-                self.greater_is_better = True
-            elif specifier == 'error':
-                self.greater_is_better = False
-            else:
-                # Todo: better error checking?
-                error_msg = "Metric not suitable for optimizer."
-                Logger().error(error_msg)
-                raise NameError(error_msg)
-        else:
-            Logger().error('NameError: Specify valid metric.')
-            raise NameError('Specify valid metric.')
-    else:
-        # if no optimizer metric was chosen, use default scoring method
-        self.metric = 'score'
-        last_element = pipeline_elements[-1]
-        if hasattr(last_element.base_element, '_estimator_type'):
-            self.greater_is_better = True
-        else:
-            # Todo: better error checking?
-            Logger().error('NotImplementedError: ' +
-                           'Last pipeline element does not specify '+
-                           'whether it is a classifier, regressor, transformer or '+
-                           'clusterer.')
-            raise NotImplementedError('Last pipeline element does not specify '
-                                      'whether it is a classifier, regressor, transformer or '
-                                      'clusterer.')
-

Instance variables

var greater_is_better

var metric

var other_metrics

class Scorer


Transforms a string literal into a callable instance of a particular metric

class Scorer(object):
-    """
-    Transforms a string literal into a callable instance of a particular metric
-    """
-
-    ELEMENT_DICTIONARY = {
-        # Classification
-        'matthews_corrcoef': ('sklearn.metrics', 'matthews_corrcoef', None),
-        'confusion_matrix': ('sklearn.metrics', 'confusion_matrix', None),
-        'accuracy': ('sklearn.metrics', 'accuracy_score', 'score'),
-        'f1_score': ('sklearn.metrics', 'f1_score', 'score'),
-        'hamming_loss': ('sklearn.metrics', 'hamming_loss', 'error'),
-        'log_loss': ('sklearn.metrics', 'log_loss', 'error'),
-        'precision': ('sklearn.metrics', 'precision_score', 'score'),
-        'recall': ('sklearn.metrics', 'recall_score', 'score'),
-        'categorical_accuracy': ('photonai.validation.Metrics', 'categorical_accuracy_score', 'score'),
-        'categorical_crossentropy': ('photonai.validation.Metrics', 'categorical_crossentropy', 'error'),
-
-        # Regression
-        'mean_squared_error': ('sklearn.metrics', 'mean_squared_error', 'error'),
-        'mean_absolute_error': ('sklearn.metrics', 'mean_absolute_error', 'error'),
-        'explained_variance': ('sklearn.metrics', 'explained_variance_score', 'score'),
-        'r2': ('sklearn.metrics', 'r2_score', 'score'),
-        'pearson_correlation': ('photon_core.framework.Metrics', 'pearson_correlation', None),
-        'variance_explained':  ('photon_core.framework.Metrics', 'variance_explained_score', 'score')
-
-    }
-
-    @classmethod
-    def create(cls, metric):
-        """
-        Searches for the metric by name and instantiates the according calculation function
-        :param metric: the name of the metric as encoded in the ELEMENT_DICTIONARY
-        :type metric: str
-        :return: a callable instance of the metric calculation
-        """
-        if metric in Scorer.ELEMENT_DICTIONARY:
-            try:
-                desired_class_info = Scorer.ELEMENT_DICTIONARY[metric]
-                desired_class_home = desired_class_info[0]
-                desired_class_name = desired_class_info[1]
-                imported_module = __import__(desired_class_home, globals(),
-                                             locals(), desired_class_name, 0)
-                desired_class = getattr(imported_module, desired_class_name)
-                scoring_method = desired_class
-                return scoring_method
-            except AttributeError as ae:
-            Logger().error('ValueError: Could not find according class: '
-                           + str(Scorer.ELEMENT_DICTIONARY[metric]))
-            raise ValueError('Could not find according class:',
-                             Scorer.ELEMENT_DICTIONARY[metric])
-        else:
-            Logger().error('NameError: Metric not supported right now:' + metric)
-            raise NameError('Metric not supported right now:', metric)
-
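The registry makes scoring pluggable: Scorer.create maps a metric name onto the underlying function, and the returned object is called exactly like the sklearn metric it wraps. A quick sketch:

    # 'accuracy' resolves to sklearn.metrics.accuracy_score via ELEMENT_DICTIONARY.
    accuracy = Scorer.create('accuracy')
    accuracy([0, 1, 1, 0], [0, 1, 0, 0])   # -> 0.75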

Ancestors (in MRO)

  • Scorer
  • builtins.object

Class variables

var ELEMENT_DICTIONARY

Methods


def create(cls, metric)

Searches for the metric by name and instantiates the according calculation function
:param metric: the name of the metric as encoded in the ELEMENT_DICTIONARY
:type metric: str
:return: a callable instance of the metric calculation
-
@classmethod
-def create(cls, metric):
-    """
-    Searches for the metric by name and instantiates the according calculation function
-    :param metric: the name of the metric as encoded in the ELEMENT_DICTIONARY
-    :type metric: str
-    :return: a callable instance of the metric calculation
-    """
-    if metric in Scorer.ELEMENT_DICTIONARY:
-        try:
-            desired_class_info = Scorer.ELEMENT_DICTIONARY[metric]
-            desired_class_home = desired_class_info[0]
-            desired_class_name = desired_class_info[1]
-            imported_module = __import__(desired_class_home, globals(),
-                                         locals(), desired_class_name, 0)
-            desired_class = getattr(imported_module, desired_class_name)
-            scoring_method = desired_class
-            return scoring_method
-        except AttributeError as ae:
-            Logger().error('ValueError: Could not find according class: '
-                           + str(Scorer.ELEMENT_DICTIONARY[metric]))
-            raise ValueError('Could not find according class:',
-                             Scorer.ELEMENT_DICTIONARY[metric])
-    else:
-        Logger().error('NameError: Metric not supported right now:' + metric)
-        raise NameError('Metric not supported right now:', metric)
-

class TestPipeline


Trains and tests a sklearn pipeline for a specific hyperparameter combination with cross-validation, calculates metrics for each fold and averages metrics over all folds.

class TestPipeline(object):
-    """
-        Trains and tests a sklearn pipeline for a specific hyperparameter combination with cross-validation,
-        calculates metrics for each fold and averages metrics over all folds
-    """
-
-    def __init__(self, pipe: Pipeline, specific_config: dict, metrics: list, mother_inner_fold_handle,
-                 raise_error: bool=False, mongo_db_settings=None, callback_function = None):
-        """
-        Creates a new TestPipeline object
-        :param pipe: The sklearn pipeline instance that shall be trained and tested
-        :type pipe: Pipeline
-        :param specific_config: The hyperparameter configuration to test
-        :type specific_config: dict
-        :param metrics: List of metrics to calculate
-        :type metrics: list
-        :param mother_inner_fold_handle: Function handle in order to inform the hyperpipe about the current inner_fold
-        :type mother_inner_fold_handle: function handle
-        :param raise_error: if true, raises exception when training and testing the pipeline fails
-        :type raise_error: bool
-        """
-
-        self.params = specific_config
-        self.pipe = pipe
-        self.metrics = metrics
-        self.raise_error = raise_error
-        self.mother_inner_fold_handle = mother_inner_fold_handle
-        self.mongo_db_settings = mongo_db_settings
-        self.callback_function = callback_function
-
-    def calculate_cv_score(self, X, y, cv_iter,
-                           calculate_metrics_per_fold: bool = True,
-                           calculate_metrics_across_folds: bool =False):
-        """
-        Iterates over cross-validation folds and trains the pipeline, then uses it for predictions.
-        Calculates metrics per fold and averages them over folds.
-        :param X: Training and test data
-        :param y: Training and test targets
-        :param cv_iter: function/array that yields train and test indices
-        :param calculate_metrics_per_fold: if True, calculates metrics on predictions particularly for each fold
-        :param calculate_metrics_across_folds: if True, collects predictions from all folds and calculate metrics on whole collective
-        :returns: configuration class for result tree that monitors training and test performance
-        """
-
-        # needed for testing Timeboxed Random Grid Search
-        # time.sleep(35)
-
-        config_item = MDBConfig()
-        config_item.inner_folds = []
-        config_item.metrics_test = []
-        config_item.metrics_train = []
-        fold_cnt = 0
-
-        overall_y_pred_test = []
-        overall_y_true_test = []
-        overall_y_pred_train = []
-        overall_y_true_train = []
-
-        # if we want to collect the predictions, we need to save them into the tree
-        original_save_predictions = self.mongo_db_settings.save_predictions
-        save_predictions = bool(self.mongo_db_settings.save_predictions)
-        save_feature_importances = self.mongo_db_settings.save_feature_importances
-        if calculate_metrics_across_folds:
-            save_predictions = True
-
-        inner_fold_list = []
-        try:
-
-            # do inner cv
-            for train, test in cv_iter:
-
-                    # set params to current config
-                    self.pipe.set_params(**self.params)
-
-                    # inform children in which inner fold we are
-                    # self.pipe.distribute_cv_info_to_hyperpipe_children(inner_fold_counter=fold_cnt)
-                    self.mother_inner_fold_handle(fold_cnt)
-
-                    # start fitting
-                    fit_start_time = time.time()
-                    self.pipe.fit(X[train], y[train])
-
-                    # Todo: Fit Process Metrics
-
-                    # write down how long the fitting took
-                    fit_duration = time.time()-fit_start_time
-                    config_item.fit_duration_minutes = fit_duration
-
-                    # score test data
-                    curr_test_fold = TestPipeline.score(self.pipe, X[test], y[test], self.metrics, indices=test,
-                                                        save_predictions=save_predictions,
-                                                        save_feature_importances=save_feature_importances)
-
-                    # score train data
-                    curr_train_fold = TestPipeline.score(self.pipe, X[train], y[train], self.metrics, indices=train,
-                                                         save_predictions=save_predictions,
-                                                         save_feature_importances=save_feature_importances)
-
-                    if calculate_metrics_across_folds:
-                        # if we have one hot encoded values -> concat horizontally
-                        if isinstance(curr_test_fold.y_pred, np.ndarray):
-                            if len(curr_test_fold.y_pred.shape) > 1:
-                                axis = 1
-                            else:
-                                axis = 0
-                        else:
-                            # if we have lists concat
-                            axis = 0
-                        overall_y_true_test = np.concatenate((overall_y_true_test, curr_test_fold.y_true), axis=axis)
-                        overall_y_pred_test = np.concatenate((overall_y_pred_test, curr_test_fold.y_pred), axis=axis)
-
-                        # we assume y_pred from the training set comes in the same shape as y_pred from the test set
-                        overall_y_true_train = np.concatenate((overall_y_true_train, curr_train_fold.y_true), axis=axis)
-                        overall_y_pred_train = np.concatenate((overall_y_pred_train, curr_train_fold.y_pred), axis=axis)
-
-                    # fill result tree with fold information
-                    inner_fold = MDBInnerFold()
-                    inner_fold.fold_nr = fold_cnt
-                    inner_fold.training = curr_train_fold
-                    inner_fold.validation = curr_test_fold
-                    #inner_fold.number_samples_training = int(len(train))
-                    #inner_fold.number_samples_validation = int(len(test))
-                    inner_fold_list.append(inner_fold)
-
-                    fold_cnt += 1
-
-                    if self.callback_function:
-                        if isinstance(self.callback_function, list):
-                            break_cv = 0
-                            for cf in self.callback_function:
-                                if not cf.shall_continue(inner_fold_list):
-                                    Logger().info('Skip further cross validation of config because of performance constraints')
-                                    break_cv += 1
-                                    break
-                            if break_cv > 0:
-                                break
-                        else:
-                            if not self.callback_function.shall_continue(inner_fold_list):
-                                Logger().info(
-                                    'Skip further cross validation of config because of performance constraints')
-                                break
-
-            # save all inner folds to the tree under the config item
-            config_item.inner_folds = inner_fold_list
-
-            # if we want to have metrics across all predictions from all folds:
-            if calculate_metrics_across_folds:
-                # metrics across folds
-                metrics_to_calculate = list(self.metrics)
-                if 'score' in metrics_to_calculate:
-                    metrics_to_calculate.remove('score')
-                metrics_train = TestPipeline.calculate_metrics(overall_y_true_train, overall_y_pred_train, metrics_to_calculate)
-                metrics_test = TestPipeline.calculate_metrics(overall_y_true_test, overall_y_pred_test, metrics_to_calculate)
-
-                def metric_to_db_class(metric_list):
-                    db_metrics = []
-                    for metric_name, metric_value in metric_list.items():
-                        new_metric = MDBFoldMetric(operation=FoldOperations.RAW, metric_name=metric_name,
-                                                   value=metric_value)
-                        db_metrics.append(new_metric)
-                    return db_metrics
-
-                db_metrics_train = metric_to_db_class(metrics_train)
-                db_metrics_test = metric_to_db_class(metrics_test)
-
-                # if we want to have metrics for each fold as well, calculate mean and std.
-                if calculate_metrics_per_fold:
-                    db_metrics_fold_train, db_metrics_fold_test = MDBHelper.aggregate_metrics(config_item,
-                                                                                              self.metrics)
-                    config_item.metrics_train = db_metrics_train + db_metrics_fold_train
-                    config_item.metrics_test = db_metrics_test + db_metrics_fold_test
-                else:
-                    config_item.metrics_train = db_metrics_train
-                    config_item.metrics_test = db_metrics_test
-
-                # we needed to save the true/predicted values to calculate the metrics across folds,
-                # but if the user is uninterested in it we dismiss them after the job is done
-                if not original_save_predictions:
-                    for inner_fold in config_item.inner_folds:
-                        # Todo: What about dismissing feature importances, too?
-                        inner_fold.training.y_true = []
-                        inner_fold.training.y_pred = []
-                        inner_fold.training.indices = []
-                        inner_fold.validation.y_true = []
-                        inner_fold.validation.y_pred = []
-                        inner_fold.validation.indices = []
-
-            elif calculate_metrics_per_fold:
-                # calculate mean and std over all fold metrics
-                config_item.metrics_train, config_item.metrics_test = MDBHelper.aggregate_metrics(config_item,
-                                                                                                  self.metrics)
-
-        except Exception as e:
-            if self.raise_error:
-                raise e
-            Logger().error(e)
-            traceback.print_exc()
-            config_item.config_failed = True
-            config_item.config_error = str(e)
-            warnings.warn('One test iteration of pipeline failed with error')
-
-        return config_item
-
-    @staticmethod
-    def score(estimator, X, y_true, metrics, indices=[],
-              save_predictions=False, save_feature_importances=False,
-              calculate_metrics: bool=True):
-        """
-        Uses the pipeline to predict the given data, compare it to the truth values and calculate metrics
-
-        :param estimator: the pipeline or pipeline element for prediction
-        :param X: the data for prediction
-        :param y_true: the truth values for the data
-        :param metrics: the metrics to be calculated
-        :param indices: the indices of the given data and targets that are logged into the result tree
-        :param save_predictions: if True, the predicted value array is stored into the result tree
-        :param calculate_metrics: if True, calculates metrics for given data
-        :return: ScoreInformation object
-        """
-
-        scoring_time_start = time.time()
-
-        output_metrics = {}
-        non_default_score_metrics = list(metrics)
-        if 'score' in metrics:
-            if hasattr(estimator, 'score'):
-                # Todo: Here it is potentially slowing down!!!!!!!!!!!!!!!!
-                default_score = estimator.score(X, y_true)
-                output_metrics['score'] = default_score
-                non_default_score_metrics.remove('score')
-
-        y_pred = estimator.predict(X)
-
-        f_importances = []
-        if save_feature_importances:
-            if hasattr(estimator._final_estimator.base_element, 'coef_'):
-                f_importances = estimator._final_estimator.base_element.coef_
-                f_importances = f_importances.tolist()
-            elif hasattr(estimator._final_estimator.base_element, 'feature_importances_'):
-                f_importances = estimator._final_estimator.base_element.feature_importances_
-                f_importances = f_importances.tolist()
-        # Nice to have
-        # TestPipeline.plot_some_data(y_true, y_pred)
-
-        if calculate_metrics:
-            score_metrics = TestPipeline.calculate_metrics(y_true, y_pred, non_default_score_metrics)
-
-            # add default metric
-            if output_metrics:
-                output_metrics = {**output_metrics, **score_metrics}
-            else:
-                output_metrics = score_metrics
-        else:
-            output_metrics = {}
-
-        final_scoring_time = time.time() - scoring_time_start
-        if save_predictions:
-            score_result_object = MDBScoreInformation(metrics=output_metrics,
-                                                      score_duration=final_scoring_time,
-                                                      y_pred=y_pred.tolist(), y_true=y_true.tolist(),
-                                                      indices=np.asarray(indices).tolist())
-            if save_feature_importances:
-                score_result_object.feature_importances = f_importances
-        elif save_feature_importances:
-            score_result_object = MDBScoreInformation(metrics=output_metrics,
-                                                      score_duration=final_scoring_time,
-                                                      feature_importances=f_importances)
-        else:
-            score_result_object = MDBScoreInformation(metrics=output_metrics,
-                                                      score_duration=final_scoring_time)
-        return score_result_object
-
-    @staticmethod
-    def calculate_metrics(y_true, y_pred, metrics):
-        """
-        Applies all metrics to the given predicted and true values.
-        The metrics are encoded via a string literal which is mapped to the according calculation function
-        :param y_true: the truth values
-        :type y_true: list
-        :param y_pred: the predicted values
-        :param metrics: list
-        :return: dict of metrics
-        """
-
-        # Todo: HOW TO CHECK IF ITS REGRESSION?!
-        # The following works only for classification
-        # if np.ndim(y_pred) == 2:
-        #     y_pred = one_hot_to_binary(y_pred)
-        #     Logger().warn("test_predictions was one hot encoded => transformed to binary")
-        #
-        # if np.ndim(y_true) == 2:
-        #     y_true = one_hot_to_binary(y_true)
-        #     Logger().warn("test_y was one hot encoded => transformed to binary")
-
-        output_metrics = {}
-        if metrics:
-            for metric in metrics:
-                scorer = Scorer.create(metric)
-                scorer_value = scorer(y_true, y_pred)
-                output_metrics[metric] = scorer_value
-
-        return output_metrics
-
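Of the three entry points, calculate_metrics is the easiest to exercise in isolation, since it is a plain static method over true and predicted values:

    # Each metric name is resolved via Scorer.create, as shown above.
    TestPipeline.calculate_metrics(y_true=[0, 1, 1, 0],
                                   y_pred=[0, 1, 0, 0],
                                   metrics=['accuracy', 'f1_score'])
    # -> {'accuracy': 0.75, 'f1_score': 0.666...}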

Ancestors (in MRO)

Static methods

def __init__(self, pipe, specific_config, metrics, mother_inner_fold_handle, raise_error=False, mongo_db_settings=None, callback_function=None)

Creates a new TestPipeline object
:param pipe: The sklearn pipeline instance that shall be trained and tested
:type pipe: Pipeline
:param specific_config: The hyperparameter configuration to test
:type specific_config: dict
:param metrics: List of metrics to calculate
:type metrics: list
:param mother_inner_fold_handle: Function handle in order to inform the hyperpipe about the current inner_fold
:type mother_inner_fold_handle: function handle
:param raise_error: if true, raises exception when training and testing the pipeline fails
:type raise_error: bool
-
def __init__(self, pipe: Pipeline, specific_config: dict, metrics: list, mother_inner_fold_handle,
-             raise_error: bool=False, mongo_db_settings=None, callback_function = None):
-    """
-    Creates a new TestPipeline object
-    :param pipe: The sklearn pipeline instance that shall be trained and tested
-    :type pipe: Pipeline
-    :param specific_config: The hyperparameter configuration to test
-    :type specific_config: dict
-    :param metrics: List of metrics to calculate
-    :type metrics: list
-    :param mother_inner_fold_handle: Function handle in order to inform the hyperpipe about the current inner_fold
-    :type mother_inner_fold_handle: function handle
-    :param raise_error: if true, raises exception when training and testing the pipeline fails
-    :type raise_error: bool
-    """
-    self.params = specific_config
-    self.pipe = pipe
-    self.metrics = metrics
-    self.raise_error = raise_error
-    self.mother_inner_fold_handle = mother_inner_fold_handle
-    self.mongo_db_settings = mongo_db_settings
-    self.callback_function = callback_function
-

def calculate_cv_score(self, X, y, cv_iter, calculate_metrics_per_fold=True, calculate_metrics_across_folds=False)

Iterates over cross-validation folds and trains the pipeline, then uses it for predictions.
Calculates metrics per fold and averages them over folds.
:param X: Training and test data
:param y: Training and test targets
:param cv_iter: function/array that yields train and test indices
:param calculate_metrics_per_fold: if True, calculates metrics on predictions particularly for each fold
:param calculate_metrics_across_folds: if True, collects predictions from all folds and calculates metrics on the whole collective
:returns: configuration class for result tree that monitors training and test performance
-
def calculate_cv_score(self, X, y, cv_iter,
-                       calculate_metrics_per_fold: bool = True,
-                       calculate_metrics_across_folds: bool =False):
-    """
-    Iterates over cross-validation folds and trains the pipeline, then uses it for predictions.
-    Calculates metrics per fold and averages them over folds.
-    :param X: Training and test data
-    :param y: Training and test targets
-    :param cv_iter: function/array that yields train and test indices
-    :param calculate_metrics_per_fold: if True, calculates metrics on predictions particularly for each fold
-    :param calculate_metrics_across_folds: if True, collects predictions from all folds and calculate metrics on whole collective
-    :returns: configuration class for result tree that monitors training and test performance
-    """
-    # needed for testing Timeboxed Random Grid Search
-    # time.sleep(35)
-    config_item = MDBConfig()
-    config_item.inner_folds = []
-    config_item.metrics_test = []
-    config_item.metrics_train = []
-    fold_cnt = 0
-    overall_y_pred_test = []
-    overall_y_true_test = []
-    overall_y_pred_train = []
-    overall_y_true_train = []
-    # if we want to collect the predictions, we need to save them into the tree
-    original_save_predictions = self.mongo_db_settings.save_predictions
-    save_predictions = bool(self.mongo_db_settings.save_predictions)
-    save_feature_importances = self.mongo_db_settings.save_feature_importances
-    if calculate_metrics_across_folds:
-        save_predictions = True
-    inner_fold_list = []
-    try:
-        # do inner cv
-        for train, test in cv_iter:
-                # set params to current config
-                self.pipe.set_params(**self.params)
-                # inform children in which inner fold we are
-                # self.pipe.distribute_cv_info_to_hyperpipe_children(inner_fold_counter=fold_cnt)
-                self.mother_inner_fold_handle(fold_cnt)
-                # start fitting
-                fit_start_time = time.time()
-                self.pipe.fit(X[train], y[train])
-                # Todo: Fit Process Metrics
-                # write down how long the fitting took
-                fit_duration = time.time()-fit_start_time
-                config_item.fit_duration_minutes = fit_duration
-                # score test data
-                curr_test_fold = TestPipeline.score(self.pipe, X[test], y[test], self.metrics, indices=test,
-                                                    save_predictions=save_predictions,
-                                                    save_feature_importances=save_feature_importances)
-                # score train data
-                curr_train_fold = TestPipeline.score(self.pipe, X[train], y[train], self.metrics, indices=train,
-                                                     save_predictions=save_predictions,
-                                                     save_feature_importances=save_feature_importances)
-                if calculate_metrics_across_folds:
-                    # if we have one hot encoded values -> concat horizontally
-                    if isinstance(curr_test_fold.y_pred, np.ndarray):
-                        if len(curr_test_fold.y_pred.shape) > 1:
-                            axis = 1
-                        else:
-                            axis = 0
-                    else:
-                        # if we have lists concat
-                        axis = 0
-                    overall_y_true_test = np.concatenate((overall_y_true_test, curr_test_fold.y_true), axis=axis)
-                    overall_y_pred_test = np.concatenate((overall_y_pred_test, curr_test_fold.y_pred), axis=axis)
-                    # we assume y_pred from the training set comes in the same shape as y_pred from the test set
-                    overall_y_true_train = np.concatenate((overall_y_true_train, curr_train_fold.y_true), axis=axis)
-                    overall_y_pred_train = np.concatenate((overall_y_pred_train, curr_train_fold.y_pred), axis=axis)
-                # fill result tree with fold information
-                inner_fold = MDBInnerFold()
-                inner_fold.fold_nr = fold_cnt
-                inner_fold.training = curr_train_fold
-                inner_fold.validation = curr_test_fold
-                #inner_fold.number_samples_training = int(len(train))
-                #inner_fold.number_samples_validation = int(len(test))
-                inner_fold_list.append(inner_fold)
-                fold_cnt += 1
-                if self.callback_function:
-                    if isinstance(self.callback_function, list):
-                        break_cv = 0
-                        for cf in self.callback_function:
-                            if not cf.shall_continue(inner_fold_list):
-                                Logger().info('Skip further cross validation of config because of performance constraints')
-                                break_cv += 1
-                                break
-                        if break_cv > 0:
-                            break
-                    else:
-                        if not self.callback_function.shall_continue(inner_fold_list):
-                            Logger().info(
-                                'Skip further cross validation of config because of performance constraints')
-                            break
-        # save all inner folds to the tree under the config item
-        config_item.inner_folds = inner_fold_list
-        # if we want to have metrics across all predictions from all folds:
-        if calculate_metrics_across_folds:
-            # metrics across folds
-            metrics_to_calculate = list(self.metrics)
-            if 'score' in metrics_to_calculate:
-                metrics_to_calculate.remove('score')
-            metrics_train = TestPipeline.calculate_metrics(overall_y_true_train, overall_y_pred_train, metrics_to_calculate)
-            metrics_test = TestPipeline.calculate_metrics(overall_y_true_test, overall_y_pred_test, metrics_to_calculate)
-            def metric_to_db_class(metric_list):
-                db_metrics = []
-                for metric_name, metric_value in metric_list.items():
-                    new_metric = MDBFoldMetric(operation=FoldOperations.RAW, metric_name=metric_name,
-                                               value=metric_value)
-                    db_metrics.append(new_metric)
-                return db_metrics
-            db_metrics_train = metric_to_db_class(metrics_train)
-            db_metrics_test = metric_to_db_class(metrics_test)
-            # if we want to have metrics for each fold as well, calculate mean and std.
-            if calculate_metrics_per_fold:
-                db_metrics_fold_train, db_metrics_fold_test = MDBHelper.aggregate_metrics(config_item,
-                                                                                          self.metrics)
-                config_item.metrics_train = db_metrics_train + db_metrics_fold_train
-                config_item.metrics_test = db_metrics_test + db_metrics_fold_test
-            else:
-                config_item.metrics_train = db_metrics_train
-                config_item.metrics_test = db_metrics_test
-            # we needed to save the true/predicted values to calculate the metrics across folds,
-            # but if the user is uninterested in it we dismiss them after the job is done
-            if not original_save_predictions:
-                for inner_fold in config_item.inner_folds:
-                    # Todo: What about dismissing feature importances, too?
-                    inner_fold.training.y_true = []
-                    inner_fold.training.y_pred = []
-                    inner_fold.training.indices = []
-                    inner_fold.validation.y_true = []
-                    inner_fold.validation.y_pred = []
-                    inner_fold.validation.indices = []
-        elif calculate_metrics_per_fold:
-            # calculate mean and std over all fold metrics
-            config_item.metrics_train, config_item.metrics_test = MDBHelper.aggregate_metrics(config_item,
-                                                                                              self.metrics)
-    except Exception as e:
-        if self.raise_error:
-            raise e
-        Logger().error(e)
-        traceback.print_exc()
-        config_item.config_failed = True
-        config_item.config_error = str(e)
-        warnings.warn('One test iteration of pipeline failed with error')
-    return config_item
-

def calculate_metrics(y_true, y_pred, metrics)

Applies all metrics to the given predicted and true values.
The metrics are encoded via a string literal which is mapped to the according calculation function.
:param y_true: the truth values
:type y_true: list
:param y_pred: the predicted values
:param metrics: list
:return: dict of metrics
-
@staticmethod
-def calculate_metrics(y_true, y_pred, metrics):
-    """
-    Applies all metrics to the given predicted and true values.
-    The metrics are encoded via a string literal which is mapped to the according calculation function
-    :param y_true: the truth values
-    :type y_true: list
-    :param y_pred: the predicted values
-    :param metrics: list
-    :return: dict of metrics
-    """
-    # Todo: HOW TO CHECK IF ITS REGRESSION?!
-    # The following works only for classification
-    # if np.ndim(y_pred) == 2:
-    #     y_pred = one_hot_to_binary(y_pred)
-    #     Logger().warn("test_predictions was one hot encoded => transformed to binary")
-    #
-    # if np.ndim(y_true) == 2:
-    #     y_true = one_hot_to_binary(y_true)
-    #     Logger().warn("test_y was one hot encoded => transformed to binary")
-    output_metrics = {}
-    if metrics:
-        for metric in metrics:
-            scorer = Scorer.create(metric)
-            scorer_value = scorer(y_true, y_pred)
-            output_metrics[metric] = scorer_value
-    return output_metrics
-

def score(estimator, X, y_true, metrics, indices=[], save_predictions=False, save_feature_importances=False, calculate_metrics=True)

Uses the pipeline to predict the given data, compares it to the truth values and calculates metrics.

:param estimator: the pipeline or pipeline element for prediction
:param X: the data for prediction
:param y_true: the truth values for the data
:param metrics: the metrics to be calculated
:param indices: the indices of the given data and targets that are logged into the result tree
:param save_predictions: if True, the predicted value array is stored into the result tree
:param calculate_metrics: if True, calculates metrics for the given data
:return: ScoreInformation object
-
@staticmethod
-def score(estimator, X, y_true, metrics, indices=[],
-          save_predictions=False, save_feature_importances=False,
-          calculate_metrics: bool=True):
-    """
-    Uses the pipeline to predict the given data, compare it to the truth values and calculate metrics
-    :param estimator: the pipeline or pipeline element for prediction
-    :param X: the data for prediction
-    :param y_true: the truth values for the data
-    :param metrics: the metrics to be calculated
-    :param indices: the indices of the given data and targets that are logged into the result tree
-    :param save_predictions: if True, the predicted value array is stored into the result tree
-    :param calculate_metrics: if True, calculates metrics for given data
-    :return: ScoreInformation object
-    """
-    scoring_time_start = time.time()
-    output_metrics = {}
-    non_default_score_metrics = list(metrics)
-    if 'score' in metrics:
-        if hasattr(estimator, 'score'):
-            # Todo: Here it is potentially slowing down!!!!!!!!!!!!!!!!
-            default_score = estimator.score(X, y_true)
-            output_metrics['score'] = default_score
-            non_default_score_metrics.remove('score')
-    y_pred = estimator.predict(X)
-    f_importances = []
-    if save_feature_importances:
-        if hasattr(estimator._final_estimator.base_element, 'coef_'):
-            f_importances = estimator._final_estimator.base_element.coef_
-            f_importances = f_importances.tolist()
-        elif hasattr(estimator._final_estimator.base_element, 'feature_importances_'):
-            f_importances = estimator._final_estimator.base_element.feature_importances_
-            f_importances = f_importances.tolist()
-    # Nice to have
-    # TestPipeline.plot_some_data(y_true, y_pred)
-    if calculate_metrics:
-        score_metrics = TestPipeline.calculate_metrics(y_true, y_pred, non_default_score_metrics)
-        # add default metric
-        if output_metrics:
-            output_metrics = {**output_metrics, **score_metrics}
-        else:
-            output_metrics = score_metrics
-    else:
-        output_metrics = {}
-    final_scoring_time = time.time() - scoring_time_start
-    if save_predictions:
-        score_result_object = MDBScoreInformation(metrics=output_metrics,
-                                                  score_duration=final_scoring_time,
-                                                  y_pred=y_pred.tolist(), y_true=y_true.tolist(),
-                                                  indices=np.asarray(indices).tolist())
-        if save_feature_importances:
-            score_result_object.feature_importances = f_importances
-    elif save_feature_importances:
-        score_result_object = MDBScoreInformation(metrics=output_metrics,
-                                                  score_duration=final_scoring_time,
-                                                  feature_importances=f_importances)
-    else:
-        score_result_object = MDBScoreInformation(metrics=output_metrics,
-                                                  score_duration=final_scoring_time)
-    return score_result_object
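
The removed score method times a single predict call, computes the requested metrics, and optionally pulls feature importances from the final estimator (coef_ for linear models, feature_importances_ for tree ensembles). A condensed sketch of that flow with a plain scikit-learn estimator; the result container here is a simple dict rather than MDBScoreInformation:

import time
import numpy as np
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error

def score_sketch(estimator, X, y_true):
    start = time.time()
    y_pred = estimator.predict(X)
    # linear models expose coef_, tree ensembles expose feature_importances_
    importances = getattr(estimator, 'coef_', getattr(estimator, 'feature_importances_', None))
    return {
        'metrics': {'mean_absolute_error': mean_absolute_error(y_true, y_pred)},
        'score_duration': time.time() - start,
        'y_pred': y_pred.tolist(),
        'feature_importances': None if importances is None else np.asarray(importances).tolist(),
    }

X, y = np.random.rand(20, 3), np.random.rand(20)
print(score_sketch(Ridge().fit(X, y), X, y))
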
-
-Instance variables:
-    var callback_function
-    var metrics
-    var mongo_db_settings
-    var mother_inner_fold_handle
-    var params
-    var pipe
-    var raise_error
-
diff --git a/photonai/examples/BrainMapping_example.py b/photonai/examples/BrainMapping_example.py
deleted file mode 100644
index e98dd66c..00000000
--- a/photonai/examples/BrainMapping_example.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from photonai.base.PhotonBase import Hyperpipe, PipelineElement, PipelineSwitch, OutputSettings
-from photonai.neuro.AtlasStacker import AtlasInfo
-from photonai.neuro.AtlasMapping import AtlasMapping
-from sklearn.model_selection import KFold, ShuffleSplit, GroupKFold
-from nilearn import datasets
-
-# get oasis gm data and age from nilearn; imgs
-oasis_dataset = datasets.fetch_oasis_vbm(n_subjects=90)
-dataset_files = oasis_dataset.gray_matter_maps
-targets = oasis_dataset.ext_vars['age'].astype(float)  # age
-
-# where to write the results
-folder = ''
-
-# which atlases are available?
-#BrainAtlas.whichAtlases()
-
-# define hyperpipe to be applied to each ROI as usual
-pers_opts = OutputSettings(local_file='dummy_file',
-                           save_predictions='best',
-                           save_feature_importances='None')
-my_pipe = Hyperpipe(name='dummy_pipe',  # the name of your pipeline
-                    optimizer='grid_search',  # which optimizer PHOTON shall use
-                    metrics=['mean_absolute_error', 'mean_squared_error', 'pearson_correlation'],
-                    best_config_metric='mean_absolute_error',
-                    outer_cv=KFold(n_splits=3, shuffle=True, random_state=42),
-                    inner_cv=KFold(n_splits=3, shuffle=True, random_state=42),
-                    output_settings=pers_opts,
-                    verbosity=0)
-my_pipe += PipelineElement('StandardScaler')
-my_pipe += PipelineElement('SVR', {'kernel': ['linear', 'rbf']})
-
-
-# get info for the atlas
-atlas_info = AtlasInfo(atlas_name='AAL', roi_names=['Precentral_L', 'Precentral_R', 'Frontal_Sup_R'], extraction_mode='vec')
-#atlas_info = AtlasInfo(atlas_name='HarvardOxford-cort-maxprob-thr50', roi_names='all', extraction_mode='mean')
-#atlas_info = AtlasInfo(atlas_name='AAL', roi_names='all', extraction_mode='vec')
-#atlas_info = AtlasInfo(atlas_name='AAL', roi_names=[2001, 2002, 2102], extraction_mode='vec')
-#atlas_info = AtlasInfo(atlas_name='AAL', roi_names='all', extraction_mode='box')
-# atlas_info = AtlasInfo(atlas_name='mni_icbm152_t1_tal_nlin_sym_09a_mask', mask_threshold=.5, roi_names='all', extraction_mode='vec')
-
-# fit hyperpipe to every ROI independently and return the results
-roi_results_table = AtlasMapping.mapAtlas(dataset_files=dataset_files, targets=targets, hyperpipe=my_pipe, atlas_info=atlas_info, write_to_folder=folder)
-
-debug = True
-
diff --git a/photonai/examples/Brain_Age_Master.py b/photonai/examples/Brain_Age_Master.py
deleted file mode 100644
index 5108aa8e..00000000
--- a/photonai/examples/Brain_Age_Master.py
+++ /dev/null
@@ -1,101 +0,0 @@
-import numpy as np
-import pandas as pd
-import tensorflow as tf
-from sklearn.model_selection import KFold
-#from skopt import Optimizer
-#from skopt.optimizer import dummy_minimize
-#from skopt import dummy_minimize
-import scipy.io as sio
-import keras
-from photonai.base.PhotonBase import Hyperpipe, PipelineElement, PhotonRegister
-from photonai.base.PhotonBatchElement import PhotonBatchElement
-from photonai.validation import ResultsTreeHandler
-from photonai.neuro.BrainAtlas import AtlasLibrary
-from scipy.stats import itemfreq
-from photonai.investigator.Investigator import Investigator
-import matplotlib.pyplot as plt
-import pandas as pd
-from nilearn import image
-import time
-
-
-import os
-os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
-os.environ["CUDA_VISIBLE_DEVICES"]="2"
-
-
-# RandomCtrlData = np.ones((1792, 121, 145, 121))
-# RandomCtrlData = np.ones((172, 121, 145, 121))
-# RandomCtrlLabels = np.random.randn((RandomCtrlData.shape[0]))
-
-root_folder = '/spm-data/Scratch/spielwiese_ramona/PAC2018/'
-filename = 'PAC2018_age.csv'
-df = pd.read_csv(os.path.join(root_folder, filename))
-
-X = df["PAC_ID"]
-X = [os.path.join(root_folder, 'data_all/' + x + ".nii") for x in X]
-y = df["Age"].values
-
-X = X[0:1500]
-y = y[0:1500]
-
-#
-PhotonRegister.save(photon_name='Brain_Age_Splitting_Wrapper',
-                    class_str='photonai.modelwrapper.Brain_Age_Splitting_Wrapper.Brain_Age_Splitting_Wrapper', element_type="Transformer")
-#
-# PhotonRegister.save(photon_name='Brain_Age_Splitting_CNN',
-#                     class_str='photonai.modelwrapper.Brain_Age_Splitting_CNN.Brain_Age_Splitting_CNN', element_type="Estimator")
-#
-PhotonRegister.save(photon_name='Brain_Age_Random_Forest',
-                    class_str='photonai.modelwrapper.Brain_Age_Random_Forest.Brain_Age_Random_Forest', element_type="Estimator")
-
-my_pipe = Hyperpipe('BrainAgePipe',
-                    optimizer='grid_search',
-                    metrics=['mean_absolute_error'],
-                    best_config_metric='mean_absolute_error',
-                    inner_cv=KFold(n_splits=5, shuffle=True, random_state=42),
-                    outer_cv=KFold(n_splits=5, shuffle=True, random_state=42),
-                    eval_final_performance=False,
-                    verbosity=2)
-
-# transformer = PipelineElement(, hyperparameters={})
-# base_element=transformer
-batched_transformer = PhotonBatchElement("PatchImages", hyperparameters={'patch_size': [10, 25, 50, 75, 100]},
-                                         batch_size=100,
-                                         nr_of_processes=10,
-                                         cache_folder='/spm-data/vault-data1/tmp/photon_cache_vincent/')
-my_pipe += batched_transformer
-
-
-#my_pipe += PipelineElement('Brain_Age_Splitting_Wrapper')
-
-my_pipe += PipelineElement('Brain_Age_Random_Forest')
-
-my_pipe.fit(X, y)
-
-batched_transformer.base_element.clear_cache()
-
-
-
-
-
-
-
-
-
-inner_performances = list()
-for i, fold in enumerate(my_pipe.result_tree.outer_folds[0].tested_config_list):
-    inner_performances.append((fold.config_dict, fold.metrics_test[0].value))
-print(inner_performances)
-
-plt.ylim(0.2, 0.8)
-plt.xticks(rotation=90)
-plt.margins(0.3)
-
-for i, lelles in inner_performances:
-    print(i, lelles)
-    Benis = ",".join(("{}={}".format(*p) for p in i.items()))
-    plt.plot(Benis, lelles, 'ro')
-
-
-plt.show()
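
Brain_Age_Master.py above wraps its patch transformer in a PhotonBatchElement so that large image arrays are transformed in chunks rather than all at once. A generic sketch of that batching idea follows (the transformer is a placeholder lambda, not PHOTON's implementation):

import numpy as np

def transform_in_batches(transform, X, batch_size=100):
    # apply `transform` to consecutive chunks and stack the results,
    # which keeps peak memory proportional to the batch size
    chunks = [transform(X[i:i + batch_size]) for i in range(0, len(X), batch_size)]
    return np.concatenate(chunks, axis=0)

doubled = transform_in_batches(lambda batch: batch * 2, np.arange(10), batch_size=4)
print(doubled)
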
diff --git a/photonai/examples/brain_mapping_example.py b/photonai/examples/brain_mapping_example.py
deleted file mode 100644
index c95ce301..00000000
--- a/photonai/examples/brain_mapping_example.py
+++ /dev/null
@@ -1,51 +0,0 @@
-from photonai.neuro.AtlasStacker import AtlasInfo
-from photonai.neuro.AtlasMapping import AtlasMapping
-
-def hyperpipe_constructor():
-    # hyperpipe construtor
-    from photonai.base.PhotonBase import Hyperpipe, PipelineElement, OutputSettings
-    from sklearn.model_selection import KFold
-
-    pers_opts = OutputSettings(save_predictions='best',
-                               save_feature_importances='None')
-
-    my_pipe = Hyperpipe(name='dummy_pipe',  # the name of your pipeline
-                        optimizer='grid_search',  # which optimizer PHOTON shall use
-                        metrics=['mean_absolute_error', 'mean_squared_error', 'pearson_correlation'],
-                        best_config_metric='mean_absolute_error',
-                        outer_cv=KFold(n_splits=3, shuffle=True, random_state=42),
-                        inner_cv=KFold(n_splits=3, shuffle=True, random_state=42),
-                        output_settings=pers_opts,
-                        verbosity=0)
-
-    my_pipe += PipelineElement('StandardScaler')
-    my_pipe += PipelineElement('SVR', {'kernel': ['linear', 'rbf']})
-
-    return my_pipe
-
-
-if __name__ == '__main__':
-    # get oasis gm data and age from nilearn; imgs
-    from nilearn import datasets
-    oasis_dataset = datasets.fetch_oasis_vbm(n_subjects=90)
-    dataset_files = oasis_dataset.gray_matter_maps
-    targets = oasis_dataset.ext_vars['age'].astype(float)  # age
-
-    # which atlases are available?
-    # BrainAtlas.whichAtlases()
-
-    # where to write the results
-    folder = ''
-
-    # get info for the atlas
-    atlas_info = AtlasInfo(atlas_name='AAL', roi_names=['Precentral_L', 'Precentral_R', 'Frontal_Sup_R'],
-                           extraction_mode='vec')
-    #atlas_info = AtlasInfo(atlas_name='AAL', roi_names='all', extraction_mode='box')
-
-
-    atlas_mapper = AtlasMapping(atlas_info=atlas_info, hyperpipe_constructor=hyperpipe_constructor,
-                                write_to_folder=folder,
-                                n_processes=3, write_summary_to_excel=True)
-
-    results = atlas_mapper.fit(dataset_files=dataset_files, targets=targets)
-    print(results)
diff --git a/photonai/examples/downsample_mnc.py b/photonai/examples/downsample_mnc.py
deleted file mode 100644
index 066a6a2b..00000000
--- a/photonai/examples/downsample_mnc.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from photonai.neuro.ImageBasics import ResampleImages, SmoothImages, PatchImages
-import numpy as np
-import pickle
-import time
-
-file = ["/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0001.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0009.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0020.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0070.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0073.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0071.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0072.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0074.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0075.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0076.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0077.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0078.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0080.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0081.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0082.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0083.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0084.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0085.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0086.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0087.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0001.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0009.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0020.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0070.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0073.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0071.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0072.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0074.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0075.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0076.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0077.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0078.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0080.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0081.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0082.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0083.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0084.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0085.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0086.nii",
-        "/spm-data/Scratch/spielwiese_ramona/PAC2018/data/PAC2018_0087.nii"
-        ]
-
-start_time = time.time()
-# t = ResampleImages(voxel_size=[1, 1, 1], nr_of_processes=10)
-t = PatchImages(nr_of_processes=10)
-resampled_images = t.transform(file)
-elapsed_time = time.time() - start_time
-print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
-
-
-start_time = time.time()
-# t = ResampleImages(voxel_size=[1, 1, 1], nr_of_processes=1)
-t = PatchImages(nr_of_processes=1)
-resampled_images = t.transform(file)
-elapsed_time = time.time() - start_time
-print(time.strftime("%H:%M:%S", time.gmtime(elapsed_time)))
-
-
-# patched_image = PatchImages.draw_patches(resampled_images[0].dataobj, 25)
-
-# patcher = PatchImages()
-# patched_images = patcher.transform(resampled_images, nr_of_processes = 10)
-
-debug = True
-#
-# # # pickle.dump(resampled_images, open('resampled_imgs.p', 'wb'))
-# #
-# # resampled_images = pickle.load(open('resampled_imgs.p', 'rb'))
-# t2 = SmoothImages(fwhm=[2, 2, 2], nr_of_processes=3)
-# print("Now should smooth images")
-# smoothed_images = t2.transform(resampled_images)
-# print("Images smoothed")
-#
-# print(len(smoothed_images))
-# t3 = ResampleImages(voxel_size=[3, 5, 10], nr_of_processes=5)
-# res_smoothed_images = t3.transform(smoothed_images)
-
-
-# downsampled_file = resample_img(file[0], target_affine=np.diag([1, 1, 1]), interpolation='nearest')
-# downsampled_file.to_filename('/spm-data/Scratch/spielwiese_ramona/PAC2019/raw_data/N3107/N3107_downsampled.nii')
-debug = True
-
-
diff --git a/photonai/examples/neuro_image_transformation.py b/photonai/examples/neuro_image_transformation.py
deleted file mode 100644
index cef6b677..00000000
--- a/photonai/examples/neuro_image_transformation.py
+++ /dev/null
@@ -1,30 +0,0 @@
-from photonai.base.PhotonBase import Hyperpipe, PipelineElement
-from photonai.base.PhotonBatchElement import PhotonBatchElement
-from sklearn.model_selection import KFold
-import glob
-import numpy as np
-
-
-my_pipe = Hyperpipe('BatchAndSmooth',  # the name of your pipeline
-                    optimizer='grid_search',  # which optimizer PHOTON shall use
-                    metrics=['mean_absolute_error'],  # the performance metrics of your interest
-                    best_config_metric='mean_absolute_error',  # after hyperparameter search, the metric declares the winner config
-                    outer_cv=KFold(n_splits=2),  # repeat hyperparameter search three times
-                    inner_cv=KFold(n_splits=2),  # test each configuration ten times respectively,
-                    verbosity=2)
-
-
-root_folder = "/spm-data/Scratch/spielwiese_ramona/test_photon_neuro/*.nii"
-file_list = glob.glob(root_folder)
-y = np.random.randint(145, size=len(file_list)).astype(np.float)
-
-my_pipe += PhotonBatchElement("SmoothImages", hyperparameters={'fwhm': [[2, 2, 2], [3, 3, 3], [4, 4, 4]]},
-                              batch_size=2)
-my_pipe += PhotonBatchElement("ResampleImages", hyperparameters={'voxel_size': [[3, 3, 3], [2, 2, 2], [5, 5, 5]]},
-                              batch_size=2, output_img=False)
-
-my_pipe += PipelineElement('SVR')
-
-my_pipe.fit(file_list[:10], y[:10])
-
-debug = True
diff --git a/photonai/examples/neuro_module_branch_example.py b/photonai/examples/neuro_module_branch_example.py
deleted file mode 100644
index e1c25220..00000000
--- a/photonai/examples/neuro_module_branch_example.py
+++ /dev/null
@@ -1,41 +0,0 @@
-"""
-PHOTON Example of NeuroModuleBranch
-
-In this example, we take a look at the NeuroModuleBranch class that can be used just like a PHOTON PipelineBranch, only
-that it is designed to work for neuro PipelineElements only. We will build a neuro branch and add different neuro
-PipelineElements to it. We will then try to predict the age of the subjects we look at. The data comes from the OASIS
-dataset.
-
-"""
-from photonai.base.PhotonBase import Hyperpipe, PipelineElement
-from photonai.neuro.NeuroBase import NeuroModuleBranch
-from photonai.neuro.AtlasStacker import AtlasInfo
-from nilearn import datasets
-from sklearn.model_selection import KFold
-
-# Get data first, 50 subjects from the OASIS database, VBM images and age
-oasis = datasets.fetch_oasis_vbm(n_subjects=50)
-files = oasis.gray_matter_maps
-targets = oasis.ext_vars['age'].astype(float)
-
-# We start by building a NeuroModuleBranch including a brain atlas
-neuro_branch = NeuroModuleBranch('NeuroBranch')
-neuro_branch += PipelineElement('ResampleImgs', {'voxel_size': [[5, 5, 5]]})
-atlas_info = AtlasInfo(atlas_name='AAL', roi_names=['Precentral_R'], extraction_mode='vec')
-neuro_branch += PipelineElement('BrainAtlas', {}, atlas_info_object=atlas_info)
-
-# Now, we build a Hyperpipe and add the neuro branch to it
-pipe = Hyperpipe('neuro_module_branch_example', optimizer='grid_search',
-                 optimizer_params={},
-                 metrics=['mean_squared_error', 'mean_absolute_error'],
-                 best_config_metric='mean_squared_error',
-                 inner_cv=KFold(n_splits=2, shuffle=True, random_state=3),
-                 outer_cv=KFold(n_splits=2, shuffle=True, random_state=3),
-                 eval_final_performance=True)
-pipe += neuro_branch
-
-# Finally, we add an estimator
-pipe += PipelineElement('SVR', {}, kernel='linear', C=0.001)
-
-# We can now run PHOTON and try to predict age
-pipe.fit(files, targets)
diff --git a/photonai/examples/new_example1.py b/photonai/examples/new_example1.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/photonai/examples/pretrained_cnn.py b/photonai/examples/pretrained_cnn.py
deleted file mode 100644
index b1222ce5..00000000
--- a/photonai/examples/pretrained_cnn.py
+++ /dev/null
@@ -1,72 +0,0 @@
-"""
-===========================================================
-Project:
-===========================================================
-Description
------------
-
-Version
--------
-Created: DD-MM-YYYY
-Last updated: DD-MM-YYYY
-
-
-Author
-------
-Nils R. Winter
-nils.r.winter@gmail.com
-Translationale Psychiatrie
-Universitaetsklinikum Muenster
-"""
-
-from photonai.base.PhotonBase import Hyperpipe, PipelineElement, OutputSettings
-from photonai.optimization.Hyperparameters import FloatRange, Categorical
-from photonai.investigator.Investigator import Investigator
-from photonai.configuration.Register import PhotonRegister
-from sklearn.model_selection import KFold, ShuffleSplit
-import numpy as np
-import pandas as pd
-import os
-import cv2
-
-
-# GET DATA
-def load_data(path, data_path):
-    X = []
-    y = []
-    data_set = pd.read_csv(path)
-    print('Read train images')
-    for index, row in data_set.iterrows():
-        image_path = os.path.join(data_path, str(row['img']))
-        print(image_path)
-        img = cv2.resize(cv2.imread(image_path, cv2.IMREAD_COLOR), (256, 192)).astype(np.float32)/255
-        X.append(img)
-        y.append([row['rating']])
-        print("Loading"+row['img'], end="\r")
-    return np.array(X), np.array(y)
-
-
-X_train, y_train = load_data('/home/nwinter/Downloads/Tim/Train6.csv', '/home/nwinter/Downloads/Tim')
-#X_test, y_test = load_data('/home/nwinter/Downloads/Tim/Test7.csv', '/home/nwinter/Downloads/Tim')
-
-
-
-# DESIGN YOUR PIPELINE
-my_pipe = Hyperpipe('pretrained_cnn',  # the name of your pipeline
-                    optimizer='grid_search',  # which optimizer PHOTON shall use
-                    metrics=['mean_squared_error'],  # the performance metrics of your interest
-                    best_config_metric='mean_squared_error',  # after hyperparameter search, the metric declares the winner config
-                    outer_cv=ShuffleSplit(n_splits=1, test_size=0.2),  # repeat hyperparameter search three times
-                    inner_cv=ShuffleSplit(n_splits=1, test_size=0.2),  # test each configuration ten times respectively
-                    verbosity=1)  # get error, warn and info message )
-
-
-
-# ADD ELEMENTS TO YOUR PIPELINE
-
-# engage and optimize the good old SVM for Classification
-my_pipe += PipelineElement('PretrainedCNNRegressor', {}, input_shape=(192,256,3), freezing_point=8)
-
-
-# NOW TRAIN YOUR PIPELINE
-my_pipe.fit(X_train, y_train)
\ No newline at end of file
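
The removed example's load_data reads each image with OpenCV, resizes it to a fixed input shape, and rescales pixel values to [0, 1]. The core of that preprocessing, isolated (the path is a placeholder):

import cv2
import numpy as np

def load_image(path, size=(256, 192)):
    # cv2.resize takes (width, height); dividing by 255 maps uint8 pixels to [0, 1]
    img = cv2.imread(path, cv2.IMREAD_COLOR)
    return cv2.resize(img, size).astype(np.float32) / 255.0
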
diff --git a/photonai/examples/sample_pairing_example.py b/photonai/examples/sample_pairing_example.py
index 217671da..b13e01a1 100644
--- a/photonai/examples/sample_pairing_example.py
+++ b/photonai/examples/sample_pairing_example.py
@@ -28,34 +28,6 @@
 import numpy as np
 
-# # WE USE THE BREAST CANCER SET FROM SKLEARN
-# X, y = load_boston(True)
-#
-# # DESIGN YOUR PIPELINE
-# my_pipe = Hyperpipe('sample_pairing_example',  # the name of your pipeline
-#                     optimizer='grid_search',  # which optimizer PHOTON shall use
-#                     metrics=['mean_squared_error', 'pearson_correlation'],  # the performance metrics of your interest
-#                     best_config_metric='mean_squared_error',  # after hyperparameter search, the metric declares the winner config
-#                     outer_cv=KFold(n_splits=5),  # repeat hyperparameter search three times
-#                     inner_cv=KFold(n_splits=5),  # test each configuration ten times respectively
-#                     verbosity=1)  # get error, warn and info message )
-#
-#
-# # ADD ELEMENTS TO YOUR PIPELINE
-# # first normalize all features
-# my_pipe += PipelineElement('StandardScaler')
-#
-# # add sample pairing
-# my_pipe += PipelineElement('SamplePairingRegression', {'draw_limit': [500, 1000, 10000],
-#                                                        'generator': Categorical(['nearest_pair', 'random_pair'])},
-#                            distance_metric='euclidean', test_disabled=True)
-#
-# # engage and optimize the good old SVM for Classification
-# my_pipe += PipelineElement('RandomForestRegressor', hyperparameters={'n_estimators': [10]})
-#
-# # NOW TRAIN YOUR PIPELINE
-# my_pipe.fit(X, y)
-
 
 # WE USE THE BREAST CANCER SET FROM SKLEARN
@@ -86,8 +58,6 @@
 
 # NOW TRAIN YOUR PIPELINE
 my_pipe.fit(X, y)
-
-
 debug = True
diff --git a/photonai/examples/stacking_example.py b/photonai/examples/stacking_example.py
index 2a01ea58..f4fd8751 100644
--- a/photonai/examples/stacking_example.py
+++ b/photonai/examples/stacking_example.py
@@ -1,4 +1,4 @@
-from photonai.base.PhotonBase import Hyperpipe, PipelineElement, PipelineStacking,
+from photonai.base.PhotonBase import Hyperpipe, PipelineElement, PipelineStacking
 from photonai.optimization.Hyperparameters import FloatRange, IntegerRange, Categorical
 from sklearn.model_selection import KFold
 
diff --git a/photonai/modelwrapper/AnovaFeatureSelection.py b/photonai/modelwrapper/AnovaFeatureSelection.py
deleted file mode 100644
index 21f0421b..00000000
--- a/photonai/modelwrapper/AnovaFeatureSelection.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from sklearn.base import BaseEstimator, ClassifierMixin
-from scipy import stats
-
-
-class AnovaFeatureSelection(BaseEstimator, ClassifierMixin):
-
-    def __init__(self, p_threshold=0.5):
-        self.p_threshold = p_threshold
-
-    def fit(self, data, targets):
-        return self
-
-    def transform(self, data, targets):
-
-        snp_in = []
-        for snpInd in range(data.shape[1]):
-            a = targets[data[:, snpInd] == 1]
-            b = targets[data[:, snpInd] == 2]
-            c = targets[data[:, snpInd] == 3]
-
-            f, p = stats.f_oneway(a, b, c)
-
-            if p < self.p_threshold:
-                # print('One-way ANOVA - snp_name ' + snp_names[snpInd])
-                # print('=============')
-                # print('F value:', f)
-                # print('P value:', p, '\n')
-                snp_in.append(snpInd)
-
-        if len(snp_in) <= 1:
-            return []
-        return data[:, snp_in]
diff --git a/photonai/modelwrapper/Biclustering2d.py b/photonai/modelwrapper/Biclustering2d.py
deleted file mode 100644
index ea6b9de9..00000000
--- a/photonai/modelwrapper/Biclustering2d.py
+++ /dev/null
@@ -1,85 +0,0 @@
-from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.cluster import SpectralBiclustering
-import numpy as np
-import os
-from matplotlib import pyplot as plt
-#from photonai.photonlogger.Logger import Logger
-
-class Biclustering2d(BaseEstimator, TransformerMixin):
-    _estimator_type = "transformer"
-
-    def __init__(self, n_clusters=4, random_state=42, scale='bistochastic', n_components=6,
-                 n_best=3, logs=''):
-        self.n_clusters = n_clusters
-        self.random_state = random_state
-        self.scale = scale  # 'scale', 'bistochastic', or 'log' (log cannot handle sparse data)
-        self.n_components = n_components
-        self.n_best = n_best
-        if logs:
-            self.logs = logs
-        else:
-            self.logs = os.getcwd()
-
-    def fit(self, X, y):
-        # Biclustering of the mean 2d image of all samples
-        HC = X[y==0,:]
-        print(HC.shape)
-        X_mean = np.squeeze(np.mean(HC, axis=0))
-        #X_mean = np.squeeze(np.mean(X, axis=0))
-        self.biclustModel = self.create_model()
-        self.biclustModel.fit(X_mean)
-
-        # Plotting the clustered matrix
-        fit_data = X_mean[np.argsort(self.biclustModel.row_labels_)]
-        fit_data = fit_data[:, np.argsort(self.biclustModel.column_labels_)]
-        plt.matshow(fit_data)
-        plt.title(self.n_clusters)
-        plt.show()
-
-        return self
-
-    def transform(self, X):
-        X_reordered = np.empty(X.shape)
-        for i in range(X.shape[0]):
-            x = np.squeeze(X[i,:,:])
-            x_clust = x[np.argsort(self.biclustModel.row_labels_)]
-            x_clust = x_clust[:, np.argsort(self.biclustModel.column_labels_)]
-            X_reordered[i, :, :] = x_clust
-        return X_reordered
-
-    def create_model(self):
-
-        biclustModel = SpectralBiclustering(n_clusters=self.n_clusters, random_state=self.random_state,
-                                            method=self.scale, n_components=self.n_components,
-                                            n_best=self.n_best)
-
-        return biclustModel
-
-    # ToDo: add these functions again
-    # def set_params(self, **params):
-    #     if 'n_components' in params:
-    #         self.n_clusters = params['n_components']
-    #     if 'logs' in params:
-    #         self.logs = params.pop('logs', None)
-    #
-    #     if not self.biclustModel:
-    #         self.biclustModel = self.createBiclustering()
-    #     self.biclustModel.set_params(**params)
-    #
-    # def get_params(self, deep=True):
-    #     if not self.biclustModel:
-    #         self.biclustModel = self.createBiclustering()
-    #     biclust_dict = self.biclustModel.get_params(deep)
-    #     biclust_dict['logs'] = self.logs
-    #     return biclust_dict
-
-# if __name__ == "__main__":
-#     from matplotlib import pyplot as plt
-#     X = np.random.rand(100, 30, 30)
-#     bcm = Biclustering2d(n_clusters=3)
-#     bcm.fit(X)
-#     X_new = bcm.transform(X)
-#     plt.matshow(np.squeeze(X_new[0]), cmap=plt.cm.Blues)
-#     plt.show()
diff --git a/photonai/modelwrapper/BrainAgeNeuralNet.py b/photonai/modelwrapper/BrainAgeNeuralNet.py
deleted file mode 100644
index ada9ace7..00000000
--- a/photonai/modelwrapper/BrainAgeNeuralNet.py
+++ /dev/null
@@ -1,42 +0,0 @@
-import numpy as np
-from sklearn.base import BaseEstimator, RegressorMixin
-from sklearn.utils import shuffle
-from photonai.modelwrapper.KerasDNNRegressor import KerasDNNRegressor
-
-
-class BrainAgeNeuralNet(BaseEstimator, RegressorMixin):
-
-    def __init__(self, **kwargs):
-        self.estimator = KerasDNNRegressor(**kwargs)
-
-    def fit(self, X, y):
-
-        y = np.repeat(y, X.shape[1])
-        # make patches per person a training case
-        print(X.shape)
-        X = np.reshape(X, (-1, X.shape[2], X.shape[3]))
-        # flatten training cases for reshape
-        X = np.reshape(X, (X.shape[0]), -1)
-        # shuffle the the data so there won't be long strands of same-aged people
-        X, y = shuffle(X, y, random_state=self.random_state)
-
-        # 1. make model
-        self.estimator.fit(X, y)
-        return self
-
-    def predict(self, X):
-        if not isinstance(X, np.ndarray):
-            print("Loading data")
-            X = np.asarray(X)
-
-        X_to_predict = X.reshape(X.shape[0], X.shape[1], -1)
-        predict_result = []
-        for i in range(X.shape[0]):
-            predict_interim_result = np.squeeze(self.estimator.predict(X_to_predict[i, :, :], batch_size=self.batch_size))
-            # predict_interim_result = self.photon_rfr.predict(X_to_predict[i, :, :])
-            predict_result_to_append = np.mean(predict_interim_result)
-            predict_result.append(predict_result_to_append)
-        return predict_result
-
-
diff --git a/photonai/modelwrapper/Brain_Age_Random_Forest.py b/photonai/modelwrapper/Brain_Age_Random_Forest.py
deleted file mode 100644
index e0aff003..00000000
--- a/photonai/modelwrapper/Brain_Age_Random_Forest.py
+++ /dev/null
@@ -1,77 +0,0 @@
-import numpy as np
-import tensorflow as tf
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.model_selection import ShuffleSplit
-from sklearn.ensemble import RandomForestRegressor
-from sklearn.svm import LinearSVR
-from sklearn.utils import shuffle
-
-class Brain_Age_Random_Forest(BaseEstimator, ClassifierMixin):
-    # todo: BUGFIX --> pooling doesnt work
-    def __init__(self, target_dimension=2,
-                 loss='mse', metrics=['accuracy'],
-                 gpu_device='/gpu:0', random_state=42,
-                 early_stopping_flag=True, eaSt_patience=20,
-                 reLe_factor=0.4, reLe_patience=5):
-
-        self.target_dimension = target_dimension
-        self.loss = loss
-        self.metrics = metrics
-        self.random_state = random_state
-        self.gpu_device = gpu_device
-        self.early_stopping_flag = early_stopping_flag
-        self.eaSt_patience = eaSt_patience
-        self.reLe_factor = reLe_factor
-        self.reLe_patience = reLe_patience
-
-        self.x = None
-        self.y_ = None
-        self.model = None
-
-    def fit(self, X, y):
-        # Reshape X to add dimension for CNN (RGB channel)
-        if not isinstance(X, np.ndarray):
-            print("Loading data")
-            X = np.asarray(X)
-
-        print("Starting Fitting")
-        y = np.repeat(y, X.shape[1])
-        # make patches per person a training case
-        print(X.shape)
-        X = np.reshape(X, (-1, X.shape[2], X.shape[3]))
-        # flatten training cases for reshape
-        X = np.reshape(X, (X.shape[0], -1))
-        # shuffle the the data so there won't be long strands of same-aged people
-        X, y = shuffle(X, y, random_state=42)
-
-        # model is a random forest regressor
-        self.photon_rfr = LinearSVR()
-        # self.photon_rfr = RandomForestRegressor()
-        self.photon_rfr.fit(X, y)
-        print("Fitting done")
-
-        return self
-
-    def predict(self, X):
-
-        if not isinstance(X, np.ndarray):
-            print("Loading data")
-            X = np.asarray(X)
-
-        X_to_predict = X.reshape(X.shape[0], X.shape[1], -1)
-        predict_result = []
-        for i in range(X.shape[0]):
-            predict_interim_result = self.photon_rfr.predict(X_to_predict[i, :, :])
-            predict_result_to_append = np.mean(predict_interim_result)
-            predict_result.append(predict_result_to_append)
-        return predict_result
-
-
-    @staticmethod
-    def dense_to_one_hot(labels_dense, num_classes):
-        """Convert class labels from scalars to one-hot vectors."""
-        num_labels = labels_dense.shape[0]
-        index_offset = np.arange(num_labels) * num_classes
-        labels_one_hot = np.zeros((num_labels, num_classes))
-        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
-        return labels_one_hot
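
Brain_Age_Random_Forest trains on individual image patches (every patch inherits its subject's age) and predicts a subject's age as the mean over that subject's patch predictions. The aggregation scheme in isolation, with illustrative shapes and random data:

import numpy as np
from sklearn.svm import LinearSVR

n_subjects, n_patches, n_features = 8, 5, 20
X = np.random.rand(n_subjects, n_patches, n_features)
y = np.random.uniform(18, 80, n_subjects)

# training: each patch becomes its own sample, labelled with the subject's age
model = LinearSVR().fit(X.reshape(-1, n_features), np.repeat(y, n_patches))

# prediction: average the patch-level predictions per subject
y_hat = [model.predict(subject).mean() for subject in X]
print(np.round(y_hat, 1))
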
diff --git a/photonai/modelwrapper/Brain_Age_Splitting_Wrapper.py b/photonai/modelwrapper/Brain_Age_Splitting_Wrapper.py
deleted file mode 100644
index 6251b3bc..00000000
--- a/photonai/modelwrapper/Brain_Age_Splitting_Wrapper.py
+++ /dev/null
@@ -1,92 +0,0 @@
-from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.cluster import SpectralBiclustering
-from skimage.util.shape import view_as_windows
-import math
-import numpy as np
-import os
-#from photonai.photonlogger.Logger import Logger
-
-class Brain_Age_Splitting_Wrapper(BaseEstimator, TransformerMixin):
-    _estimator_type = "transformer"
-
-    def __init__(self, patch_size=25, random_state=42, logs=''):
-        self.patch_size = patch_size
-        self.random_state = random_state
-        if logs:
-            self.logs = logs
-        else:
-            self.logs = os.getcwd()
-
-    def fit(self, X, y=None):
-        pass
-
-    def transform(self, X):
-
-        BenisLänge = X.shape[1] - (self.patch_size - 1)
-        BenisBreite = X.shape[2] - (self.patch_size - 1)
-
-        BenisLängsSchritte = BenisLänge / self.patch_size
-        BenisBreitenSchritte = BenisBreite / self.patch_size
-
-        KleineBenisLängsSchritte = int(np.ceil(BenisLängsSchritte))
-        KleineBenisBreitenSchritte = int(np.ceil(BenisBreitenSchritte))
-        BenisSteppos = KleineBenisLängsSchritte * KleineBenisBreitenSchritte
-
-        MegaBenis = BenisSteppos * X.shape[3]
-        Beniswertos = np.ones((MegaBenis, self.patch_size, self.patch_size, 1))
-        print(Beniswertos.shape)
-
-        for i in range(X.shape[0]):
-            # print(brain_scans[i,:,:,:].shape)
-            # make brain scans a new variable or try making the statement for the ith one
-            # x.reshape(x.shape[0] // 2, 2, x.shape[1] // 2, 2).swapaxes(1, 2).reshape(-1, 2, 2)
-            Benis = view_as_windows(X[i, :, :, :], (self.patch_size, self.patch_size, 1), step=1)
-            #print(Benis.shape)
-
-            BenisLänge = Benis.shape[0]
-            BenisBreite = Benis.shape[1]
-            #BenisSchritte = BenisLänge / self.patch_size
-
-            BenisMatrix = Benis[0:BenisLänge:self.patch_size, 0:BenisBreite:self.patch_size, :, :]
-            #print(BenisMatrix.shape)
-
-            # TODO: Reshape First 3 Matrix Dimensions into 1, which will give 900 images
-            BenisMatrix = BenisMatrix.reshape((-1, BenisMatrix.shape[3], BenisMatrix.shape[4], BenisMatrix.shape[5]))
-            #print(BenisMatrix.shape)
-
-            Beniswertos = np.append(Beniswertos, BenisMatrix, axis=3)
-            #print(Beniswertos.shape)
-
-        #TODO: Drop first row
-        Beniswertos = np.delete(Beniswertos, 0, 3)
-        Beniswertos = np.moveaxis(Beniswertos, 3, 0)
-        print(Beniswertos.shape)
-
-        return Beniswertos
-
-    # ToDo: add these functions again
-    # def set_params(self, **params):
-    #     if 'n_components' in params:
-    #         self.n_clusters = params['n_components']
-    #     if 'logs' in params:
-    #         self.logs = params.pop('logs', None)
-    #
-    #     if not self.biclustModel:
-    #         self.biclustModel = self.createBiclustering()
-    #     self.biclustModel.set_params(**params)
-    #
-    # def get_params(self, deep=True):
-    #     if not self.biclustModel:
-    #         self.biclustModel = self.createBiclustering()
-    #     biclust_dict = self.biclustModel.get_params(deep)
-    #     biclust_dict['logs'] = self.logs
-    #     return biclust_dict
-
-# if __name__ == "__main__":
-#     from matplotlib import pyplot as plt
-#     X = np.random.rand(100, 30, 30)
-#     bcm = Biclustering2d(n_clusters=3)
-#     bcm.fit(X)
-#     X_new = bcm.transform(X)
-#     plt.matshow(np.squeeze(X_new[0]), cmap=plt.cm.Blues)
-#     plt.show()
diff --git a/photonai/modelwrapper/CNN1d.py b/photonai/modelwrapper/CNN1d.py
deleted file mode 100644
index f5c0ade6..00000000
--- a/photonai/modelwrapper/CNN1d.py
+++ /dev/null
@@ -1,186 +0,0 @@
-import keras
-import keras.optimizers
-import numpy as np
-import tensorflow as tf
-from keras.layers import Dropout, Dense, Conv1D, MaxPooling1D, Flatten, GaussianNoise
-from keras.layers.core import Activation
-from keras.layers.normalization import BatchNormalization
-from keras.models import Sequential
-from keras.callbacks import EarlyStopping, ReduceLROnPlateau
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.model_selection import ShuffleSplit
-
-class CNN1d(BaseEstimator, ClassifierMixin):
-    # todo: BUGFIX --> pooling doesnt work
-    def __init__(self, target_dimension=2, filter_per_block=[16], kernel_size=3,
-                 pooling_size=3, stride=5, size_last_layer=10,
-                 n_convolutions_per_block=1, gaussian_noise=0,
-                 act_func='relu', learning_rate=0.001,
-                 dropout_rate=0, batch_normalization=True,
-                 nb_epochs=200, batch_size=64,
-                 loss='categorical_crossentropy', metrics=['accuracy'],
-                 optimizer='adam', gpu_device='/gpu:0',
-                 early_stopping_flag=True, eaSt_patience=20,
-                 reLe_factor=0.4, reLe_patience=5):
-
-        self.target_dimension = target_dimension
-        self.filter_per_block = filter_per_block
-        self.kernel_size = kernel_size
-        self.n_convolutions_per_block = n_convolutions_per_block
-        self.pooling_size = pooling_size
-        self.stride = stride
-        self.size_last_layer = size_last_layer
-        self.act_func = act_func
-        self.lr = learning_rate
-        self.dropout = dropout_rate
-        self.batch_normalization = batch_normalization
-        self.nb_epochs = nb_epochs
-        self.loss = loss
-        self.metrics = metrics
-        self.optimizer = optimizer
-        self.batch_size = batch_size
-        self.gpu_device = gpu_device
-        self.gaussian_noise = gaussian_noise
-        self.early_stopping_flag = early_stopping_flag
-        self.eaSt_patience = eaSt_patience
-        self.reLe_factor = reLe_factor
-        self.reLe_patience = reLe_patience
-
-        self.x = None
-        self.y_ = None
-        self.model = None
-
-    def fit(self, X, y):
-        if self.target_dimension > 1:
-            y = self.dense_to_one_hot(y, self.target_dimension)
-
-        self.model = self.create_model(X.shape)
-
-        # Reshape X to add dimension for CNN (RGB channel)
-        X = np.reshape(X, (X.shape[0], X.shape[1], 1))
-
-        # use callbacks only when size of training set is above 100
-        if X.shape[-1] > 100:
-            # get pseudo validation set for keras callbacks
-            splitter = ShuffleSplit(n_splits=1, test_size=0.2)
-            for train_index, val_index in splitter.split(X):
-                X_train = X[train_index]
-                X_val = X[val_index]
-                y_train = y[train_index]
-                y_val = y[val_index]
-
-            # register callbacks
-            callbacks_list = []
-            # use early stopping (to save time;
-            # does not improve performance as checkpoint will find the best model anyway)
-            if self.early_stopping_flag:
-                early_stopping = EarlyStopping(monitor='val_loss',
-                                               patience=self.eaSt_patience)
-                callbacks_list += [early_stopping]
-
-            # adjust learning rate when not improving for patience epochs
-            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
-                                          factor=self.reLe_factor,
-                                          patience=self.reLe_patience,
-                                          min_lr=0.001, verbose=0)
-            callbacks_list += [reduce_lr]
-
-            # fit the model
-            results = self.model.fit(X_train, y_train,
-                                     validation_data=(X_val, y_val),
-                                     batch_size=128,
-                                     epochs=self.nb_epoch,
-                                     verbose=0,
-                                     callbacks=callbacks_list)
-        else:
-            # fit the model
-            print(
-                'Cannot use Keras Callbacks because of small sample size...')
-            results = self.model.fit(X, y, batch_size=128,
-                                     epochs=self.nb_epoch,
-                                     verbose=0)
-
-        return self
-
-    def predict(self, X):
-        X = np.reshape(X, (X.shape[0], X.shape[1], 1))
-        if self.target_dimension > 1:
-            predict_result = self.model.predict(X, batch_size=self.batch_size)
-            max_index = np.argmax(predict_result, axis=1)
-            return max_index
-        else:
-            return self.model.predict(X, batch_size=self.batch_size)
-
-    def create_model(self, input_shape):
-        model = Sequential()
-        input_shape = (input_shape[1], input_shape[2])
-        for ind_blocks in range(len(self.filter_per_block)):
-            for ind_convs in range(self.n_convolutions_per_block):
-                if ind_blocks == 0 and ind_convs == 0:
-                    with tf.device(self.gpu_device):
-                        model.add(Conv1D(self.filter_per_block[ind_blocks],
-                                         self.kernel_size,
-                                         strides=self.stride,
-                                         padding='same',
-                                         input_shape=input_shape))
-                        model.add(Activation(self.act_func))
-                        if self.batch_normalization:
-                            model.add(BatchNormalization())
-                else:
-                    with tf.device(self.gpu_device):
-                        model.add(Conv1D(self.filter_per_block[ind_blocks],
-                                         self.kernel_size,
-                                         strides=self.stride,
-                                         padding='same'))
-                        model.add(Activation(self.act_func))
-
-                        if self.batch_normalization:
-                            model.add(BatchNormalization())
-            with tf.device(self.gpu_device):
-                if self.pooling_size:
-                    model.add(MaxPooling1D(pool_size=self.pooling_size))
-
-                if self.dropout:
-                    model.add(Dropout(self.dropout))
-
-        with tf.device(self.gpu_device):
-            model.add(Flatten())
-            if self.gaussian_noise:
-                model.add(GaussianNoise(stddev=self.gaussian_noise))
-            model.add(Dense(self.size_last_layer))
-            model.add(Activation(self.act_func))
-            if self.dropout:
-                model.add(Dropout(self.dropout))
-            if self.batch_normalization:
-                model.add(BatchNormalization())
-        with tf.device(self.gpu_device):
-            model.add(Dense(self.target_dimension))
-            model.add(Activation('softmax'))
-
-        optimizer = self.define_optimizer(optimizer_type=self.optimizer,
-                                          lr=self.lr)
-
-        model.compile(loss=self.loss, optimizer=optimizer, metrics=self.metrics)
-        model.summary()
-        return model
-
-    @staticmethod
-    def define_optimizer(optimizer_type='Adam', lr=0.001):
-        # Todo: use kwargs to allow for additional optimizer tweaking
-        try:
-            optimizer_class = getattr(keras.optimizers, optimizer_type)
-            optimizer = optimizer_class(lr=lr)
-        except AttributeError as ae:
-            raise ValueError('Could not find optimizer:',
-                             optimizer_type, ' - check spelling!')
-
-        return optimizer
-
-    @staticmethod
-    def dense_to_one_hot(labels_dense, num_classes):
-        """Convert class labels from scalars to one-hot vectors."""
-        num_labels = labels_dense.shape[0]
-        index_offset = np.arange(num_labels) * num_classes
-        labels_one_hot = np.zeros((num_labels, num_classes))
-        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
-        return labels_one_hot
\ No newline at end of file
diff --git a/photonai/modelwrapper/CNN1d_Autoencoder.py b/photonai/modelwrapper/CNN1d_Autoencoder.py
deleted file mode 100644
index 94a63e67..00000000
--- a/photonai/modelwrapper/CNN1d_Autoencoder.py
+++ /dev/null
@@ -1,118 +0,0 @@
-import keras
-import keras.optimizers
-import numpy as np
-import tensorflow as tf
-from keras.layers import Dropout, Dense, LSTM
-from keras.layers.core import Activation
-from keras.layers.normalization import BatchNormalization
-from keras.models import Sequential
-from keras.callbacks import EarlyStopping, ReduceLROnPlateau
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.model_selection import ShuffleSplit
-
-from keras.layers import Conv1D, GlobalMaxPool1D, Dense, Flatten
-from keras.models import Sequential
-from keras.callbacks import ModelCheckpoint
-
-
-
-class CNN1d_Autoencoder(BaseEstimator, ClassifierMixin):
-    def __init__(self):
-        self.model = None
-        self.time_window_size = None
-        self.metric = None
-        self.threshold = 5.0
-        self.config = None
-        self.VERBOSE = 1
-
-        self.batch_size = 8
-        self.epochs = 100
-        self.validation_split = 0.1
-        self.metric = 'mean_absolute_error'
-        self.estimated_negative_sample_ratio = 0.9
-
-        self.x = None
-        self.y_ = None
-        self.model = None
-
-    def predict(self, X):
-        X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
-        if self.target_dimension > 1:
-            predict_result = self.model.predict(X, batch_size=self.batch_size)
-            max_index = np.argmax(predict_result, axis=1)
-            return max_index
-        else:
-            return self.model.predict(X, batch_size=self.batch_size)
-
-    def predict_proba(self, X):
-        """
-        Predict probabilities
-        :param X: array-like
-        :type data: float
-        :return: predicted values, array
-        """
-        # First, reshape X to meet LSTM input requirements
-        X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
-        return self.model.predict(X, batch_size=self.batch_size)
-
-    def create_model(self, time_window_size, metric):
-        model = Sequential()
-        model.add(Conv1D(filters=256, kernel_size=5, padding='same', activation='relu',
-                         input_shape=(time_window_size, 1)))
-        model.add(GlobalMaxPool1D())
-
-        model.add(Dense(units=time_window_size, activation='linear'))
-
-        model.compile(optimizer='adam', loss='mean_squared_error', metrics=[metric])
-        print(model.summary())
-        return model
-
-    def fit(self, X, y):
-        self.time_window_size = X.shape[1]
-
-        input_timeseries_dataset = np.expand_dims(X[:, :, 0], axis=2)
-
-        self.model = self.create_model(self.time_window_size, metric=self.metric)
-        history = self.model.fit(x=input_timeseries_dataset, y=y,
-                                 batch_size=self.batch_size, epochs=self.epochs,
-                                 verbose=self.VERBOSE)
-        scores = self.predict(dataset)
-        scores.sort()
-        cut_point = int(self.estimated_negative_sample_ratio * len(scores))
-        self.threshold = scores[cut_point]
-
-        print('estimated threshold is ' + str(self.threshold))
-
-        self.config = dict()
-        self.config['time_window_size'] = self.time_window_size
-        self.config['metric'] = self.metric
-        self.config['threshold'] = self.threshold
-
-        return self
-
-    def predict(self, timeseries_dataset):
-        input_timeseries_dataset = np.expand_dims(timeseries_dataset, axis=2)
-        target_timeseries_dataset = self.model.predict(x=input_timeseries_dataset)
-        dist = np.linalg.norm(timeseries_dataset - target_timeseries_dataset, axis=-1)
-        return dist
-
-    @staticmethod
-    def define_optimizer(optimizer_type='Adam', lr=0.001):
-        # Todo: use kwargs to allow for additional optimizer tweaking
-        try:
-            optimizer_class = getattr(keras.optimizers, optimizer_type)
-            optimizer = optimizer_class(lr=lr)
-        except AttributeError as ae:
-            raise ValueError('Could not find optimizer:',
-                             optimizer_type, ' - check spelling!')
-
-        return optimizer
-
-    @staticmethod
-    def dense_to_one_hot(labels_dense, num_classes):
-        """Convert class labels from scalars to one-hot vectors."""
-        num_labels = labels_dense.shape[0]
-        index_offset = np.arange(num_labels) * num_classes
-        labels_one_hot = np.zeros((num_labels, num_classes))
-        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
-        return labels_one_hot
\ No newline at end of file
diff --git a/photonai/modelwrapper/FeatureSelection.py b/photonai/modelwrapper/FeatureSelection.py
index 3dc5faae..82316661 100644
--- a/photonai/modelwrapper/FeatureSelection.py
+++ b/photonai/modelwrapper/FeatureSelection.py
@@ -4,7 +4,7 @@
 from sklearn.feature_selection import f_regression, f_classif, SelectPercentile, \
     VarianceThreshold, mutual_info_classif, mutual_info_regression, SelectKBest, chi2
 from scipy.stats import pearsonr, f_oneway
-from sklearn.decomposition import PCA, RandomizedPCA, IncrementalPCA
+from sklearn.decomposition import PCA, IncrementalPCA
 from hashlib import sha1
 from pathlib import Path
 import statsmodels.api as sm
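
The import fix above reflects scikit-learn's removal of RandomizedPCA (deprecated in 0.18, dropped in 0.20); the randomized solver survives as an option on PCA. If any call sites still need the old behaviour, the usual replacement is:

from sklearn.decomposition import PCA

# PCA with the randomized SVD solver replaces the removed RandomizedPCA class
pca = PCA(n_components=10, svd_solver='randomized', random_state=42)
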
100644 index 07bacca1..00000000 --- a/photonai/modelwrapper/GPR_AM.py +++ /dev/null @@ -1,676 +0,0 @@ -from __future__ import print_function -from __future__ import division -import numpy as np -from scipy import optimize -from numpy.linalg import solve, LinAlgError -from numpy.linalg import cholesky as chol -from six import with_metaclass -from abc import ABCMeta, abstractmethod - -from sklearn.base import BaseEstimator - - -class GPR_AM(BaseEstimator): - """ - The PHOTON interface for implementing custom pipeline elements. - - PHOTON works on top of the scikit-learn object API, - [see documentation](http://scikit-learn.org/stable/developers/contributing.html#apis-of-scikit-learn-objects) - - Your class should overwrite the following definitions: - - - `fit(data)`: learn or adjust to the data - - If it is an estimator, which means it has the ability to learn, - - - it should implement `predict(data)`: using the learned model to generate prediction - - should inherit *sklearn.base.BaseEstimator* ([see here](http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html)) - - inherits *get_params* and *set_params* - - If it is an transformer, which means it preprocesses or prepares the data - - - it should implement `transform(data)`: applying the logic to the data to transform it - - should inherit from *sklearn.base.TransformerMixin* ([see here](http://scikit-learn.org/stable/modules/generated/sklearn.base.TransformerMixin.html)) - - inherits *fit_transform* as a concatenation of both fit and transform - - should inherit *sklearn.base.BaseEstimator* ([see here](http://scikit-learn.org/stable/modules/generated/sklearn.base.BaseEstimator.html)) - - inherits *get_params* and *set_params* - - `Prepare for hyperparameter optimization` - - PHOTON expects you to `define all parameters` that you want to optimize in the hyperparameter search in the - `constructor stub`, and to be addressable with the `same name as class variable`. - In this way you can define any parameter and it is automatically prepared for the hyperparameter search process. - - See the [scikit-learn object API documentation](http://scikit-learn.org/stable/developers/contributing.html#apis-of-scikit-learn-objects) for more in depth information about the interface. - """ - - def __init__(self, respfile=None, covfile=None, maskfile=None, cvfolds=None, testcov=None, testresp=None, saveoutput=True, outputsuffix=None): - print('Richtig') - pass - - def fit(self, data=None, targets=None): - """ - Adjust the underlying model or method to the data. - - Returns - ------- - IMPORTANT: must return self! - """ - # starting hyperparameters. Could also do random restarts here - covfunc = CovSum(data, ('CovLin', 'CovSqExpARD')) - hyp0 = np.zeros(covfunc.get_n_params() + 1) - - # GPR - self.gpr = GPR(hyp0, covfunc, data, targets) - self.hyp = self.gpr.estimate(hyp0, covfunc, data, targets) - return self - - def predict(self, data): - """ - Use the learned model to make predictions. 
- """ - #hyp, X, y, Xs - ymu, ys2 = self.gpr.predict(hyp=self.hyp, X=np.zeros_like(data), Xs=data) - return ymu - - -# GP stuff -# -------------------- -# Covariance functions -# -------------------- -def squared_dist(x, z=None): - """ compute sum((x-z) ** 2) for all vectors in a 2d array""" - - # do some basic checks - if z is None: - z = x - if len(x.shape) == 1: - x = x[:, np.newaxis] - if len(z.shape) == 1: - z = z[:, np.newaxis] - - nx, dx = x.shape - nz, dz = z.shape - if dx != dz: - raise ValueError(""" - Cannot compute distance: vectors have different length""") - - # mean centre for numerical stability - m = np.mean(np.vstack((np.mean(x, axis=0), np.mean(z, axis=0))), axis=0) - x = x - m - z = z - m - - xx = np.tile(np.sum((x * x), axis=1)[:, np.newaxis], (1, nz)) - zz = np.tile(np.sum((z * z), axis=1), (nx, 1)) - - dist = (xx - 2 * x.dot(z.T) + zz) - - return dist - -class CovBase(with_metaclass(ABCMeta)): - """ Base class for covariance functions. - - All covariance functions must define the following methods:: - - CovFunction.get_n_params() - CovFunction.cov() - CovFunction.xcov() - CovFunction.dcov() - """ - - def __init__(self, x=None): - self.n_params = np.nan - - def get_n_params(self): - """ Report the number of parameters required """ - - assert not np.isnan(self.n_params), \ - "Covariance function not initialised" - - return self.n_params - - @abstractmethod - def cov(self, theta, x, z=None): - """ Return the full covariance (or cross-covariance if z is given) """ - - @abstractmethod - def dcov(self, theta, x, i): - """ Return the derivative of the covariance function with respect to - the i-th hyperparameter """ - - -class CovLin(CovBase): - """ Linear covariance function (no hyperparameters) - """ - - def __init__(self, x=None): - self.n_params = 0 - self.first_call = False - - def cov(self, theta, x, z=None): - if not self.first_call and not theta and theta is not None: - self.first_call = True - if len(theta) > 0 and theta[0] is not None: - print("CovLin: ignoring unnecessary hyperparameter ...") - - if z is None: - z = x - - K = x.dot(z.T) - return K - - def dcov(self, theta, x, i): - raise ValueError("Invalid covariance function parameter") - - -class CovSqExp(CovBase): - """ Ordinary squared exponential covariance function. 
- The hyperparameters are:: - - theta = ( log(ell), log(sf2) ) - - where ell is a lengthscale parameter and sf2 is the signal variance - """ - - def __init__(self, x=None): - self.n_params = 2 - - def cov(self, theta, x, z=None): - self.ell = np.exp(theta[0]) - self.sf2 = np.exp(2*theta[1]) - - if z is None: - z = x - - R = squared_dist(x/self.ell, z/self.ell) - K = self.sf2 * np.exp(-R/2) - return K - - def dcov(self, theta, x, i): - self.ell = np.exp(theta[0]) - self.sf2 = np.exp(2*theta[1]) - - R = squared_dist(x/self.ell, x/self.ell) - - if i == 0: # return derivative of lengthscale parameter - dK = self.sf2 * np.exp(-R/2) * R - return dK - elif i == 1: # return derivative of signal variance parameter - dK = 2*self.sf2 * np.exp(-R/2) - return dK - else: - raise ValueError("Invalid covariance function parameter") - - -class CovSqExpARD(CovBase): - """ Squared exponential covariance function with ARD - The hyperparameters are:: - - theta = (log(ell_1, ..., log_ell_D), log(sf2)) - - where ell_i are lengthscale parameters and sf2 is the signal variance - """ - - def __init__(self, x=None): - if x is None: - raise ValueError("N x D data matrix must be supplied as input") - self.D = x.shape[1] - self.n_params = self.D + 1 - - def cov(self, theta, x, z=None): - self.ell = np.exp(theta[0:self.D]) - self.sf2 = np.exp(2*theta[self.D]) - - if z is None: - z = x - - R = squared_dist(x.dot(np.diag(1./self.ell)), - z.dot(np.diag(1./self.ell))) - K = self.sf2*np.exp(-R/2) - return K - - def dcov(self, theta, x, i): - K = self.cov(theta, x) - if i < self.D: # return derivative of lengthscale parameter - dK = K * squared_dist(x[:, i]/self.ell[i], x[:, i]/self.ell[i]) - return dK - elif i == self.D: # return derivative of signal variance parameter - dK = 2*K - return dK - else: - raise ValueError("Invalid covariance function parameter") - - -class CovSum(CovBase): - """ Sum of covariance functions. These are passed in as a cell array and - intialised automatically. For example:: - - C = CovSum(x,(CovLin, CovSqExpARD)) - C = CovSum.cov(x, ) - - The hyperparameters are:: - - theta = ( log(ell_1, ..., log_ell_D), log(sf2) ) - - where ell_i are lengthscale parameters and sf2 is the signal variance - """ - - def __init__(self, x=None, covfuncnames=None): - if x is None: - raise ValueError("N x D data matrix must be supplied as input") - if covfuncnames is None: - raise ValueError("A list of covariance functions is required") - self.covfuncs = [] - self.n_params = 0 - for cname in covfuncnames: - covfunc = eval(cname + '(x)') - self.n_params += covfunc.get_n_params() - self.covfuncs.append(covfunc) - self.N, self.D = x.shape - - def cov(self, theta, x, z=None): - theta_offset = 0 - for ci, covfunc in enumerate(self.covfuncs): - n_params_c = covfunc.get_n_params() - theta_c = [theta[c] for c in - range(theta_offset, theta_offset + n_params_c)] - theta_offset += n_params_c - - if ci == 0: - K = covfunc.cov(theta_c, x, z) - else: - K += covfunc.cov(theta_c, x, z) - return K - - def dcov(self, theta, x, i): - theta_offset = 0 - for covfunc in self.covfuncs: - n_params_c = covfunc.get_n_params() - theta_c = [theta[c] for c in - range(theta_offset, theta_offset + n_params_c)] - theta_offset += n_params_c - - if theta_c: # does the variable have any hyperparameters? 
- if 'dK' not in locals(): - dK = covfunc.dcov(theta_c, x, i) - else: - dK += covfunc.dcov(theta_c, x, i) - return dK - -# ----------------------- -# Gaussian process models -# ----------------------- - - -class GPR: - """Gaussian process regression - - Estimation and prediction of Gaussian process regression models - - Basic usage:: - - G = GPR() - hyp = B.estimate(hyp0, cov, X, y) - ys, ys2 = B.predict(hyp, cov, X, y, Xs) - - where the variables are - - :param hyp: vector of hyperparmaters - :param cov: covariance function - :param X: N x D data array - :param y: 1D Array of targets (length N) - :param Xs: Nte x D array of test cases - :param hyp0: starting estimates for hyperparameter optimisation - - :returns: * ys - predictive mean - * ys2 - predictive variance - - The hyperparameters are:: - - hyp = ( log(sn2), (cov function params) ) # hyp is a list or array - - The implementation and notation follows Rasmussen and Williams (2006). - As in the gpml toolbox, these parameters are estimated using conjugate - gradient optimisation of the marginal likelihood. Note that there is no - explicit mean function, thus the gpr routines are limited to modelling - zero-mean processes. - - Reference: - C. Rasmussen and C. Williams (2006) Gaussian Processes for Machine Learning - - Written by A. Marquand - """ - - def __init__(self, hyp=None, covfunc=None, X=None, y=None, n_iter=100, - tol=1e-3, verbose=False): - - self.hyp = np.nan - self.nlZ = np.nan - self.tol = tol # not used at present - self.n_iter = n_iter # not used at present - self.verbose = verbose - - if (hyp is not None) and (X is not None) and (y is not None): - self.post(hyp, covfunc, X, y) - - def _updatepost(self, hyp, covfunc): - - hypeq = np.asarray(hyp == self.hyp) - if hypeq.all() and hasattr(self, 'alpha') and \ - (hasattr(self, 'covfunc') and covfunc == self.covfunc): - return False - else: - return True - - def post(self, hyp, covfunc, X, y): - """ Generic function to compute posterior distribution. - """ - - if len(X.shape) == 1: - X = X[:, np.newaxis] - self.N, self.D = X.shape - - if not self._updatepost(hyp, covfunc): - print("hyperparameters have not changed, using exising posterior") - return - - # hyperparameters - sn2 = np.exp(2*hyp[0]) # noise variance - theta = hyp[1:] # (generic) covariance hyperparameters - - if self.verbose: - print("estimating posterior ... 
| hyp=", hyp) - - self.K = covfunc.cov(theta, X) - self.L = chol(self.K + sn2*np.eye(self.N)) - self.alpha = solve(self.L.T, solve(self.L, y)) - self.hyp = hyp - self.covfunc = covfunc - - def loglik(self, hyp, covfunc, X, y): - """ Function to compute compute log (marginal) likelihood - """ - - # load or recompute posterior - if self._updatepost(hyp, covfunc): - try: - self.post(hyp, covfunc, X, y) - except (ValueError, LinAlgError): - print("Warning: Estimation of posterior distribution failed") - self.nlZ = 1/np.finfo(float).eps - return self.nlZ - - self.nlZ = 0.5*y.T.dot(self.alpha) + sum(np.log(np.diag(self.L))) + \ - 0.5*self.N*np.log(2*np.pi) - - # make sure the output is finite to stop the minimizer getting upset - if not np.isfinite(self.nlZ): - self.nlZ = 1/np.finfo(float).eps - - if self.verbose: - print("nlZ= ", self.nlZ, " | hyp=", hyp) - - return self.nlZ - - def dloglik(self, hyp, covfunc, X, y): - """ Function to compute derivatives - """ - - # hyperparameters - sn2 = np.exp(2*hyp[0]) # noise variance - theta = hyp[1:] # (generic) covariance hyperparameters - - # load posterior and prior covariance - if self._updatepost(hyp, covfunc): - try: - self.post(hyp, covfunc, X, y) - except (ValueError, LinAlgError): - print("Warning: Estimation of posterior distribution failed") - dnlZ = np.sign(self.dnlZ) / np.finfo(float).eps - return dnlZ - - # compute Q = alpha*alpha' - inv(K) - Q = np.outer(self.alpha, self.alpha) - \ - solve(self.L.T, solve(self.L, np.eye(self.N))) - - # initialise derivatives - self.dnlZ = np.zeros(len(hyp)) - - # noise variance - self.dnlZ[0] = -sn2*np.trace(Q) - - # covariance parameter(s) - for par in range(0, len(theta)): - # compute -0.5*trace(Q.dot(dK/d[theta_i])) efficiently - dK = covfunc.dcov(theta, X, i=par) - self.dnlZ[par+1] = -0.5*np.sum(np.sum(Q*dK.T)) - - # make sure the gradient is finite to stop the minimizer getting upset - if not all(np.isfinite(self.dnlZ)): - bad = np.where(np.logical_not(np.isfinite(self.dnlZ))) - for b in bad: - self.dnlZ[b] = np.sign(self.dnlZ[b]) / np.finfo(float).eps - - if self.verbose: - print("dnlZ= ", self.dnlZ, " | hyp=", hyp) - - return self.dnlZ - - # model estimation (optimization) - def estimate(self, hyp0, covfunc, X, y, optimizer='cg'): - """ Function to estimate the model - """ - - if optimizer.lower() == 'cg': # conjugate gradients - out = optimize.fmin_cg(self.loglik, hyp0, self.dloglik, - (covfunc, X, y), disp=True, gtol=self.tol, - maxiter=self.n_iter, full_output=1) - - elif optimizer.lower() == 'powell': # Powell's method - out = optimize.fmin_powell(self.loglik, hyp0, (covfunc, X, y), - full_output=1) - else: - raise ValueError("unknown optimizer") - - self.hyp = out[0] - self.nlZ = out[1] - self.optimizer = optimizer - - return self.hyp - - def predict(self, hyp, X, y, Xs): - """ Function to make predictions from the model - """ - - if self._updatepost(hyp, self.covfunc): - self.post(hyp, self.covfunc, X, y) - - # hyperparameters - sn2 = np.exp(2*hyp[0]) # noise variance - theta = hyp[1:] # (generic) covariance hyperparameters - - Ks = self.covfunc.cov(theta, Xs, X) - kss = self.covfunc.cov(theta, Xs) - - # predictive mean - ymu = Ks.dot(self.alpha) - - # predictive variance (for a noisy test input) - v = solve(self.L, Ks.T) - ys2 = kss - v.T.dot(v) + sn2 - - return ymu, ys2 - - -# Andres original code -def orig_imp(): - """ Estimate a normative model - - This will estimate a model in one of two settings according to the - particular parameters specified (see below): - - * under k-fold 
-
-    # load data
-    print("Processing data in " + respfile)
-    X = fileio.load(covfile)
-    Y, maskvol = load_response_vars(respfile, maskfile)
-    if len(Y.shape) == 1:
-        Y = Y[:, np.newaxis]
-    if len(X.shape) == 1:
-        X = X[:, np.newaxis]
-    Nmod = Y.shape[1]
-
-    if testcov is not None:
-        # we have a separate test dataset
-        Xte = fileio.load(testcov)
-        Yte, testmask = load_response_vars(testresp, maskfile)
-        testids = range(X.shape[0], X.shape[0] + Xte.shape[0])
-
-        if len(Yte.shape) == 1:
-            Yte = Yte[:, np.newaxis]
-        if len(Xte.shape) == 1:
-            Xte = Xte[:, np.newaxis]
-
-        # treat as a single train-test split
-        splits = CustomCV((range(0, X.shape[0]),), (testids,))
-
-        Y = np.concatenate((Y, Yte), axis=0)
-        X = np.concatenate((X, Xte), axis=0)
-
-        # force the number of cross-validation folds to 1
-        if cvfolds is not None and cvfolds != 1:
-            print("Ignoring cross-validation specification (test data given)")
-        cvfolds = 1
-    else:
-        # we are running under cross-validation
-        splits = KFold(n_splits=cvfolds)
-        testids = range(0, X.shape[0])
-
-    # find and remove bad variables from the response variables
-    # note: the covariates are assumed to have already been checked
-    nz = np.where(np.bitwise_and(np.isfinite(Y).any(axis=0),
-                                 np.var(Y, axis=0) != 0))[0]
-
-    # starting hyperparameters. 
Could also do random restarts here - covfunc = CovSum(X, ('CovLin', 'CovSqExpARD')) - hyp0 = np.zeros(covfunc.get_n_params() + 1) - - # run cross-validation loop - Yhat = np.zeros_like(Y) - S2 = np.zeros_like(Y) - Z = np.zeros_like(Y) - nlZ = np.zeros((Nmod, cvfolds)) - Hyp = np.zeros((Nmod, len(hyp0), cvfolds)) - for idx in enumerate(splits.split(X)): - fold = idx[0] - tr = idx[1][0] - te = idx[1][1] - - # standardize responses and covariates, ignoring invalid entries - iy, jy = np.ix_(tr, nz) - mY = np.mean(Y[iy, jy], axis=0) - sY = np.std(Y[iy, jy], axis=0) - Yz = np.zeros_like(Y) - Yz[:, nz] = (Y[:, nz] - mY) / sY - mX = np.mean(X[tr, :], axis=0) - sX = np.std(X[tr, :], axis=0) - Xz = (X - mX) / sX - - # estimate the models for all subjects - for i in range(0, len(nz)): # range(0, Nmod): - print("Estimating model ", i + 1, "of", len(nz)) - gpr = GPR(hyp0, covfunc, Xz[tr, :], Yz[tr, nz[i]]) - Hyp[nz[i], :, fold] = gpr.estimate(hyp0, covfunc, Xz[tr, :], - Yz[tr, nz[i]]) - - yhat, s2 = gpr.predict(Hyp[nz[i], :, fold], Xz[tr, :], - Yz[tr, nz[i]], Xz[te, :]) - - Yhat[te, nz[i]] = yhat * sY[i] + mY[i] - S2[te, nz[i]] = np.diag(s2) * sY[i] ** 2 - Z[te, nz[i]] = (Y[te, nz[i]] - Yhat[te, nz[i]]) / \ - np.sqrt(S2[te, nz[i]]) - nlZ[nz[i], fold] = gpr.nlZ - - # compute performance metrics - MSE = np.mean((Y[testids, :] - Yhat[testids, :]) ** 2, axis=0) - RMSE = np.sqrt(MSE) - # for the remaining variables, we need to ignore zero variances - SMSE = np.zeros_like(MSE) - Rho = np.zeros(Nmod) - pRho = np.ones(Nmod) - iy, jy = np.ix_(testids, nz) # ids for tested samples with nonzero values - SMSE[nz] = MSE[nz] / np.var(Y[iy, jy], axis=0) - Rho[nz], pRho[nz] = compute_pearsonr(Y[iy, jy], Yhat[iy, jy]) - - # Set writing options - if saveoutput: - print("Writing output ...") - if fileio.file_type(respfile) == 'cifti' or \ - fileio.file_type(respfile) == 'nifti': - exfile = respfile - else: - exfile = None - if outputsuffix is not None: - ext = str(outputsuffix) + fileio.file_extension(respfile) - else: - ext = fileio.file_extension(respfile) - - # Write output - fileio.save(Yhat[testids, :].T, 'yhat' + ext, - example=exfile, mask=maskvol) - fileio.save(S2[testids, :].T, 'ys2' + ext, - example=exfile, mask=maskvol) - fileio.save(Z[testids, :].T, 'Z' + ext, example=exfile, mask=maskvol) - fileio.save(Rho, 'Rho' + ext, example=exfile, mask=maskvol) - fileio.save(pRho, 'pRho' + ext, example=exfile, mask=maskvol) - fileio.save(RMSE, 'rmse' + ext, example=exfile, mask=maskvol) - fileio.save(SMSE, 'smse' + ext, example=exfile, mask=maskvol) - if cvfolds is None: - fileio.save(Hyp, 'Hyp' + ext, example=exfile, mask=maskvol) - else: - for idx in enumerate(splits.split(X)): - fold = idx[0] - fileio.save(Hyp[:, :, fold], 'Hyp_' + str(fold + 1) + - ext, example=exfile, mask=maskvol) - else: - output = (Yhat, S2, Z, Rho, pRho, RMSE, SMSE) - return output diff --git a/photonai/modelwrapper/KerasAutoencoder.py b/photonai/modelwrapper/KerasAutoencoder.py deleted file mode 100644 index 3c066834..00000000 --- a/photonai/modelwrapper/KerasAutoencoder.py +++ /dev/null @@ -1,40 +0,0 @@ -from sklearn.base import BaseEstimator, RegressorMixin -import tensorflow as tf -import keras as k -from keras.layers import Input, Dense, Dropout -from keras.models import Model -from keras import backend as K -from sklearn.metrics import mean_squared_error as mae - - -class SimpleAutoencoder(BaseEstimator, RegressorMixin): - - def __init__(self, n_hidden=10, dropout_rate=0.5): - self.n_hidden = n_hidden - self.dropout_rate = dropout_rate - 
self.model = None - - def fit(self, X, y): - n_dim_in = X.shape[1] - x = Input(shape=(n_dim_in,)) - encoded = Dense(self.n_hidden, activation='relu')(x) - encoded = Dropout(self.dropout_rate)(encoded) - decoded = Dense(n_dim_in, activation='sigmoid')(encoded) - - # this model maps an input to its reconstruction - self.model = Model(x, decoded) - self.model.compile(optimizer='adam', loss='mse') - self.model.fit(X, X, epochs=100, batch_size=64, verbose=0) - return self - - def transform(self, X): - get_z = K.function([self.model.layers[0].input], [self.model.layers[1].output]) - return get_z([X])[0] - - def score(self, X, y): - get_recon = K.function([self.model.layers[0].input], [self.model.layers[2].output]) - decoded = get_recon([X])[0] - loss = mae(X, decoded) - return loss - - diff --git a/photonai/modelwrapper/KerasBaseEstimator.py b/photonai/modelwrapper/KerasBaseEstimator.py deleted file mode 100644 index 5ceb5d25..00000000 --- a/photonai/modelwrapper/KerasBaseEstimator.py +++ /dev/null @@ -1,38 +0,0 @@ -from keras.models import model_from_json - -class KerasBaseEstimator(object): - """base class for all Keras wrappers - """ - def __init__(self): - self.model = None - - def save(self, filename): - # serialize model to JSON - model_json = self.model.to_json() - with open(filename + ".json", "w") as json_file: - json_file.write(model_json) - # serialize weights to HDF5 - self.model.save_weights(filename + ".h5") - - def load(self, filename): - # load json and create model - json_file = open(filename + '.json', 'r') - loaded_model_json = json_file.read() - json_file.close() - loaded_model = model_from_json(loaded_model_json) - - # load weights into new model - loaded_model.load_weights(filename + ".h5") - self.model = loaded_model - - def load_nounzip(self, archive, element_info): - # load json and create model - loaded_model_json = archive.read(element_info['filename'] + '.json') #.decode("utf-8") - loaded_model = model_from_json(loaded_model_json) - - # load weights into new model - # ToDo: fix loading hdf5 without unzipping first - loaded_weights = archive.read(element_info['filename'] + '.h5') - loaded_model.load_weights(loaded_weights) - - self.model = loaded_model \ No newline at end of file diff --git a/photonai/modelwrapper/PretrainedCNN.py b/photonai/modelwrapper/PretrainedCNN.py deleted file mode 100644 index 9de65598..00000000 --- a/photonai/modelwrapper/PretrainedCNN.py +++ /dev/null @@ -1,294 +0,0 @@ -import os -import numpy as np -from keras.applications.inception_v3 import InceptionV3 -from keras.applications import VGG16 -from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint -from keras.layers import Dense, GlobalAveragePooling2D -from keras.layers import Input -from keras.models import Model -from keras.optimizers import Adam -from sklearn.base import ClassifierMixin, RegressorMixin, BaseEstimator -from sklearn.model_selection import KFold, ShuffleSplit -from photonai.modelwrapper.KerasBaseEstimator import KerasBaseEstimator -os.environ["CUDA_VISIBLE_DEVICES"] = "2" - - -class PretrainedCNNClassifier(BaseEstimator, ClassifierMixin, KerasBaseEstimator): - - def __init__(self, input_shape=(244,244,3), target_dimension=10, - learning_rate=0.001, nb_epoch=10000, early_stopping_flag=True, - eaSt_patience=20, reLe_factor = 0.4, reLe_patience=5, freezing_point=0, - batch_size=64, ckpt_name='', weight_class_a=1, weight_class_b=1, - hidden_layer_sizes=[], beta_1=0.9, beta_2=0.999, epsilon=None, decay=0, amsgrad=False): - - super(KerasBaseEstimator, 
self).__init__() - self.input_shape = input_shape - self.learning_rate = learning_rate - self.target_dimension = target_dimension - self.nb_epoch = nb_epoch - self.early_stopping_flag = early_stopping_flag - self.eaSt_patience = eaSt_patience - self.reLe_factor = reLe_factor - self.reLe_patience = reLe_patience - self.freezing_point = freezing_point - self.model = None - self.batch_size = batch_size - self.ckpt_name = ckpt_name - self.weight_class_a = weight_class_a - self.weight_class_b = weight_class_b - self.hidden_layer_sizes = hidden_layer_sizes - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.decay = decay - self.amsgrad = amsgrad - - def post_fit(self): - return {all_metrics_collected} - - def fit(self, X, y): - - # prepare target values - # Todo: calculate number of classes? - try: - if (self.target_dimension > 1) and (y.shape[1] > 1): - y = self.dense_to_one_hot(y, self.target_dimension) - except: - pass - - # 1. make model - self.model = self.create_model() - - # 2. fit model - # start_time = time.time() - - # use callbacks only when size of training set is above 100 - if X.shape[0] > 100: - # get pseudo validation set for keras callbacks - splitter = ShuffleSplit(n_splits=1, test_size=0.2) - for train_index, val_index in splitter.split(X): - X_train = X[train_index] - X_val = X[val_index] - y_train = y[train_index] - y_val = y[val_index] - - # register callbacks - callbacks_list = [] - # use early stopping (to save time; - # does not improve performance as checkpoint will find the best model anyway) - if self.early_stopping_flag: - early_stopping = EarlyStopping(monitor='val_loss', - patience=self.eaSt_patience) - callbacks_list += [early_stopping] - - # adjust learning rate when not improving for patience epochs - reduce_lr = ReduceLROnPlateau(monitor='val_loss', - factor=self.reLe_factor, - patience=self.reLe_patience, - min_lr=0.001, verbose=1) - callbacks_list += [reduce_lr] - - if self.ckpt_name: - # checkpoint to find the model with the best validation performance later (for final testing) - checkpoint = ModelCheckpoint(self.ckpt_name, - monitor='val_acc', - verbose=0, - save_best_only=True, - mode='auto', - save_weights_only=False) - callbacks_list += [checkpoint] - - # fit the model - results = self.model.fit(X_train, y_train, - validation_data=(X_val, y_val), - batch_size=self.batch_size, - epochs=self.nb_epoch, - verbose=1, - callbacks=callbacks_list, - class_weight={0:self.weight_class_a,1:self.weight_class_b}) - else: - # fit the model - print('Cannot use Keras Callbacks because of small sample size...') - results = self.model.fit(X, y, batch_size=self.batch_size, - epochs=self.nb_epoch, - verbose=1) - - return self - - def predict(self, X): - predict_result = self.model.predict(X, batch_size=128) - return np.argmax(predict_result, axis=1) - - def score(self, X, y_true): - return np.zeros(1) - - def create_model(self): - # todo: add pretrained model as hyperparameter - # create the base pre-trained model - #base_model = VGG19(input_shape=self.input_shape, weights='imagenet', include_top=False) - #base_model = InceptionV3(input_shape=self.input_shape, weights='imagenet', include_top=False) - #base_model = VGG16(input_shape=self.input_shape, weights='imagenet', include_top=False) - base_model = VGG16(input_shape=self.input_shape, weights='imagenet', include_top=False) - - # add a global spatial average pooling layer - x = base_model.output - x = GlobalAveragePooling2D()(x) - # let's add a fully-connected layer - #x = 
Dense(self.size_additional_layer, activation='relu')(x) - # and a logistic layer -- let's say we have 200 classes - predictions = Dense(self.target_dimension, activation='softmax')(x) - - # this is the model we will train - model = Model(inputs=base_model.input, outputs=predictions) - - for layer in model.layers[:self.freezing_point]: - layer.trainable = False - print('Freeze layer ', layer) - for layer in model.layers[self.freezing_point:]: - layer.trainable = True - - # compile the model (should be done *after* setting layers to non-trainable) - optimizer = Adam(lr=self.learning_rate, beta_1=self.beta_1, beta_2=self.beta_2, epsilon=self.epsilon, - decay=self.decay, amsgrad=self.amsgrad) - model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) - - return model - - @staticmethod - def dense_to_one_hot(labels_dense, num_classes): - """Convert class labels from scalars to one-hot vectors.""" - num_labels = labels_dense.shape[0] - index_offset = np.arange(num_labels) * num_classes - labels_one_hot = np.zeros((num_labels, num_classes)) - labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 - return labels_one_hot - - - -class PretrainedCNNRegressor(BaseEstimator, RegressorMixin, KerasBaseEstimator): - - def __init__(self, input_shape=(244,244,3), - learning_rate=0.001, nb_epoch=10000, early_stopping_flag=True, - eaSt_patience=20, reLe_factor = 0.4, reLe_patience=5, freezing_point=0, - batch_size=64, ckpt_name='', hidden_layer_sizes=[], beta_1=0.9, beta_2=0.999, epsilon=None, decay=0, amsgrad=False): - - super(KerasBaseEstimator, self).__init__() - self.input_shape = input_shape - self.learning_rate = learning_rate - self.nb_epoch = nb_epoch - self.early_stopping_flag = early_stopping_flag - self.eaSt_patience = eaSt_patience - self.reLe_factor = reLe_factor - self.reLe_patience = reLe_patience - self.freezing_point = freezing_point - self.model = None - self.batch_size = batch_size - self.ckpt_name = ckpt_name - self.hidden_layer_sizes = hidden_layer_sizes - self.beta_1 = beta_1 - self.beta_2 = beta_2 - self.epsilon = epsilon - self.decay = decay - self.amsgrad = amsgrad - - def post_fit(self): - return {all_metrics_collected} - - def fit(self, X, y): - - # 1. make model - self.model = self.create_model() - - # 2. 
fit model - # start_time = time.time() - - # use callbacks only when size of training set is above 100 - if X.shape[0] > 100: - # get pseudo validation set for keras callbacks - splitter = ShuffleSplit(n_splits=1, test_size=0.2) - for train_index, val_index in splitter.split(X): - X_train = X[train_index] - X_val = X[val_index] - y_train = y[train_index] - y_val = y[val_index] - - # register callbacks - callbacks_list = [] - # use early stopping (to save time; - # does not improve performance as checkpoint will find the best model anyway) - if self.early_stopping_flag: - early_stopping = EarlyStopping(monitor='val_loss', - patience=self.eaSt_patience) - callbacks_list += [early_stopping] - - # adjust learning rate when not improving for patience epochs - reduce_lr = ReduceLROnPlateau(monitor='val_loss', - factor=self.reLe_factor, - patience=self.reLe_patience, - min_lr=0.001, verbose=1) - callbacks_list += [reduce_lr] - - if self.ckpt_name: - # checkpoint to find the model with the best validation performance later (for final testing) - checkpoint = ModelCheckpoint(self.ckpt_name, - monitor='val_loss', - verbose=0, - save_best_only=True, - mode='auto', - save_weights_only=False) - callbacks_list += [checkpoint] - - # fit the model - results = self.model.fit(X_train, y_train, - validation_data=(X_val, y_val), - batch_size=self.batch_size, - epochs=self.nb_epoch, - verbose=1, - callbacks=callbacks_list) - else: - # fit the model - print('Cannot use Keras Callbacks because of small sample size...') - results = self.model.fit(X, y, batch_size=self.batch_size, - epochs=self.nb_epoch, - verbose=1) - - return self - - def predict(self, X): - return np.squeeze(self.model.predict(X, batch_size=128)) - - def score(self, X, y_true): - return np.zeros(1) - - def create_model(self): - - # create the base pre-trained model - #base_model = VGG19(input_shape=self.input_shape, weights='imagenet', include_top=False) - #base_model = InceptionV3(input_shape=self.input_shape, weights='imagenet', include_top=False) - #base_model = VGG16(input_shape=self.input_shape, weights='imagenet', include_top=False) - base_model = VGG16(input_shape=self.input_shape, weights='imagenet', include_top=False) - - # add a global spatial average pooling layer - x = base_model.output - x = GlobalAveragePooling2D()(x) - # let's add a fully-connected layer - x = Dense(1024, activation='relu')(x) - x = Dense(512, activation='relu')(x) - predictions = Dense(1, kernel_initializer='normal')(x) - - # this is the model we will train - model = Model(inputs=base_model.input, outputs=predictions) - - for layer in model.layers[:self.freezing_point]: - layer.trainable = False - print('Freeze layer ', layer) - for layer in model.layers[self.freezing_point:]: - layer.trainable = True - - # compile the model (should be done *after* setting layers to non-trainable) - #optimizer = RMSprop(lr=self.learning_rate, rho=0.9, epsilon=0.1, decay=0.9) - optimizer = Adam(lr=self.learning_rate, beta_1=self.beta_1, beta_2=self.beta_2, epsilon=self.epsilon, - decay=self.decay, amsgrad=self.amsgrad) - model.compile(optimizer=optimizer, loss='mean_squared_error', metrics=['mse']) - - return model diff --git a/photonai/modelwrapper/PyESNWrapper.py b/photonai/modelwrapper/PyESNWrapper.py deleted file mode 100644 index a66fbd8d..00000000 --- a/photonai/modelwrapper/PyESNWrapper.py +++ /dev/null @@ -1,110 +0,0 @@ -from matplotlib.pyplot import * -from numpy import * -from scipy import linalg -from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin - 
-
-
-class PyESNWrapper():
-
-    def __init__(self, reservoir_size=1000, leaking_rate=0.3, regularization_coeff=1e-8, spectral_radius=1.25,
-                 in_size=1, out_size=1, init_len=100):
-
-        self.in_size = in_size
-        self.out_size = out_size
-        self.res_size = reservoir_size
-        self.init_len = init_len
-        self.leaking_rate = leaking_rate
-        self.spectral_radius = spectral_radius
-        self.regularization_coeff = regularization_coeff
-        self.activation_function = tanh
-        self.w_in = None
-        self.w_out = None
-        self.w = None
-        self.x = None
-
-        random.seed(42)
-
-    def fit(self, data, targets):
-
-        if self.spectral_radius >= 1 and np.count_nonzero(data == 0) > 0:
-            raise ValueError("ESN won't work: with a tanh sigmoid, the echo state property is "
-                             "violated for zero input if the spectral radius of the reservoir "
-                             "weight matrix is larger than unity")
-
-        self.in_size = data.shape[1]
-        train_len = data.shape[0]
-        self.w_in = (random.rand(self.res_size, 1 + self.in_size) - 0.5) * 1
-        self.w = random.rand(self.res_size, self.res_size) - 0.5
-
-        # Option 1 - direct scaling (quick&dirty, reservoir-specific):
-        # W *= 0.135
-        # Option 2 - normalizing and setting spectral radius (correct, slow):
-        print('Computing spectral radius...')
-        rhoW = max(abs(linalg.eig(self.w)[0]))
-        print('..done.')
-        self.w *= self.spectral_radius / rhoW
-
-        # allocate memory for the design (collected states) matrix
-        X = zeros((1 + self.in_size + self.res_size, train_len - self.init_len))
-        # set the corresponding target matrix directly
-        Yt = targets.T
-
-        # run the reservoir with the data and collect X
-        self.x = zeros((self.res_size, 1))
-        for t in range(train_len):
-            u = data[t, :]
-            # stacked_vectors = vstack((np.ones((u.shape[0],1)).flatten(), u))
-            bias_u = self.add_bias_unit(u)
-            self.calculate_state(bias_u)
-            if t >= self.init_len:
-                stack_input_and_output = concatenate((bias_u, self.x))
-                X[:, t - self.init_len] = stack_input_and_output
-
-        # train the output: ridge regression on the collected states,
-        # w_out = Yt * X^T * (X * X^T + reg * I)^-1
-        X_T = X.T
-        y_times_transposed_states = dot(Yt, X_T)
-        state_times_transposed_states = dot(X, X_T)
-        regularization_matrix = self.regularization_coeff * eye(1 + self.in_size + self.res_size)
-        # nr_of_infs_reg = np.count_nonzero(~np.isnan(data))
-        # nr_infs_reg = np.count_nonzero(np.isinf(regularization_matrix))
-        nr_infs_state = np.count_nonzero(np.isinf(state_times_transposed_states))
-        inverse = linalg.inv(state_times_transposed_states + regularization_matrix)
-        self.w_out = dot(y_times_transposed_states, inverse)
-        # Wout = dot( Yt, linalg.pinv(X) )
-        return self
-
-    def predict(self, data):
-        testLen = data.shape[0]
-        # run the trained ESN in a generative mode. no need to initialize here,
-        # because x is initialized with training data and we continue from there.
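-        # Editor's note: each time step below computes the readout
-        # y = w_out * [1; u; x], i.e. the learned linear map applied to the
-        # stacked bias, input, and reservoir state.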
- Y = zeros((testLen, self.out_size)) - for t in range(testLen): - u = data[t] - bias_u = self.add_bias_unit(u) - self.calculate_state(bias_u) - concated_data = concatenate((bias_u, self.x)) - y = dot(self.w_out, concated_data) - Y[t, :] = y - - # generative mode: - # u = y - # predictive mode - # u = data[trainLen + t + 1] - return Y - - @staticmethod - def add_bias_unit(u): - return np.insert(u, 0, 1) - - def calculate_state(self, bias_u): - # x = (1 - a) * x + a * tanh(dot(Win, data_stack) + dot(W, x)) - last_echo = ((1 - self.leaking_rate) * self.x).flatten() - input_weights_times_data = dot(self.w_in, bias_u).flatten() - state_times_weights = dot(self.w, self.x).flatten() - self.x = last_echo + self.leaking_rate * self.activation_function(input_weights_times_data + state_times_weights) - - -class PyESNRegressor(BaseEstimator, RegressorMixin, PyESNWrapper): - pass - - -class PyESNClassifier(BaseEstimator, ClassifierMixin, PyESNWrapper): - pass diff --git a/photonai/modelwrapper/SiameseDNNClassifier.py b/photonai/modelwrapper/SiameseDNNClassifier.py deleted file mode 100644 index dd4a0ee7..00000000 --- a/photonai/modelwrapper/SiameseDNNClassifier.py +++ /dev/null @@ -1,286 +0,0 @@ -import numpy as np -from keras import backend as K -from keras.callbacks import EarlyStopping, ReduceLROnPlateau -from keras.layers import Dropout, Dense, Input, Lambda -from keras.layers.core import Activation -from keras.layers.normalization import BatchNormalization -from keras.models import Sequential, Model -from keras.optimizers import Adam -from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.model_selection import ShuffleSplit - -from Helpers.TFUtilities import oneHot - - -class SiameseDNNClassifier(BaseEstimator, ClassifierMixin): - - def __init__(self, input_dim=10, n_pairs_per_sample =2 , target_dimension = 10, dropout_rate=0.5, act_func='relu', - learning_rate=0.1, batch_normalization=True, nb_epoch=10000, early_stopping_flag=True, - eaSt_patience=20, reLe_factor = 0.4, reLe_patience=5): - - self.target_dimension = target_dimension - self.dropout_rate = dropout_rate - self.act_func = act_func - self.learning_rate = learning_rate - self.batch_normalization = batch_normalization - self.nb_epoch = nb_epoch - self.early_stopping_flag = early_stopping_flag - self.eaSt_patience = eaSt_patience - self.reLe_factor = reLe_factor - self.reLe_patience = reLe_patience - self.input_dim = input_dim - self.n_pairs_per_sample = n_pairs_per_sample - self.model = None - - def fit(self, X, y): - - # 1. make model - siamese_model = self.create_model() - - # 2. 
fit model - # start_time = time.time() - - # use callbacks only when size of training set is above 100 - if X.shape[0] > 100: - # get pseudo validation set for keras callbacks - splitter = ShuffleSplit(n_splits=1, test_size=0.2) - for train_index, val_index in splitter.split(X): - X_train = X[train_index] - X_val = X[val_index] - y_train = y[train_index] - y_val = y[val_index] - - # register callbacks - callbacks_list = [] - # use early stopping (to save time; - # does not improve performance as checkpoint will find the best model anyway) - if self.early_stopping_flag: - early_stopping = EarlyStopping(monitor='val_loss', - patience=self.eaSt_patience) - callbacks_list += [early_stopping] - - # adjust learning rate when not improving for patience epochs - reduce_lr = ReduceLROnPlateau(monitor='val_loss', - factor=self.reLe_factor, - patience=self.reLe_patience, - min_lr=0.001, verbose=0) - callbacks_list += [reduce_lr] - - # create training+test positive and negative pairs - y_train_oh_reverse = oneHot(y_train,reverse=True) - y_val_oh_reverse = oneHot(y_val,reverse=True) - - digit_indices = [np.where(y_train_oh_reverse == i)[0] for i in range(10)] - tr_pairs, tr_y = self.create_pairs(X_train, digit_indices, self.n_pairs_per_sample) - - digit_indices = [np.where(y_val_oh_reverse == i)[0] for i in range(10)] - te_pairs, te_y = self.create_pairs(X_val, digit_indices, self.n_pairs_per_sample) - - print(tr_pairs.shape) - print(te_pairs.shape) - # fit the model - results = siamese_model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, - validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y), - batch_size=128, - epochs=self.nb_epoch, - verbose=0, - callbacks=callbacks_list) - - - - # Use learnt features for classification - # prepare target values - # Todo: calculate number of classes? 
- try: - if (self.target_dimension > 1) and (y.shape[1] > 1): - y = self.dense_to_one_hot(y, self.target_dimension) - except: - pass - - seq_model = siamese_model.get_layer(index=2) - seq_model.layers[3] = Dropout(0.5) - new_input = Input(shape=(self.input_dim,)) - new_seq = seq_model(new_input) - new_seq = Dropout(0.5)(new_seq) - new_seq = Dense(self.target_dimension, activation='softmax')(new_seq) - self.model = Model(new_input, new_seq) - optimizer = Adam(lr=self.learning_rate) - self.model.compile(loss='categorical_crossentropy', optimizer=optimizer) - print(self.model.summary()) - results = self.model.fit(X_train, y_train, - validation_data=(X_val, y_val), - batch_size=128, - epochs=self.nb_epoch, - verbose=0, - callbacks=callbacks_list) - - else: - pass - - return self - - def predict(self, X): - predict_result = self.model.predict(X, batch_size=128) - max_index = np.argmax(predict_result, axis=1) - return self.dense_to_one_hot(max_index, self.target_dimension) - - def predict_proba(self, X): - """ - Predict probabilities - :param X: array-like - :type data: float - :return: predicted values, array - """ - return self.model.predict(X, batch_size=128) - - def score(self, X, y_true): - return np.zeros(1) - - def create_model(self): - # network definition - base_network = self.create_base_network() - - input_a = Input(shape=(self.input_dim,)) - input_b = Input(shape=(self.input_dim,)) - - # because we re-use the same instance `base_network`, - # the weights of the network - # will be shared across the two branches - processed_a = base_network(input_a) - processed_b = base_network(input_b) - - distance = Lambda(self.euclidean_distance, - output_shape=self.eucl_dist_output_shape)([processed_a, processed_b]) - - model = Model([input_a, input_b], distance) - - # train - optimizer = Adam(lr=self.learning_rate) - model.compile(loss=self.contrastive_loss, optimizer=optimizer) - - return model - - @staticmethod - def euclidean_distance(vects): - x, y = vects - return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon())) - - @staticmethod - def eucl_dist_output_shape(shapes): - shape1, shape2 = shapes - return (shape1[0], 1) - - @staticmethod - def contrastive_loss(y_true, y_pred): - """Contrastive loss from Hadsell-et-al.'06 - http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf - """ - margin = 1 - return K.mean(y_true * K.square(y_pred) + - (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) - - # def create_pairs(self, x, digit_indices): - # '''Positive and negative pair creation. - # Alternates between positive and negative pairs. - # ''' - # pairs = [] - # labels = [] - # n = min([len(digit_indices[d]) for d in range(10)]) - 1 - # for d in range(10): - # for i in range(n): - # z1, z2 = digit_indices[d][i], digit_indices[d][i + 1] - # pairs += [[x[z1], x[z2]]] - # inc = random.randrange(1, 10) - # dn = (d + inc) % 10 - # z1, z2 = digit_indices[d][i], digit_indices[dn][i] - # pairs += [[x[z1], x[z2]]] - # labels += [1, 0] - # return np.array(pairs), np.array(labels) - - def create_base_network(self): - """base network to be shared (eq. to feature extraction). 
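-        Two Dense blocks (128 -> 64 units), each followed by batch
-        normalization and the configured activation, with light dropout
-        after the first block.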
-        """
-        seq = Sequential()
-        seq.add(Dense(128, input_shape=(self.input_dim,), kernel_initializer='random_uniform'))
-        seq.add(BatchNormalization())
-        seq.add(Activation(self.act_func))
-        seq.add(Dropout(0.1))
-        seq.add(Dense(64, kernel_initializer='random_uniform'))
-        seq.add(BatchNormalization())
-        seq.add(Activation(self.act_func))
-        return seq
-
-    @staticmethod
-    def compute_accuracy(predictions, labels):
-        """Compute classification accuracy with a fixed threshold on distances.
-        """
-
-        return labels[predictions.ravel() < 0.5].mean()
-
-    @staticmethod
-    def dense_to_one_hot(labels_dense, num_classes):
-        """Convert class labels from scalars to one-hot vectors."""
-        num_labels = labels_dense.shape[0]
-        index_offset = np.arange(num_labels) * num_classes
-        labels_one_hot = np.zeros((num_labels, num_classes))
-        labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
-        return labels_one_hot
-
-    def create_pairs(self, x, class_indices, n_pairs_per_subject):
-        """Positive and negative pair creation.
-        Alternates between positive and negative pairs.
-        """
-        # x: data, class_indices: lists of indices of subjects in classes
-
-        n_sample_pairs = 2 * n_pairs_per_subject * len(class_indices[0]) * len(class_indices)
-
-        # if n_sample_pairs > 100000:
-        #     print('INSANE: You are trying to use ', n_sample_pairs,
-        #           'sample pairs.')
-
-        print('Generating ', n_sample_pairs, 'sample pairs.')
-
-        pairs = []
-        labels = []
-        n = len(class_indices[0])
-        if not all(len(s) == n for s in class_indices):
-            raise ValueError('Lists do not have the same length.')
-
-        pos_pairs = self.draw_pos_pairs(class_indices, n_pairs_per_subject)
-        neg_pairs = self.draw_neg_pairs(class_indices, n_pairs_per_subject)
-
-        for d in range(len(pos_pairs)):
-            z1, z2 = pos_pairs[d]
-            pairs += [[x[z1], x[z2]]]
-            z1, z2 = neg_pairs[d]
-            pairs += [[x[z1], x[z2]]]
-            labels += [1, 0]
-        return np.array(pairs), np.array(labels)
-
-    @staticmethod
-    def draw_pos_pairs(indices, n_pairs_per_subject):
-        pairs = []
-        for ind_lists in range(len(indices)):
-            a = indices[ind_lists]
-            for ind_pair in range(n_pairs_per_subject):
-                for ind_sub in range(len(a)):
-                    p1 = a[ind_sub]
-                    next_ind = (ind_sub + 1 + ind_pair) % len(a)
-                    p2 = a[next_ind]
-                    pairs.append([p1, p2])
-        return pairs
-
-    @staticmethod
-    def draw_neg_pairs(indices, n_pairs_per_subject):
-        pairs = []
-        n_classes = len(indices)
-        n_subs = len(indices[0])
-        for ind_lists in range(n_classes):
-            for ind_pair in range(n_pairs_per_subject):
-                for ind_sub in range(len(indices[ind_lists])):
-                    p1 = indices[ind_lists][ind_sub]
-                    next_ind = (ind_sub + ind_pair + (ind_lists * (
-                        n_pairs_per_subject - 1)) + ind_lists) % len(indices[(ind_lists+ind_pair)%n_classes])
-                    p2 = indices[(ind_lists+ind_pair)%n_classes][next_ind]
-                    pairs.append([p1, p2])
-        return pairs
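-
-# Editor's illustration (not part of the original file): with two classes of
-# three subjects each, class_indices = [[0, 1, 2], [3, 4, 5]] and
-# n_pairs_per_subject=1, create_pairs() returns 12 pairs that alternate
-# positive (same class, label 1) and negative (different class, label 0).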
diff --git a/photonai/modelwrapper/SimpleLSTM.py b/photonai/modelwrapper/SimpleLSTM.py
deleted file mode 100644
index ba736201..00000000
--- a/photonai/modelwrapper/SimpleLSTM.py
+++ /dev/null
@@ -1,170 +0,0 @@
-import keras
-import keras.optimizers
-import numpy as np
-import tensorflow as tf
-from keras.layers import Dropout, Dense, LSTM
-from keras.layers.core import Activation
-from keras.layers.normalization import BatchNormalization
-from keras.models import Sequential
-from keras.callbacks import EarlyStopping, ReduceLROnPlateau
-from sklearn.base import BaseEstimator, ClassifierMixin
-from sklearn.model_selection import ShuffleSplit
-
-class SimpleLSTM(BaseEstimator, ClassifierMixin):
-    # todo: BUGFIX --> pooling doesn't work
-    def __init__(self, target_dimension=2, units=32, size_last_layer=10,
-                 gaussian_noise=0, act_func='relu', learning_rate=0.001,
-                 dropout_rate=0, batch_normalization=True,
-                 nb_epochs=200, batch_size=64,
-                 loss='categorical_crossentropy', metrics=['accuracy'],
-                 optimizer='adam', gpu_device='/gpu:0',
-                 early_stopping_flag=True, eaSt_patience=20,
-                 reLe_factor=0.4, reLe_patience=5):
-
-        self.target_dimension = target_dimension
-        self.units = units
-        self.size_last_layer = size_last_layer
-        self.act_func = act_func
-        self.lr = learning_rate
-        self.dropout = dropout_rate
-        self.batch_normalization = batch_normalization
-        self.nb_epochs = nb_epochs
-        self.loss = loss
-        self.metrics = metrics
-        self.optimizer = optimizer
-        self.batch_size = batch_size
-        self.gpu_device = gpu_device
-        self.gaussian_noise = gaussian_noise
-        self.early_stopping_flag = early_stopping_flag
-        self.eaSt_patience = eaSt_patience
-        self.reLe_factor = reLe_factor
-        self.reLe_patience = reLe_patience
-
-        self.x = None
-        self.y_ = None
-        self.model = None
-
-    def fit(self, X, y):
-        if self.target_dimension > 1:
-            y = self.dense_to_one_hot(y, self.target_dimension)
-
-        self.model = self.create_model(X.shape)
-
-        # Reshape X to add the time-step dimension the LSTM expects
-        X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
-
-        # use callbacks only when size of training set is above 100
-        if X.shape[0] > 100:
-            # get pseudo validation set for keras callbacks
-            splitter = ShuffleSplit(n_splits=1, test_size=0.2)
-            for train_index, val_index in splitter.split(X):
-                X_train = X[train_index]
-                X_val = X[val_index]
-                y_train = y[train_index]
-                y_val = y[val_index]
-
-            # register callbacks
-            callbacks_list = []
-            # use early stopping (to save time;
-            # does not improve performance as checkpoint will find the best model anyway)
-            if self.early_stopping_flag:
-                early_stopping = EarlyStopping(monitor='val_loss',
-                                               patience=self.eaSt_patience)
-                callbacks_list += [early_stopping]
-
-            # adjust learning rate when not improving for patience epochs
-            reduce_lr = ReduceLROnPlateau(monitor='val_loss',
-                                          factor=self.reLe_factor,
-                                          patience=self.reLe_patience,
-                                          min_lr=0.001, verbose=0)
-            callbacks_list += [reduce_lr]
-
-            # fit the model
-            results = self.model.fit(X_train, y_train,
-                                     validation_data=(X_val, y_val),
-                                     batch_size=128,
-                                     epochs=self.nb_epochs,
-                                     verbose=0,
-                                     callbacks=callbacks_list)
-        else:
-            # fit the model
-            print('Cannot use Keras Callbacks because of small sample size...')
-            results = self.model.fit(X, y, batch_size=128,
-                                     epochs=self.nb_epochs,
-                                     verbose=0)
-
-        return self
-
-    def predict(self, X):
-        X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
-        if self.target_dimension > 1:
-            predict_result = self.model.predict(X, batch_size=self.batch_size)
-            max_index = np.argmax(predict_result, axis=1)
-            return max_index
-        else:
-            return self.model.predict(X, batch_size=self.batch_size)
-
-    def predict_proba(self, X):
-        """
-        Predict probabilities
-        :param X: array-like
-        :type X: array-like
-        :return: predicted values, array
-        """
-        # First, reshape X to meet LSTM input requirements
-        X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
-        return self.model.predict(X, batch_size=self.batch_size)
-
-    def create_model(self, input_shape):
-        model = Sequential()
-
-        with tf.device(self.gpu_device):
-            model.add(LSTM(self.units, input_dim=input_shape[1],
-                           stateful=False, return_sequences=False))
-            model.add(Activation(self.act_func))
-            if self.batch_normalization:
-                model.add(BatchNormalization())
-
-        with tf.device(self.gpu_device):
-            if self.dropout:
model.add(Dropout(self.dropout)) - - with tf.device(self.gpu_device): - model.add(Dense(self.size_last_layer)) - model.add(Activation(self.act_func)) - if self.dropout: - model.add(Dropout(self.dropout)) - if self.batch_normalization: - model.add(BatchNormalization()) - with tf.device(self.gpu_device): - model.add(Dense(self.target_dimension)) - model.add(Activation('softmax')) - - optimizer = self.define_optimizer(optimizer_type=self.optimizer, - lr=self.lr) - - model.compile(loss=self.loss, optimizer=optimizer, metrics=self.metrics) - #model.summary() - return model - - @staticmethod - def define_optimizer(optimizer_type='Adam', lr=0.001): - # Todo: use kwargs to allow for additional optimizer tweaking - try: - optimizer_class = getattr(keras.optimizers, optimizer_type) - optimizer = optimizer_class(lr=lr) - except AttributeError as ae: - raise ValueError('Could not find optimizer:', - optimizer_type, ' - check spelling!') - - return optimizer - - @staticmethod - def dense_to_one_hot(labels_dense, num_classes): - """Convert class labels from scalars to one-hot vectors.""" - num_labels = labels_dense.shape[0] - index_offset = np.arange(num_labels) * num_classes - labels_one_hot = np.zeros((num_labels, num_classes)) - labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 - return labels_one_hot \ No newline at end of file diff --git a/photonai/modelwrapper/TestWrapper.py b/photonai/modelwrapper/TestWrapper.py deleted file mode 100644 index 308c90f8..00000000 --- a/photonai/modelwrapper/TestWrapper.py +++ /dev/null @@ -1,36 +0,0 @@ -from sklearn.base import BaseEstimator, ClassifierMixin - - -class WrapperTestElement(BaseEstimator, ClassifierMixin): - """ - This ugly class is used to test the data streaming through the pipeline - """ - - def __init__(self, any_param=1): - self.data_dict = {} - self.any_param = any_param - - def fit(self, X, y): - self.data_dict['fit_X'] = X - self.data_dict['fit_y'] = y - return self - - def transform(self, X): - self.data_dict['transform_X'] = X - return X - - def predict(self, X): - self.data_dict['predict_X'] = X - return X - - def fit_transform(self, X, y): - self.data_dict['fit_transform_X'] = X - self.data_dict['fit_transform_y'] = y - self.fit(X, y) - return self.transform(X) - - def fit_predict(self, X, y): - self.data_dict['fit_predict_X'] = X - self.data_dict['fit_predict_y'] = y - self.fit(X, y) - return self.predict(X) diff --git a/photonai/test/PhotonBatchTests.py b/photonai/test/PhotonBatchTests.py index b2dfacca..223244f5 100644 --- a/photonai/test/PhotonBatchTests.py +++ b/photonai/test/PhotonBatchTests.py @@ -13,7 +13,7 @@ def __init__(self): def fit(self, X, y, **kwargs): pass - def transform(self, X, y, **kwargs): + def transform(self, X, y=None, **kwargs): X_new = [] for i, x in enumerate(X): X_new.append([str(sub_x) + str(y[i]) for sub_x in x]) diff --git a/photonai/test/PipelineElementTests.py b/photonai/test/PipelineElementTests.py index 6ca60c1d..897e458f 100644 --- a/photonai/test/PipelineElementTests.py +++ b/photonai/test/PipelineElementTests.py @@ -317,7 +317,7 @@ def test_stacking_of_branches(self): stacking_element += branch2 stacking_element.fit(self.X, self.y) - trans = stacking_element.transform(self.X) + trans, _, _ = stacking_element.transform(self.X) pred = stacking_element.predict(self.X) self.assertTrue(np.array_equal(trans, pred)) diff --git a/photonai/test/PipelineTests.py b/photonai/test/PipelineTests.py index a70a7bca..6fc4f739 100644 --- a/photonai/test/PipelineTests.py +++ b/photonai/test/PipelineTests.py 
@@ -95,7 +95,6 @@ def test_regular_use(self): # sk_transformed_X = sk_pipe.transform(X) # self.assertTrue(np.array_equal(photon_transformed_X, sk_transformed_X)) - def test_no_estimator(self): no_estimator_pipe = PhotonPipeline([("StandardScaler", self.p_ss), ("PCA", self.p_pca)])