Add ElasticNet and LassoLars output model options
ZmeiGorynych committed Aug 29, 2024
1 parent 8afa4ba commit cb6677e
Showing 2 changed files with 91 additions and 2 deletions.
86 changes: 86 additions & 0 deletions causaltune/models/regression.py
@@ -0,0 +1,86 @@
from sklearn.linear_model import ElasticNet, LassoLars


from flaml.automl.model import SKLearnEstimator
from flaml import tune

# For some reason these models are present in the flaml source code
# but missing from the released flaml 2.2.0 package,
# so keep this file in the project for now


class ElasticNetEstimator(SKLearnEstimator):
    """The class for tuning the ElasticNet regression model.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html
    """

    ITER_HP = "max_iter"

    @classmethod
    def search_space(cls, data_size, task="regression", **params):
        return {
            "alpha": {
                "domain": tune.loguniform(lower=0.0001, upper=1.0),
                "init_value": 0.1,
            },
            "l1_ratio": {
                "domain": tune.uniform(lower=0.0, upper=1.0),
                "init_value": 0.5,
            },
            "selection": {
                "domain": tune.choice(["cyclic", "random"]),
                "init_value": "cyclic",
            },
        }

    def config2params(self, config: dict) -> dict:
        params = super().config2params(config)
        # Fall back to sklearn's default tolerance if none was supplied
        params["tol"] = params.get("tol", 0.0001)
        # ElasticNet does not accept an n_jobs argument, so drop it
        if "n_jobs" in params:
            params.pop("n_jobs")
        return params

    def __init__(self, task="regression", **config):
        super().__init__(task, **config)
        assert self._task.is_regression(), "ElasticNet for regression task only"
        self.estimator_class = ElasticNet


class LassoLarsEstimator(SKLearnEstimator):
    """The class for tuning the Lasso model fit with Least Angle Regression, a.k.a. LARS.

    Reference: https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LassoLars.html
    """

    ITER_HP = "max_iter"

    @classmethod
    def search_space(cls, task=None, **params):
        return {
            "alpha": {
                "domain": tune.loguniform(lower=1e-4, upper=1.0),
                "init_value": 0.1,
            },
            "fit_intercept": {
                "domain": tune.choice([True, False]),
                "init_value": True,
            },
            "eps": {
                "domain": tune.loguniform(lower=1e-16, upper=1e-4),
                "init_value": 2.220446049250313e-16,
            },
        }

    def config2params(self, config: dict) -> dict:
        params = super().config2params(config)
        # LassoLars does not accept an n_jobs argument, so drop it
        if "n_jobs" in params:
            params.pop("n_jobs")
        return params

    def __init__(self, task="regression", **config):
        super().__init__(task, **config)
        assert self._task.is_regression(), "LassoLars for regression task only"
        self.estimator_class = LassoLars

    def predict(self, X, **kwargs):
        # Re-implement predict so that keyword arguments are forwarded
        # to the underlying fitted model after preprocessing
        X = self._preprocess(X)
        return self._model.predict(X, **kwargs)
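
A minimal usage sketch for the new estimator class, using FLAML's documented add_learner API; the toy dataset, learner name, and time budget here are illustrative assumptions, not part of the commit:

from flaml import AutoML
from sklearn.datasets import make_regression

from causaltune.models.regression import ElasticNetEstimator

X, y = make_regression(n_samples=200, n_features=5, noise=0.1, random_state=0)

automl = AutoML()
# Register the custom estimator class under a name FLAML can refer to
automl.add_learner("elastic_net", ElasticNetEstimator)
automl.fit(
    X_train=X,
    y_train=y,
    task="regression",
    estimator_list=["elastic_net"],
    time_budget=10,  # seconds; illustrative only
)
print(automl.best_config)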
7 changes: 5 additions & 2 deletions causaltune/search/component.py
@@ -16,9 +16,10 @@
    ExtraTreesEstimator,
)
from flaml.automl.task.factory import task_factory

import flaml

from causaltune.models.regression import ElasticNetEstimator, LassoLarsEstimator


def flaml_config_to_tune_config(flaml_config: dict) -> Tuple[dict, dict, dict]:
    cfg = {}
@@ -37,6 +38,8 @@ def flaml_config_to_tune_config(flaml_config: dict) -> Tuple[dict, dict, dict]:


estimators = {
    "elastic_net": ElasticNetEstimator,
    "lasso_lars": LassoLarsEstimator,
    "knn": KNeighborsEstimator,
    "xgboost": XGBoostSklearnEstimator,
    "xgboost_limit_depth": XGBoostLimitDepthEstimator,
@@ -56,7 +59,7 @@ def joint_config(data_size: Tuple[int, int], estimator_list=None):
            continue
        task = task_factory("regression")
        cfg, init_params, low_cost_init_params = flaml_config_to_tune_config(
-            cls.search_space(data_size, task=task)
+            cls.search_space(data_size=data_size, task=task)
        )

        # Test if the estimator instantiates fine
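
For reference, a sketch of how these search spaces flow through the registry above, based only on the call pattern visible in joint_config; the exact contents of the returned dicts are an assumption:

from flaml.automl.task.factory import task_factory

from causaltune.models.regression import LassoLarsEstimator
from causaltune.search.component import flaml_config_to_tune_config

space = LassoLarsEstimator.search_space(task=task_factory("regression"))
cfg, init_params, low_cost_init_params = flaml_config_to_tune_config(space)
print(cfg)          # flaml.tune domains keyed by hyperparameter name
print(init_params)  # starting point taken from each "init_value"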
