add default base learner to GenericBooster
thierrymoudiki committed Oct 13, 2024
1 parent 45e2363 commit d8e0565
Showing 7 changed files with 92 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
@@ -1,3 +1,8 @@
# version 0.22.3

- Add `GenericGradientBooster` for regression and classification. See: https://thierrymoudiki.github.io/blog/2024/10/06/python/r/genericboosting
and `examples/genboost*`

# version 0.18.2

- Gaussian weights in `LSBoostRegressor` and `LSBoostClassifier` randomized hidden layer
49 changes: 49 additions & 0 deletions examples/genboost_regressor2.py
@@ -0,0 +1,49 @@
import subprocess
import sys
import os

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")


subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"])

import mlsauce as ms
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.tree import DecisionTreeRegressor
from time import time
from os import chdir
from sklearn import metrics
from sklearn.datasets import fetch_openml

# Load the dataset from OpenML
boston = fetch_openml(name='boston', version=1, as_frame=True)

# Get the features and target
X = boston.data
y = boston.target

# Display the first few rows
print(X.head())
print(y.head())

np.random.seed(15029)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2)

X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
y_train = y_train.astype(np.float64)
y_test = y_test.astype(np.float64)

# boosting with the default base learner (an ExtraTreeRegressor), with column and row subsampling
obj = ms.GenericBoostingRegressor(col_sample=0.9, row_sample=0.9)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time() - start)  # training time (s)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))  # test set RMSE
print(time() - start)  # prediction and scoring time (s)
print(obj.obj['loss'])  # loss history across boosting iterations
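The script above exercises the new default base learner. Since `base_model` is the first constructor argument of `GenericBoostingRegressor`, any scikit-learn regressor can be plugged in instead; a minimal sketch continuing from the split above (the choice of `Ridge` and the subsampling settings are illustrative, not part of this commit):

obj_ridge = ms.GenericBoostingRegressor(base_model=Ridge(), col_sample=0.9, row_sample=0.9)
obj_ridge.fit(X_train, y_train)
print(np.sqrt(np.mean(np.square(obj_ridge.predict(X_test) - y_test))))  # test set RMSE with a Ridge base learner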
30 changes: 30 additions & 0 deletions examples/lazy_booster_regression.py
@@ -1,5 +1,6 @@
import os
import mlsauce as ms
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
@@ -27,3 +28,32 @@
models, predictions = regr.fit(X_train, X_test, y_train, y_test)
model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
print(models)


from sklearn.datasets import fetch_openml

# Load the dataset from OpenML
boston = fetch_openml(name='boston', version=1, as_frame=True)

# Get the features and target
X = boston.data
y = boston.target

# Display the first few rows
print(X.head())
print(y.head())

np.random.seed(1509)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2)

X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
y_train = y_train.astype(np.float64)
y_test = y_test.astype(np.float64)

regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2,
                                custom_metric=None, preprocess=True)
models, predictions = regr.fit(X_train, X_test, y_train, y_test)
model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
print(models)
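`regr.fit` returns a leaderboard (`models`) plus the corresponding predictions, while `provide_models` returns the fitted models themselves, keyed by name in a lazypredict-style dictionary. A short sketch of retrieving one of them, continuing from the script above (the leaderboard being indexed by model name is an assumption, not shown in this diff):

best_name = models.index[0]               # assumed: leaderboard rows are indexed by model name
best_model = model_dictionary[best_name]  # fitted estimator/pipeline for that entry
print(best_name)
print(best_model.predict(X_test)[:5])     # predictions on the held-out split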
5 changes: 3 additions & 2 deletions mlsauce/booster/_booster_classifier.py
@@ -4,6 +4,7 @@
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.tree import ExtraTreeRegressor
from sklearn.preprocessing import PolynomialFeatures

try:
@@ -554,7 +555,7 @@ class GenericBoostingClassifier(LSBoostClassifier):
Attributes:
base_model: object
base learner.
base learner (default is ExtraTreeRegressor) to be boosted.
n_estimators: int
number of boosting iterations.
@@ -625,7 +626,7 @@ class GenericBoostingClassifier(LSBoostClassifier):

def __init__(
self,
base_model,
base_model=ExtraTreeRegressor(),
n_estimators=100,
learning_rate=0.1,
n_hidden_features=5,
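With this change, `GenericBoostingClassifier` no longer requires an explicit base learner: omitting `base_model` now falls back to an `ExtraTreeRegressor`. A minimal sketch of the resulting usage (not part of this commit; it assumes the class is exported at the package level and accepts any scikit-learn regressor as `base_model`):

import mlsauce as ms
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

clf_default = ms.GenericBoostingClassifier()                   # new ExtraTreeRegressor default
clf_custom = ms.GenericBoostingClassifier(base_model=Ridge())  # explicit base learner still works
for clf in (clf_default, clf_custom):
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))  # mean accuracy via ClassifierMixin.score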
5 changes: 3 additions & 2 deletions mlsauce/booster/_booster_regressor.py
@@ -4,6 +4,7 @@
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.tree import ExtraTreeRegressor
from sklearn.preprocessing import PolynomialFeatures

try:
@@ -437,7 +438,7 @@ class GenericBoostingRegressor(LSBoostRegressor):
Attributes:
base_model: object
base learner.
base learner (default is ExtraTreeRegressor) to be boosted.
n_estimators: int
number of boosting iterations.
@@ -517,7 +518,7 @@ class GenericBoostingRegressor(LSBoostRegressor):

def __init__(
self,
base_model,
base_model=ExtraTreeRegressor(),
n_estimators=100,
learning_rate=0.1,
n_hidden_features=5,
2 changes: 1 addition & 1 deletion mlsauce/lazybooster/lazyboosterregression.py
@@ -289,7 +289,7 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs):
adj_rsquared = adjusted_rsquared(
r_squared, X_test.shape[0], X_test.shape[1]
)
rmse = mean_squared_error(y_test, y_pred, squared=False)
rmse = root_mean_squared_error(y_test, y_pred)

names.append(name)
R2.append(r_squared)
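The `squared=False` option of `mean_squared_error` is deprecated in recent scikit-learn releases, and `root_mean_squared_error` computes the same quantity directly. A standalone check of the equivalence (assuming scikit-learn >= 1.4, where `root_mean_squared_error` is available):

import numpy as np
from sklearn.metrics import root_mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

print(root_mean_squared_error(y_true, y_pred))   # 0.6123...
print(np.sqrt(np.mean((y_true - y_pred) ** 2)))  # same value, computed by hand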
2 changes: 1 addition & 1 deletion setup.py
@@ -37,7 +37,7 @@
MAINTAINER_EMAIL = '[email protected]'
LICENSE = 'BSD3 Clause Clear'

__version__ = '0.22.2'
__version__ = '0.22.3'

VERSION = __version__
