add default base learner to GenericBooster
thierrymoudiki committed Oct 13, 2024
1 parent 45e2363 commit d8e0565
Showing 7 changed files with 92 additions and 6 deletions.
5 changes: 5 additions & 0 deletions CHANGES.md
@@ -1,3 +1,8 @@
# version 0.22.3

- Add `GenericGradientBooster` for regression and classification. See: https://thierrymoudiki.github.io/blog/2024/10/06/python/r/genericboosting
and `examples/genboost*`

# version 0.18.2

- Gaussian weights in `LSBoostRegressor` and `LSBoostClassifier` randomized hidden layer
49 changes: 49 additions & 0 deletions examples/genboost_regressor2.py
@@ -0,0 +1,49 @@
import subprocess
import sys
import os

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")


subprocess.check_call([sys.executable, "-m", "pip", "install", "matplotlib"])

import mlsauce as ms
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, LinearRegression
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.tree import DecisionTreeRegressor
from time import time
from os import chdir
from sklearn import metrics
from sklearn.datasets import fetch_openml

# Load the dataset from OpenML
boston = fetch_openml(name='boston', version=1, as_frame=True)

# Get the features and target
X = boston.data
y = boston.target

# Display the first few rows
print(X.head())
print(y.head())

np.random.seed(15029)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2)

X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
y_train = y_train.astype(np.float64)
y_test = y_test.astype(np.float64)

# boosting with the default base learner (an ExtraTreeRegressor), with column and row subsampling
obj = ms.GenericBoostingRegressor(col_sample=0.9, row_sample=0.9)
print(obj.get_params())
start = time()
obj.fit(X_train, y_train)
print(time() - start)  # training time (s)
start = time()
print(np.sqrt(np.mean(np.square(obj.predict(X_test) - y_test))))  # test set RMSE
print(time() - start)  # prediction and scoring time (s)
print(obj.obj['loss'])  # loss history across boosting iterations
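The script above exercises the new default base learner. Since `base_model` is the first constructor argument of `GenericBoostingRegressor`, any scikit-learn regressor can be plugged in instead; a minimal sketch continuing from the split above (the choice of `Ridge` and the subsampling settings are illustrative, not part of this commit):

obj_ridge = ms.GenericBoostingRegressor(base_model=Ridge(), col_sample=0.9, row_sample=0.9)
obj_ridge.fit(X_train, y_train)
print(np.sqrt(np.mean(np.square(obj_ridge.predict(X_test) - y_test))))  # test set RMSE with a Ridge base learner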
30 changes: 30 additions & 0 deletions examples/lazy_booster_regression.py
@@ -1,5 +1,6 @@
import os
import mlsauce as ms
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
@@ -27,3 +28,32 @@
models, predictions = regr.fit(X_train, X_test, y_train, y_test)
model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
print(models)


from sklearn.datasets import fetch_openml

# Load the dataset from OpenML
boston = fetch_openml(name='boston', version=1, as_frame=True)

# Get the features and target
X = boston.data
y = boston.target

# Display the first few rows
print(X.head())
print(y.head())

np.random.seed(1509)
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2)

X_train = X_train.astype(np.float64)
X_test = X_test.astype(np.float64)
y_train = y_train.astype(np.float64)
y_test = y_test.astype(np.float64)

regr = ms.LazyBoostingRegressor(verbose=0, ignore_warnings=True, #n_jobs=2,
                                custom_metric=None, preprocess=True)
models, predictions = regr.fit(X_train, X_test, y_train, y_test)
model_dictionary = regr.provide_models(X_train, X_test, y_train, y_test)
print(models)
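`regr.fit` returns a leaderboard (`models`) plus the corresponding predictions, while `provide_models` returns the fitted models themselves, keyed by name in a lazypredict-style dictionary. A short sketch of retrieving one of them, continuing from the script above (the leaderboard being indexed by model name is an assumption, not shown in this diff):

best_name = models.index[0]               # assumed: leaderboard rows are indexed by model name
best_model = model_dictionary[best_name]  # fitted estimator/pipeline for that entry
print(best_name)
print(best_model.predict(X_test)[:5])     # predictions on the held-out split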
5 changes: 3 additions & 2 deletions mlsauce/booster/_booster_classifier.py
@@ -4,6 +4,7 @@
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.base import ClassifierMixin
from sklearn.tree import ExtraTreeRegressor
from sklearn.preprocessing import PolynomialFeatures

try:
@@ -554,7 +555,7 @@ class GenericBoostingClassifier(LSBoostClassifier):
Attributes:
base_model: object
base learner.
base learner (default is ExtraTreeRegressor) to be boosted.
n_estimators: int
number of boosting iterations.
@@ -625,7 +626,7 @@ class GenericBoostingClassifier(LSBoostClassifier):

def __init__(
self,
base_model,
base_model=ExtraTreeRegressor(),
n_estimators=100,
learning_rate=0.1,
n_hidden_features=5,
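With this change, `GenericBoostingClassifier` no longer requires an explicit base learner: omitting `base_model` now falls back to an `ExtraTreeRegressor`. A minimal sketch of the resulting usage (not part of this commit; it assumes the class is exported at the package level and accepts any scikit-learn regressor as `base_model`):

import mlsauce as ms
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=13)

clf_default = ms.GenericBoostingClassifier()                   # new ExtraTreeRegressor default
clf_custom = ms.GenericBoostingClassifier(base_model=Ridge())  # explicit base learner still works
for clf in (clf_default, clf_custom):
    clf.fit(X_train, y_train)
    print(clf.score(X_test, y_test))  # mean accuracy via ClassifierMixin.score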
5 changes: 3 additions & 2 deletions mlsauce/booster/_booster_regressor.py
@@ -4,6 +4,7 @@
import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin
from sklearn.tree import ExtraTreeRegressor
from sklearn.preprocessing import PolynomialFeatures

try:
@@ -437,7 +438,7 @@ class GenericBoostingRegressor(LSBoostRegressor):
Attributes:
base_model: object
base learner.
base learner (default is ExtraTreeRegressor) to be boosted.
n_estimators: int
number of boosting iterations.
@@ -517,7 +518,7 @@ class GenericBoostingRegressor(LSBoostRegressor):

def __init__(
self,
base_model,
base_model=ExtraTreeRegressor(),
n_estimators=100,
learning_rate=0.1,
n_hidden_features=5,
2 changes: 1 addition & 1 deletion mlsauce/lazybooster/lazyboosterregression.py
@@ -289,7 +289,7 @@ def fit(self, X_train, X_test, y_train, y_test, **kwargs):
adj_rsquared = adjusted_rsquared(
r_squared, X_test.shape[0], X_test.shape[1]
)
rmse = mean_squared_error(y_test, y_pred, squared=False)
rmse = root_mean_squared_error(y_test, y_pred)

names.append(name)
R2.append(r_squared)
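The `squared=False` option of `mean_squared_error` is deprecated in recent scikit-learn releases, and `root_mean_squared_error` computes the same quantity directly. A standalone check of the equivalence (assuming scikit-learn >= 1.4, where `root_mean_squared_error` is available):

import numpy as np
from sklearn.metrics import root_mean_squared_error

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])

print(root_mean_squared_error(y_true, y_pred))   # 0.6123...
print(np.sqrt(np.mean((y_true - y_pred) ** 2)))  # same value, computed by hand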
2 changes: 1 addition & 1 deletion setup.py
@@ -37,7 +37,7 @@
MAINTAINER_EMAIL = '[email protected]'
LICENSE = 'BSD3 Clause Clear'

__version__ = '0.22.2'
__version__ = '0.22.3'

VERSION = __version__
