Commit
Merge pull request #58 from Techtonique/sample-weight
Sample weight + VAR and VECM
thierrymoudiki authored Sep 18, 2024
2 parents 7812488 + eb8a224 commit 0046c5b
Showing 16 changed files with 239 additions and 148 deletions.
4 changes: 2 additions & 2 deletions CHANGES.md
@@ -1,8 +1,8 @@
-# version 0.24.0
+# version 0.24.4

- Update `LazyDeepMTS`: **no more `LazyMTS` class**; instead, use `LazyDeepMTS` with `n_layers=1`
- Specify forecasting horizon in `LazyDeepMTS` (see updated docs and examples/lazy_mts_horizon.py)
-- New class `ClassicalMTS` for classical models (for now VAR and VECM adapted from statsmodels) in multivariate time series forecasting (not available in `LazyDeepMTS` yet)
+- New class `ClassicalMTS` for classical models (for now VAR and VECM adapted from statsmodels) in multivariate time series forecasting
- [`partial_fit`](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDClassifier.html#sklearn.linear_model.SGDClassifier.partial_fit) for `CustomClassifier` and `CustomRegressor`

# version 0.23.1
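A minimal usage sketch of the new `ClassicalMTS` class mentioned in the 0.24.4 notes above; the `model="VAR"` / `model="VECM"` constructor argument and the `predict(h=...)` signature are assumed from the changelog and the MTS examples, not verified here:

import nnetsauce as ns
import numpy as np
import statsmodels.api as sm

# macrodata, log-differenced, as in examples/lazy_mts_horizon.py below
mdata = sm.datasets.macrodata.load_pandas().data[["realgdp", "realcons", "realinv"]]
data = np.log(mdata).diff().dropna()

obj = ns.ClassicalMTS(model="VAR")  # assumed constructor argument; "VECM" analogous
obj.fit(data)
print(obj.predict(h=5))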
3 changes: 3 additions & 0 deletions Makefile
@@ -80,6 +80,9 @@ build-site: docs ## export mkdocs website to a folder
	cp -rf nnetsauce-docs/* ../../Pro_Website/Techtonique.github.io/nnetsauce
	find . -name '__pycache__' -exec rm -fr {} +

+run-custom: ## run all custom examples with one command
+	find examples -maxdepth 2 -name "*custom*.py" -exec python3 {} \;

run-examples: ## run all examples with one command
	find examples -maxdepth 2 -name "*.py" -exec python3 {} \;

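With this target, `make run-custom` at the repository root executes every examples/*custom*.py script (including custom_with_weights.py added below), while `make run-examples` continues to run all examples.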
25 changes: 24 additions & 1 deletion examples/custom_deep_classification.py
@@ -33,7 +33,30 @@

print(clf.score(X_test, y_test))

print("Example 2 - conformal")
print("Example 2 - not conformal with weights")

load_models = [load_breast_cancer, load_iris, load_wine]

for model in load_models:

    data = model()
    X = data.data
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.3, random_state=13)

    obj = SVC()

    clf = ns.DeepClassifier(obj, n_layers=2, verbose=1, n_clusters=2, n_hidden_features=2)

    start = time()
    clf.fit(X_train, y_train, sample_weight=np.random.rand(X_train.shape[0]))
    print(f"\nElapsed: {time() - start} seconds\n")

    preds = clf.predict(X_test)

    print(clf.score(X_test, y_test))

print("Example 3 - conformal")

for model in load_models:

92 changes: 0 additions & 92 deletions examples/custom_deep_classification2.py

This file was deleted.

67 changes: 67 additions & 0 deletions examples/custom_with_weights.py
@@ -0,0 +1,67 @@
import nnetsauce as ns
import numpy as np
import os
from sklearn.datasets import load_breast_cancer, load_diabetes
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

print("Example 1 - classification")

X, y = load_breast_cancer(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

clf = ns.CustomClassifier(obj=LogisticRegression())

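# inverse class-frequency weights: both classes contribute equally to the fit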
n_zeros = np.sum(y_train == 0)
n_ones = np.sum(y_train == 1)
weights = np.where(y_train == 0, 1/n_zeros, 1/n_ones)

clf.fit(X_train, y_train, sample_weight=weights)

print(clf.score(X_test, y_test))

clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))

clf = ns.DeepClassifier(obj=LogisticRegression())

clf.fit(X_train, y_train, sample_weight=weights)

print(clf.score(X_test, y_test))

clf.fit(X_train, y_train)

print(clf.score(X_test, y_test))

print("Example 2 - regression")

X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2, random_state=123)

reg = ns.CustomRegressor(obj=RandomForestRegressor())

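# random weights, only to exercise the sample_weight code path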
weights = np.random.rand(X_train.shape[0])

reg.fit(X_train, y_train, sample_weight=weights)

print(reg.score(X_test, y_test))

reg.fit(X_train, y_train)

print(reg.score(X_test, y_test))

reg = ns.DeepRegressor(obj=RandomForestRegressor())

reg.fit(X_train, y_train, sample_weight=weights)

print(reg.score(X_test, y_test))

reg.fit(X_train, y_train)

print(reg.score(X_test, y_test))


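The new `partial_fit` listed in CHANGES.md accepts the same `sample_weight` argument. A minimal sketch, assuming a base learner with its own `partial_fit` (e.g. `SGDRegressor`) and that `CustomRegressor.partial_fit` can be called on a fresh model:

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import SGDRegressor

X, y = load_diabetes(return_X_y=True)
reg = ns.CustomRegressor(obj=SGDRegressor())

# stream the data in two mini-batches, each with its own weight vector
half = X.shape[0] // 2
reg.partial_fit(X[:half], y[:half], sample_weight=np.random.rand(half))
reg.partial_fit(X[half:], y[half:], sample_weight=np.random.rand(X.shape[0] - half))
print(reg.predict(X[:5]))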
4 changes: 2 additions & 2 deletions examples/lazy_mts_horizon.py
@@ -24,7 +24,7 @@
data = np.log(mdata).diff().dropna()

n = data.shape[0]
-max_idx_train = np.floor(n*0.9)
+max_idx_train = np.floor(n*0.4)
training_index = np.arange(0, max_idx_train)
testing_index = np.arange(max_idx_train, n)
df_train = data.iloc[training_index,:]
@@ -147,7 +147,7 @@
print(f"\n ----- Example 9 ----- \n")

regr_mts5 = ns.LazyDeepMTS(verbose=1, ignore_warnings=False, custom_metric=None,
-                           lags = 20, n_hidden_features=7, n_clusters=2,
+                           n_hidden_features=7, n_clusters=2,
                            #type_pi = "gaussian",
                            show_progress=False, preprocess=False,
                            h=5, )
4 changes: 2 additions & 2 deletions nnetsauce/boosting/adaBoostClassifier.py
@@ -286,7 +286,7 @@ def fit(self, X, y, sample_weight=None, **kwargs):

        for m in range(self.n_estimators):
            preds = base_learner.fit(
-                X, y, sample_weight=np.ravel(w_m, order="C"), **kwargs
+                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict(X)

            self.base_learners_.update(
@@ -344,7 +344,7 @@ def fit(self, X, y, sample_weight=None, **kwargs):

        for m in range(self.n_estimators):
            probs = base_learner.fit(
-                X, y, sample_weight=np.ravel(w_m, order="C"), **kwargs
+                X, y, sample_weight=w_m.ravel(), **kwargs
            ).predict_proba(X)

            np.clip(
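(For the weight vector w_m, `np.ravel(w_m, order="C")` and `w_m.ravel()` return the same C-ordered flattened array; the new spelling is just the more idiomatic method call.)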
21 changes: 15 additions & 6 deletions nnetsauce/custom/customClassifier.py
@@ -191,6 +191,9 @@ def fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -201,16 +204,21 @@ def fit(self, X, y, sample_weight=None, **kwargs):
"""

output_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)
self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn
self.n_classes_ = len(np.unique(y)) # for compatibility with sklearn

if self.level is not None:
self.obj = PredictionSet(
obj=self.obj, method=self.pi_method, level=self.level
)

# if sample_weights, else: (must use self.row_index)
if sample_weight is not None:
self.obj.fit(
scaled_Z,
output_y,
sample_weight=np.ravel(sample_weight, order="C")[
sample_weight=sample_weight[
self.index_row_
],
].ravel(),
# **kwargs
)

@@ -234,6 +242,9 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Subset of target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -252,9 +263,7 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
            self.obj.partial_fit(
                scaled_Z,
                output_y,
-                sample_weight=np.ravel(sample_weight, order="C")[
-                    self.index_row_
-                ],
+                sample_weight=sample_weight[self.index_row_].ravel(),
                # **kwargs
            )
        except:
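The `self.index_row_` indexing above matters because `cook_training_set` can subsample training rows, so the weights must be aligned with the rows actually kept. A minimal sketch of that interaction, assuming the `row_sample` constructor argument controls the subsampling:

import nnetsauce as ns
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression

X, y = load_breast_cancer(return_X_y=True)
w = np.random.rand(X.shape[0])  # one weight per original training row

# with row_sample < 1, only a fraction of rows is used internally, and
# fit() aligns sample_weight with self.index_row_ before passing it on
clf = ns.CustomClassifier(obj=LogisticRegression(), row_sample=0.8)
clf.fit(X, y, sample_weight=w)
print(clf.score(X, y))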
26 changes: 15 additions & 11 deletions nnetsauce/custom/customRegressor.py
@@ -176,6 +176,9 @@ def fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -188,24 +191,24 @@ def fit(self, X, y, sample_weight=None, **kwargs):

        centered_y, scaled_Z = self.cook_training_set(y=y, X=X, **kwargs)

+        if self.level is not None:
+            self.obj = PredictionInterval(
+                obj=self.obj, method=self.pi_method, level=self.level
+            )
+
        # if sample_weight is provided, subset it to the rows kept by cook_training_set
        if sample_weight is not None:
            self.obj.fit(
                scaled_Z,
                centered_y,
-                sample_weight=np.ravel(sample_weight, order="C")[
-                    self.index_row
-                ],
+                sample_weight=sample_weight[
+                    self.index_row_
+                ].ravel(),
                **kwargs
            )

            return self

-        if self.level is not None:
-            self.obj = PredictionInterval(
-                obj=self.obj, method=self.pi_method, level=self.level
-            )
-
        self.obj.fit(scaled_Z, centered_y, **kwargs)

        self.X_ = X
@@ -225,6 +228,9 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
        y: array-like, shape = [n_samples]
            Subset of target values.

+        sample_weight: array-like, shape = [n_samples]
+            Sample weights.
+
        **kwargs: additional parameters to be passed to
            self.cook_training_set or self.obj.fit
@@ -243,9 +249,7 @@ def partial_fit(self, X, y, sample_weight=None, **kwargs):
            self.obj.partial_fit(
                scaled_Z,
                centered_y,
-                sample_weight=np.ravel(sample_weight, order="C")[
-                    self.index_row
-                ],
+                sample_weight=sample_weight[self.index_row_].ravel(),
                **kwargs
            )
        except:
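Note: the old code indexed `self.index_row` (no trailing underscore) while the attribute set during training is `self.index_row_`, so besides the `.ravel()` cleanup this change appears to fix an AttributeError whenever `sample_weight` was passed to `CustomRegressor`.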