choose block size for block bootstrap Pt.3

Techtonique · Jul 11, 2024 · 0f2984d · 0f2984d
1 parent f16b402
commit 0f2984d
Show file tree

Hide file tree

Showing 7 changed files with 104 additions and 48 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,9 +1,10 @@
-# version 0.22.4
+# version 0.22.7
 
 - Implement new types of predictive simulation intervals (`type_pi`s): independent bootstrap, block bootstrap, 2 variants of split conformal prediction in class `MTS` (see updated docs)
 - Gaussian prediction intervals `type_pi == "gaussian"` in class `MTS`
 - Implement Winkler score in `LazyMTS` and `LazyDeepMTS` for probabilistic forecasts
 - Use conformalized `Estimator`s in `MTS` (see `examples/mts_conformal_not_sims.py`)
+- Include `block_size` for block bootstrapping methods for `*MTS` classes 
 
 # version 0.20.6
 

diff --git a/nnetsauce/deep/deepMTS.py b/nnetsauce/deep/deepMTS.py
@@ -61,6 +61,23 @@ class DeepMTS(MTS):
         lags: int.
             number of lags used for each time series.
 
+        type_pi: str.
+            type of prediction interval; currently:
+            - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case
+            - "kde": based on Kernel Density Estimation of in-sample residuals
+            - "bootstrap": based on independent bootstrap of in-sample residuals
+            - "block-bootstrap": based on basic block bootstrap of in-sample residuals
+            - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals
+            - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals
+            - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals
+            - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals
+            - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals
+            - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals
+
+        block_size: int.
+            size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap").
+            Default is round(3.15*(n_residuals^(1/3))), i.e. 3.15 times the cube root of the number of residuals
+
         replications: int.
             number of replications (if needed, for predictive simulation). Default is 'None'.
 
@@ -186,6 +203,7 @@ def __init__(
         type_scaling=("std", "std", "std"),
         lags=1,
         type_pi="kde",
+        block_size=None,
         replications=None,
         kernel=None,
         agg="mean",
@@ -232,6 +250,7 @@ def __init__(
             type_scaling=type_scaling,
             seed=seed,
             type_pi=type_pi,
+            block_size=block_size,
             replications=replications,
             kernel=kernel,
             agg=agg,

diff --git a/nnetsauce/lazypredict/lazyMTS.py b/nnetsauce/lazypredict/lazyMTS.py
@@ -162,6 +162,7 @@ def __init__(
         type_scaling=("std", "std", "std"),
         lags=1,
         type_pi="kde",
+        block_size=None,
         replications=None,
         kernel=None,
         agg="mean",
@@ -194,6 +195,7 @@ def __init__(
             backend=backend,
             lags=lags,
             type_pi=type_pi,
+            block_size=block_size,
             replications=replications,
             kernel=kernel,
             agg=agg,
@@ -205,11 +207,11 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
 
         Parameters:
 
-            X_train: array-like,
+            X_train: array-like or data frame,
                 Training vectors, where rows is the number of samples
                 and columns is the number of features.
 
-            X_test: array-like,
+            X_test: array-like or data frame,
                 Testing vectors, where rows is the number of samples
                 and columns is the number of features.
 
@@ -316,6 +318,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                                         type_scaling=self.type_scaling,
                                         lags=self.lags,
                                         type_pi=self.type_pi,
+                                        block_size=self.block_size,
                                         replications=self.replications,
                                         kernel=self.kernel,
                                         agg=self.agg,
@@ -326,7 +329,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                                 ),
                             ]
                         )
-                    else: # "random_state" in model().get_params().keys()
+                    else:  # "random_state" in model().get_params().keys()
                         pipe = Pipeline(
                             steps=[
                                 ("preprocessor", preprocessor),
@@ -347,6 +350,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                                         type_scaling=self.type_scaling,
                                         lags=self.lags,
                                         type_pi=self.type_pi,
+                                        block_size=self.block_size,
                                         replications=self.replications,
                                         kernel=self.kernel,
                                         agg=self.agg,
@@ -365,9 +369,11 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
 
                     X_pred = pipe["regressor"].predict(
                         h=X_test.shape[0], **kwargs
-                    )                    
+                    )
 
-                    if (self.replications is not None) or (self.type_pi=="gaussian"):                        
+                    if (self.replications is not None) or (
+                        self.type_pi == "gaussian"
+                    ):
                         rmse = mean_squared_error(
                             X_test, X_pred.mean, squared=False
                         )
@@ -385,7 +391,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                     MAE.append(mae)
                     MPL.append(mpl)
 
-                    if (self.replications is not None) or (self.type_pi=="gaussian"):
+                    if (self.replications is not None) or (
+                        self.type_pi == "gaussian"
+                    ):
                         WINKLERSCORE.append(winklerscore)
                         COVERAGE.append(coveragecalc)
                     TIME.append(time.time() - start)
@@ -395,7 +403,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                         CUSTOM_METRIC.append(custom_metric)
 
                     if self.verbose > 0:
-                        if (self.replications is not None) or (self.type_pi=="gaussian"):
+                        if (self.replications is not None) or (
+                            self.type_pi == "gaussian"
+                        ):
                             scores_verbose = {
                                 "Model": name,
                                 "RMSE": rmse,
@@ -447,6 +457,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                             type_scaling=self.type_scaling,
                             lags=self.lags,
                             type_pi=self.type_pi,
+                            block_size=self.block_size,
                             replications=self.replications,
                             kernel=self.kernel,
                             agg=self.agg,
@@ -470,17 +481,18 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                             type_scaling=self.type_scaling,
                             lags=self.lags,
                             type_pi=self.type_pi,
+                            block_size=self.block_size,
                             replications=self.replications,
                             kernel=self.kernel,
                             agg=self.agg,
                             seed=self.seed,
                             backend=self.backend,
                             show_progress=self.show_progress,
                         )
-                    
+
                     pipe.fit(X_train, xreg, **kwargs)
                     # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead`
-                    
+
                     self.models[name] = pipe
 
                     if self.preprocess is True:
@@ -495,7 +507,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                             h=X_test.shape[0], **kwargs
                         )  # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead`
 
-                    if (self.replications is not None) or (self.type_pi=="gaussian"):                        
+                    if (self.replications is not None) or (
+                        self.type_pi == "gaussian"
+                    ):
                         rmse = mean_squared_error(
                             X_test, X_pred.mean, squared=False
                         )
@@ -504,15 +518,19 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                         winklerscore = winkler_score(X_pred, X_test, level=95)
                         coveragecalc = coverage(X_pred, X_test, level=95)
                     else:
-                        rmse = mean_squared_error(X_test, X_pred.mean, squared=False)
+                        rmse = mean_squared_error(
+                            X_test, X_pred.mean, squared=False
+                        )
                         mae = mean_absolute_error(X_test, X_pred.mean)
                         mpl = mean_pinball_loss(X_test, X_pred.mean)
 
                     names.append(name)
                     RMSE.append(rmse)
                     MAE.append(mae)
                     MPL.append(mpl)
-                    if (self.replications is not None) or (self.type_pi=="gaussian"):
+                    if (self.replications is not None) or (
+                        self.type_pi == "gaussian"
+                    ):
                         WINKLERSCORE.append(winklerscore)
                         COVERAGE.append(coveragecalc)
                     TIME.append(time.time() - start)
@@ -522,7 +540,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                         CUSTOM_METRIC.append(custom_metric)
 
                     if self.verbose > 0:
-                        if (self.replications is not None) or (self.type_pi=="gaussian"):
+                        if (self.replications is not None) or (
+                            self.type_pi == "gaussian"
+                        ):
                             scores_verbose = {
                                 "Model": name,
                                 "RMSE": rmse,
@@ -554,7 +574,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                         print(name + " model failed to execute")
                         print(exception)
 
-        if (self.replications is not None) or (self.type_pi=="gaussian"):
+        if (self.replications is not None) or (self.type_pi == "gaussian"):
             scores = {
                 "Model": names,
                 "RMSE": RMSE,

diff --git a/nnetsauce/lazypredict/lazydeepMTS.py b/nnetsauce/lazypredict/lazydeepMTS.py
@@ -166,6 +166,7 @@ def __init__(
         type_scaling=("std", "std", "std"),
         lags=1,
         type_pi="kde",
+        block_size=None,
         replications=None,
         kernel=None,
         agg="mean",
@@ -199,6 +200,7 @@ def __init__(
             backend=backend,
             lags=lags,
             type_pi=type_pi,
+            block_size=block_size,
             replications=replications,
             kernel=kernel,
             agg=agg,
@@ -210,11 +212,11 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
 
         Parameters:
 
-            X_train : array-like,
+            X_train : array-like or data frame,
                 Training vectors, where rows is the number of samples
                 and columns is the number of features.
 
-            X_test : array-like,
+            X_test : array-like or data frame,
                 Testing vectors, where rows is the number of samples
                 and columns is the number of features.
 
@@ -321,6 +323,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                                         type_scaling=self.type_scaling,
                                         lags=self.lags,
                                         type_pi=self.type_pi,
+                                        block_size=self.block_size,
                                         replications=self.replications,
                                         kernel=self.kernel,
                                         agg=self.agg,
@@ -353,6 +356,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                                         type_scaling=self.type_scaling,
                                         lags=self.lags,
                                         type_pi=self.type_pi,
+                                        block_size=self.block_size,
                                         replications=self.replications,
                                         kernel=self.kernel,
                                         agg=self.agg,
@@ -458,6 +462,8 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                             type_clust=self.type_clust,
                             type_scaling=self.type_scaling,
                             lags=self.lags,
+                            type_pi=self.type_pi,
+                            block_size=self.block_size,
                             replications=self.replications,
                             kernel=self.kernel,
                             agg=self.agg,
@@ -480,6 +486,8 @@ def fit(self, X_train, X_test, xreg=None, **kwargs):
                             type_clust=self.type_clust,
                             type_scaling=self.type_scaling,
                             lags=self.lags,
+                            type_pi=self.type_pi,
+                            block_size=self.block_size,
                             replications=self.replications,
                             kernel=self.kernel,
                             agg=self.agg,