diff --git a/CHANGES.md b/CHANGES.md index d65a8be3..b3ce7f5d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,9 +1,10 @@ -# version 0.22.4 +# version 0.22.7 - Implement new types of predictive simulation intervals (`type_pi`s): independent bootstrap, block bootstrap, 2 variants of split conformal prediction in class `MTS` (see updated docs) - Gaussian prediction intervals `type_pi == "gaussian"` in class `MTS` - Implement Winkler score in `LazyMTS` and `LazyDeepMTS` for probabilistic forecasts - Use conformalized `Estimator`s in `MTS` (see `examples/mts_conformal_not_sims.py`) +- Include `block_size` for block bootstrapping methods for `*MTS` classes # version 0.20.6 diff --git a/nnetsauce/deep/deepMTS.py b/nnetsauce/deep/deepMTS.py index 144ee19e..749c073a 100644 --- a/nnetsauce/deep/deepMTS.py +++ b/nnetsauce/deep/deepMTS.py @@ -61,6 +61,23 @@ class DeepMTS(MTS): lags: int. number of lags used for each time series. + type_pi: str. + type of prediction interval; currently: + - "gaussian": simple, fast, but: assumes stationarity of Gaussian in-sample residuals and independence in the multivariate case + - "kde": based on Kernel Density Estimation of in-sample residuals + - "bootstrap": based on independent bootstrap of in-sample residuals + - "block-bootstrap": based on basic block bootstrap of in-sample residuals + - "scp-kde": Sequential split conformal prediction with Kernel Density Estimation of calibrated residuals + - "scp-bootstrap": Sequential split conformal prediction with independent bootstrap of calibrated residuals + - "scp-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of calibrated residuals + - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals + - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals + - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals + + block_size: int. + size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). + Default is round(3.15*(n_residuals^1/3)) + replications: int. number of replications (if needed, for predictive simulation). Default is 'None'. @@ -186,6 +203,7 @@ def __init__( type_scaling=("std", "std", "std"), lags=1, type_pi="kde", + block_size=None, replications=None, kernel=None, agg="mean", @@ -232,6 +250,7 @@ def __init__( type_scaling=type_scaling, seed=seed, type_pi=type_pi, + block_size=block_size, replications=replications, kernel=kernel, agg=agg, diff --git a/nnetsauce/lazypredict/lazyMTS.py b/nnetsauce/lazypredict/lazyMTS.py index 4b0337f5..ce77a688 100644 --- a/nnetsauce/lazypredict/lazyMTS.py +++ b/nnetsauce/lazypredict/lazyMTS.py @@ -162,6 +162,7 @@ def __init__( type_scaling=("std", "std", "std"), lags=1, type_pi="kde", + block_size=None, replications=None, kernel=None, agg="mean", @@ -194,6 +195,7 @@ def __init__( backend=backend, lags=lags, type_pi=type_pi, + block_size=block_size, replications=replications, kernel=kernel, agg=agg, @@ -205,11 +207,11 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): Parameters: - X_train: array-like, + X_train: array-like or data frame, Training vectors, where rows is the number of samples and columns is the number of features. - X_test: array-like, + X_test: array-like or data frame, Testing vectors, where rows is the number of samples and columns is the number of features. @@ -316,6 +318,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_scaling=self.type_scaling, lags=self.lags, type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -326,7 +329,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): ), ] ) - else: # "random_state" in model().get_params().keys() + else: # "random_state" in model().get_params().keys() pipe = Pipeline( steps=[ ("preprocessor", preprocessor), @@ -347,6 +350,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_scaling=self.type_scaling, lags=self.lags, type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -365,9 +369,11 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): X_pred = pipe["regressor"].predict( h=X_test.shape[0], **kwargs - ) + ) - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or ( + self.type_pi == "gaussian" + ): rmse = mean_squared_error( X_test, X_pred.mean, squared=False ) @@ -385,7 +391,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): MAE.append(mae) MPL.append(mpl) - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or ( + self.type_pi == "gaussian" + ): WINKLERSCORE.append(winklerscore) COVERAGE.append(coveragecalc) TIME.append(time.time() - start) @@ -395,7 +403,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): CUSTOM_METRIC.append(custom_metric) if self.verbose > 0: - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or ( + self.type_pi == "gaussian" + ): scores_verbose = { "Model": name, "RMSE": rmse, @@ -447,6 +457,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_scaling=self.type_scaling, lags=self.lags, type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -470,6 +481,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_scaling=self.type_scaling, lags=self.lags, type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -477,10 +489,10 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): backend=self.backend, show_progress=self.show_progress, ) - + pipe.fit(X_train, xreg, **kwargs) # pipe.fit(X_train, xreg=xreg) # DO xreg like in `ahead` - + self.models[name] = pipe if self.preprocess is True: @@ -495,7 +507,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): h=X_test.shape[0], **kwargs ) # X_pred = pipe.predict(h=X_test.shape[0], new_xreg=new_xreg) ## DO xreg like in `ahead` - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or ( + self.type_pi == "gaussian" + ): rmse = mean_squared_error( X_test, X_pred.mean, squared=False ) @@ -504,7 +518,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): winklerscore = winkler_score(X_pred, X_test, level=95) coveragecalc = coverage(X_pred, X_test, level=95) else: - rmse = mean_squared_error(X_test, X_pred.mean, squared=False) + rmse = mean_squared_error( + X_test, X_pred.mean, squared=False + ) mae = mean_absolute_error(X_test, X_pred.mean) mpl = mean_pinball_loss(X_test, X_pred.mean) @@ -512,7 +528,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): RMSE.append(rmse) MAE.append(mae) MPL.append(mpl) - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or ( + self.type_pi == "gaussian" + ): WINKLERSCORE.append(winklerscore) COVERAGE.append(coveragecalc) TIME.append(time.time() - start) @@ -522,7 +540,9 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): CUSTOM_METRIC.append(custom_metric) if self.verbose > 0: - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or ( + self.type_pi == "gaussian" + ): scores_verbose = { "Model": name, "RMSE": rmse, @@ -554,7 +574,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): print(name + " model failed to execute") print(exception) - if (self.replications is not None) or (self.type_pi=="gaussian"): + if (self.replications is not None) or (self.type_pi == "gaussian"): scores = { "Model": names, "RMSE": RMSE, diff --git a/nnetsauce/lazypredict/lazydeepMTS.py b/nnetsauce/lazypredict/lazydeepMTS.py index 6f7adaa3..d5c2ac97 100644 --- a/nnetsauce/lazypredict/lazydeepMTS.py +++ b/nnetsauce/lazypredict/lazydeepMTS.py @@ -166,6 +166,7 @@ def __init__( type_scaling=("std", "std", "std"), lags=1, type_pi="kde", + block_size=None, replications=None, kernel=None, agg="mean", @@ -199,6 +200,7 @@ def __init__( backend=backend, lags=lags, type_pi=type_pi, + block_size=block_size, replications=replications, kernel=kernel, agg=agg, @@ -210,11 +212,11 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): Parameters: - X_train : array-like, + X_train : array-like or data frame, Training vectors, where rows is the number of samples and columns is the number of features. - X_test : array-like, + X_test : array-like or data frame, Testing vectors, where rows is the number of samples and columns is the number of features. @@ -321,6 +323,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_scaling=self.type_scaling, lags=self.lags, type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -353,6 +356,7 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_scaling=self.type_scaling, lags=self.lags, type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -458,6 +462,8 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_clust=self.type_clust, type_scaling=self.type_scaling, lags=self.lags, + type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, @@ -480,6 +486,8 @@ def fit(self, X_train, X_test, xreg=None, **kwargs): type_clust=self.type_clust, type_scaling=self.type_scaling, lags=self.lags, + type_pi=self.type_pi, + block_size=self.block_size, replications=self.replications, kernel=self.kernel, agg=self.agg, diff --git a/nnetsauce/mts/mts.py b/nnetsauce/mts/mts.py index d3c078f3..6f6671df 100644 --- a/nnetsauce/mts/mts.py +++ b/nnetsauce/mts/mts.py @@ -85,9 +85,10 @@ class MTS(Base): - "scp2-kde": Sequential split conformal prediction with Kernel Density Estimation of standardized calibrated residuals - "scp2-bootstrap": Sequential split conformal prediction with independent bootstrap of standardized calibrated residuals - "scp2-block-bootstrap": Sequential split conformal prediction with basic block bootstrap of standardized calibrated residuals - + block_size: int. - size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap") + size of block for 'type_pi' in ("block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap"). + Default is round(3.15*(n_residuals^1/3)) replications: int. number of replications (if needed, for predictive simulation). Default is 'None'. @@ -287,8 +288,8 @@ def __init__( self.return_std_ = None self.df_ = None self.residuals_ = [] - self.abs_calib_residuals_ = None - self.calib_residuals_quantile_ = None + self.abs_calib_residuals_ = None + self.calib_residuals_quantile_ = None self.residuals_sims_ = None self.kde_ = None self.sims_ = None @@ -420,7 +421,7 @@ def fit(self, X, xreg=None, **kwargs): ).tolist() ) - if self.type_pi in ( + if self.type_pi in ( "scp-kde", "scp-bootstrap", "scp-block-bootstrap", @@ -454,12 +455,12 @@ def fit(self, X, xreg=None, **kwargs): self.obj.fit(X=scaled_Z[second_half_idx, :], y=centered_y_i) self.fit_objs_[i] = deepcopy(self.obj) - self.residuals_ = np.asarray(residuals_).T + self.residuals_ = np.asarray(residuals_).T if self.type_pi == "gaussian": self.gaussian_preds_std_ = np.std(self.residuals_, axis=0) - if self.type_pi in ( + if self.type_pi in ( "scp2-kde", "scp2-bootstrap", "scp2-block-bootstrap", @@ -570,14 +571,18 @@ def predict(self, h=5, level=95, **kwargs): "scp-kde", "scp2-kde", ): # kde - if self.verbose == 1: + if self.verbose == 1: self.residuals_sims_ = tuple( - self.kde_.sample(n_samples=h, random_state=self.seed + 100 * i) + self.kde_.sample( + n_samples=h, random_state=self.seed + 100 * i + ) for i in tqdm(range(self.replications)) ) - elif self.verbose == 0: + elif self.verbose == 0: self.residuals_sims_ = tuple( - self.kde_.sample(n_samples=h, random_state=self.seed + 100 * i) + self.kde_.sample( + n_samples=h, random_state=self.seed + 100 * i + ) for i in range(self.replications) ) @@ -585,7 +590,7 @@ def predict(self, h=5, level=95, **kwargs): assert self.replications is not None and isinstance( self.replications, int ), "'replications' must be provided and be an integer" - if self.verbose == 1: + if self.verbose == 1: self.residuals_sims_ = tuple( ts.bootstrap( self.residuals_, @@ -606,36 +611,39 @@ def predict(self, h=5, level=95, **kwargs): for i in range(self.replications) ) - if self.type_pi in ( "block-bootstrap", "scp-block-bootstrap", "scp2-block-bootstrap", ): if self.block_size is None: - self.block_size = int(np.ceil(3.15*(self.residuals_.shape[0]**(1/3)))) + self.block_size = int( + np.ceil(3.15 * (self.residuals_.shape[0] ** (1 / 3))) + ) assert self.replications is not None and isinstance( self.replications, int ), "'replications' must be provided and be an integer" - if self.verbose == 1: + if self.verbose == 1: self.residuals_sims_ = tuple( ts.bootstrap( - self.residuals_, h=h, - block_size=self.block_size, - seed=self.seed + 100 * i + self.residuals_, + h=h, + block_size=self.block_size, + seed=self.seed + 100 * i, ) for i in tqdm(range(self.replications)) ) - elif self.verbose == 0: + elif self.verbose == 0: self.residuals_sims_ = tuple( ts.bootstrap( - self.residuals_, h=h, - block_size=self.block_size, - seed=self.seed + 100 * i + self.residuals_, + h=h, + block_size=self.block_size, + seed=self.seed + 100 * i, ) for i in range(self.replications) - ) + ) for _ in range(h): @@ -695,14 +703,14 @@ def predict(self, h=5, level=95, **kwargs): meanf = [] lower = [] upper = [] - + if self.type_pi in ( "scp2-kde", "scp2-bootstrap", "scp2-block-bootstrap", ): - - if self.verbose == 1: + + if self.verbose == 1: self.sims_ = tuple( ( self.mean_ @@ -712,7 +720,7 @@ def predict(self, h=5, level=95, **kwargs): ) ) elif self.verbose == 0: - self.sims_ = tuple( + self.sims_ = tuple( ( self.mean_ + self.residuals_sims_[i] @@ -721,7 +729,7 @@ def predict(self, h=5, level=95, **kwargs): ) ) else: - if self.verbose == 1: + if self.verbose == 1: self.sims_ = tuple( ( self.mean_ + self.residuals_sims_[i] diff --git a/nnetsauce/utils/timeseries.py b/nnetsauce/utils/timeseries.py index 12f2a8cb..c1334d53 100644 --- a/nnetsauce/utils/timeseries.py +++ b/nnetsauce/utils/timeseries.py @@ -106,7 +106,7 @@ def coverage(obj, actual, level=95): lt = obj.lower ut = obj.upper n_points = actual.shape[0] - + if isinstance(lt, pd.DataFrame): actual = actual.values @@ -165,7 +165,7 @@ def winkler_score(obj, actual, level=95): lt = obj.lower ut = obj.upper n_points = actual.shape[0] - + # Ensure the arrays have the same length assert ( n_points == lt.shape[0] == ut.shape[0] diff --git a/setup.py b/setup.py index 7926c59b..af3c851f 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ from codecs import open from os import path -__version__ = '0.22.6' +__version__ = '0.22.7' # get the dependencies and installs here = path.abspath(path.dirname(__file__))