From 31587d4e67895767fb1c335a6c7633d90090521d Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Sat, 17 Aug 2024 02:38:59 +0300 Subject: [PATCH 1/8] new parameter added in order to replace negative predictions with 0 --- python/prophet/forecaster.py | 21 +++++++++++++++++---- python/prophet/tests/test_prophet.py | 18 +++++++++++++++++- 2 files changed, 34 insertions(+), 5 deletions(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index 12caabf0b..e24cf7adb 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -25,6 +25,7 @@ logger.setLevel(logging.INFO) NANOSECONDS_TO_SECONDS = 1000 * 1000 * 1000 + class Prophet(object): """Prophet forecaster. @@ -77,6 +78,7 @@ class Prophet(object): stan_backend: str as defined in StanBackendEnum default: None - will try to iterate over all available backends and find the working one holidays_mode: 'additive' or 'multiplicative'. Defaults to seasonality_mode. + negative_values: bool check to set all yhat_lower negative prediction values in the DataFrame to 0. 
""" def __init__( @@ -99,6 +101,7 @@ def __init__( stan_backend=None, scaling: str = 'absmax', holidays_mode=None, + negative_prediction_values=True ): self.growth = growth @@ -150,6 +153,7 @@ def __init__( self.train_component_cols = None self.component_modes = None self.train_holiday_names = None + self.negative_prediction_values = negative_prediction_values self.fit_kwargs = {} self.validate_inputs() self._load_stan_backend(stan_backend) @@ -1282,6 +1286,9 @@ def predict(self, df: pd.DataFrame = None, vectorized: bool = True) -> pd.DataFr cols.append('cap') if self.logistic_floor: cols.append('floor') + if not self.negative_prediction_values: + df['trend'] = df['trend'].clip(lower=0) + # Add in forecast components df2 = pd.concat((df[cols], intervals, seasonal_components), axis=1) df2['yhat'] = ( @@ -1444,10 +1451,16 @@ def predict_uncertainty(self, df: pd.DataFrame, vectorized: bool) -> pd.DataFram series = {} for key in ['yhat', 'trend']: - series['{}_lower'.format(key)] = self.percentile( - sim_values[key], lower_p, axis=1) - series['{}_upper'.format(key)] = self.percentile( - sim_values[key], upper_p, axis=1) + if self.negative_prediction_values: + series['{}_lower'.format(key)] = self.percentile( + sim_values[key], lower_p, axis=1) + series['{}_upper'.format(key)] = self.percentile( + sim_values[key], upper_p, axis=1) + else: + series['{}_lower'.format(key)] = np.clip( + self.percentile(sim_values[key], lower_p, axis=1), a_min=0, a_max=None) + series['{}_upper'.format(key)] = np.clip( + self.percentile(sim_values[key], upper_p, axis=1), a_min=0, a_max=None) return pd.DataFrame(series) diff --git a/python/prophet/tests/test_prophet.py b/python/prophet/tests/test_prophet.py index 3df052d94..20b903559 100644 --- a/python/prophet/tests/test_prophet.py +++ b/python/prophet/tests/test_prophet.py @@ -255,6 +255,7 @@ def test_make_future_dataframe_include_history(self, daily_univariate_ts, backen assert len(future) == train.shape[0] + 3 + class 
TestProphetTrendComponent: def test_invalid_growth_input(self, backend): msg = 'Parameter "growth" should be "linear", ' '"logistic" or "flat".' @@ -433,6 +434,22 @@ def test_override_n_changepoints(self, daily_univariate_ts, backend): cp = m.changepoints_t assert cp.shape[0] == 15 + @pytest.mark.parametrize( + "expected", + [5.656087514685135], + ) + def test_without_negative_predictions(self, subdaily_univariate_ts, backend, expected): + test_days = 280 + train, test = train_test_split(subdaily_univariate_ts, test_days) + forecaster = Prophet(stan_backend=backend, negative_prediction_values=False) + forecaster.fit(train, seed=1237861298) + np.random.seed(876543987) + future = forecaster.make_future_dataframe(test_days, include_history=False) + future = forecaster.predict(future) + res = rmse(future["yhat"], test["y"]) + tolerance = 1e-5 + assert res == pytest.approx(expected, rel=tolerance), "backend: {}".format(forecaster.stan_backend) + class TestProphetSeasonalComponent: def test_fourier_series_weekly(self, daily_univariate_ts): @@ -877,7 +894,6 @@ def test_subdaily_holidays(self, subdaily_univariate_ts, backend): assert sum(fcst["special_day"] == 0) == 575 - class TestProphetRegressors: def test_added_regressors(self, daily_univariate_ts, backend): m = Prophet(stan_backend=backend) From 6ec43ac1c7fb9b2deefcf7154bf68451f92b79ca Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Sat, 17 Aug 2024 03:00:04 +0300 Subject: [PATCH 2/8] naming refactor --- python/prophet/forecaster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index e24cf7adb..1f8b82b40 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -78,7 +78,7 @@ class Prophet(object): stan_backend: str as defined in StanBackendEnum default: None - will try to iterate over all available backends and find the working one holidays_mode: 'additive' or 'multiplicative'. Defaults to seasonality_mode. 
- negative_values: bool check to set all yhat_lower negative prediction values in the DataFrame to 0. + negative_prediction_values: bool check to set all yhat_lower negative prediction values in the DataFrame to 0. """ def __init__( From a4120d1c42005bf3d7dde5aecc6aba4d14a601db Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Sat, 17 Aug 2024 14:13:09 +0300 Subject: [PATCH 3/8] refactor on parameter description --- python/prophet/forecaster.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index 1f8b82b40..e95b4e0fb 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -78,7 +78,7 @@ class Prophet(object): stan_backend: str as defined in StanBackendEnum default: None - will try to iterate over all available backends and find the working one holidays_mode: 'additive' or 'multiplicative'. Defaults to seasonality_mode. - negative_prediction_values: bool check to set all yhat_lower negative prediction values in the DataFrame to 0. + negative_prediction_values: Boolean, whether negative predictions are allowed. If False, negative predicted values are clipped to 0. Defaults to True. 
""" def __init__( From 4a36f80058bb240956ccf038c61c06d39270a794 Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Sat, 17 Aug 2024 15:18:40 +0300 Subject: [PATCH 4/8] Encapsulated percentile calculation and optional clipping into a helper function --- python/prophet/forecaster.py | 59 ++++++++++++++++++++-------- python/prophet/tests/test_prophet.py | 2 +- 2 files changed, 44 insertions(+), 17 deletions(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index e95b4e0fb..1ba58ff3b 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -1189,6 +1189,37 @@ def calculate_initial_params(self, num_total_regressors: int) -> ModelParams: sigma_obs=1.0, ) + def calculate_and_clip_percentile(self, data, component, comp, lower_p, upper_p): + """ + A helper function to calculate the lower and upper percentiles for a given component. + + Parameters: + - data: dict or similar + The data structure where the calculated percentiles will be stored. + - component: str + The name of the component for which the percentiles are being calculated. + - comp: array-like + The data for which percentiles are to be calculated. + - lower_p: float + The percentile to calculate for the lower bound. + - upper_p: float + The percentile to calculate for the upper bound. + + Returns: + - None + The function directly modifies the `data` structure by adding the lower and upper percentile values. + """ + lower = self.percentile(comp, lower_p, axis=1) + upper = self.percentile(comp, upper_p, axis=1) + + if not self.negative_prediction_values: + lower = np.clip(lower, a_min=0, a_max=None) + upper = np.clip(upper, a_min=0, a_max=None) + + data[component + '_lower'] = lower + data[component + '_upper'] = upper + + def fit(self, df, **kwargs): """Fit the Prophet model. 
@@ -1422,14 +1453,17 @@ def predict_seasonal_components(self, df): comp = np.matmul(X, beta_c.transpose()) if component in self.component_modes['additive']: comp *= self.y_scale - data[component] = np.nanmean(comp, axis=1) + + if self.negative_prediction_values: + data[component] = np.nanmean(comp, axis=1) + else: + data[component] = np.clip(np.nanmean(comp, axis=1), a_min=0, a_max=None) + if self.uncertainty_samples: - data[component + '_lower'] = self.percentile( - comp, lower_p, axis=1, - ) - data[component + '_upper'] = self.percentile( - comp, upper_p, axis=1, + self.calculate_and_clip_percentile( + data, component, comp, lower_p, upper_p ) + return pd.DataFrame(data) def predict_uncertainty(self, df: pd.DataFrame, vectorized: bool) -> pd.DataFrame: @@ -1451,16 +1485,9 @@ def predict_uncertainty(self, df: pd.DataFrame, vectorized: bool) -> pd.DataFram series = {} for key in ['yhat', 'trend']: - if self.negative_prediction_values: - series['{}_lower'.format(key)] = self.percentile( - sim_values[key], lower_p, axis=1) - series['{}_upper'.format(key)] = self.percentile( - sim_values[key], upper_p, axis=1) - else: - series['{}_lower'.format(key)] = np.clip( - self.percentile(sim_values[key], lower_p, axis=1), a_min=0, a_max=None) - series['{}_upper'.format(key)] = np.clip( - self.percentile(sim_values[key], upper_p, axis=1), a_min=0, a_max=None) + self.calculate_and_clip_percentile( + series, key, sim_values[key], lower_p, upper_p + ) return pd.DataFrame(series) diff --git a/python/prophet/tests/test_prophet.py b/python/prophet/tests/test_prophet.py index 20b903559..257380449 100644 --- a/python/prophet/tests/test_prophet.py +++ b/python/prophet/tests/test_prophet.py @@ -30,7 +30,7 @@ class TestProphetFitPredictDefault: def test_fit_predict(self, daily_univariate_ts, backend, scaling, expected): test_days = 30 train, test = train_test_split(daily_univariate_ts, test_days) - forecaster = Prophet(stan_backend=backend, scaling=scaling) + forecaster = 
Prophet(stan_backend=backend, scaling=scaling, negative_prediction_values=False) forecaster.fit(train, seed=1237861298) np.random.seed(876543987) future = forecaster.make_future_dataframe(test_days, include_history=False) From 8d425c21fafe9a02adee255e54e709c984509fa2 Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Sat, 17 Aug 2024 15:19:28 +0300 Subject: [PATCH 5/8] refactor --- python/prophet/tests/test_prophet.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/prophet/tests/test_prophet.py b/python/prophet/tests/test_prophet.py index 257380449..20b903559 100644 --- a/python/prophet/tests/test_prophet.py +++ b/python/prophet/tests/test_prophet.py @@ -30,7 +30,7 @@ class TestProphetFitPredictDefault: def test_fit_predict(self, daily_univariate_ts, backend, scaling, expected): test_days = 30 train, test = train_test_split(daily_univariate_ts, test_days) - forecaster = Prophet(stan_backend=backend, scaling=scaling, negative_prediction_values=False) + forecaster = Prophet(stan_backend=backend, scaling=scaling) forecaster.fit(train, seed=1237861298) np.random.seed(876543987) future = forecaster.make_future_dataframe(test_days, include_history=False) From 3063ab33ee132de84d5fb81947aa02449a1896d7 Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Wed, 9 Oct 2024 23:17:55 +0300 Subject: [PATCH 6/8] Removed seasonality clipping because seasonality values can drop below zero --- python/prophet/forecaster.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index 1ba58ff3b..f0cedc0ec 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -1454,10 +1454,7 @@ def predict_seasonal_components(self, df): if component in self.component_modes['additive']: comp *= self.y_scale - if self.negative_prediction_values: - data[component] = np.nanmean(comp, axis=1) - else: - data[component] = np.clip(np.nanmean(comp, axis=1), a_min=0, a_max=None) + 
data[component] = np.nanmean(comp, axis=1) if self.uncertainty_samples: self.calculate_and_clip_percentile( From 5ec8614464a26c552c0062393fe7c781ff2cbcf1 Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Fri, 11 Oct 2024 00:22:06 +0300 Subject: [PATCH 7/8] Seasonality values can be negative while prediction stays positive --- python/prophet/forecaster.py | 4 ++++ python/prophet/tests/test_prophet.py | 12 +++--------- 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index f0cedc0ec..089a90a81 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -1326,6 +1326,10 @@ def predict(self, df: pd.DataFrame = None, vectorized: bool = True) -> pd.DataFr df2['trend'] * (1 + df2['multiplicative_terms']) + df2['additive_terms'] ) + + if not self.negative_prediction_values: + df2['yhat'] = df2['yhat'].clip(lower=0) + return df2 @staticmethod diff --git a/python/prophet/tests/test_prophet.py b/python/prophet/tests/test_prophet.py index 20b903559..10b9b809b 100644 --- a/python/prophet/tests/test_prophet.py +++ b/python/prophet/tests/test_prophet.py @@ -434,21 +434,15 @@ def test_override_n_changepoints(self, daily_univariate_ts, backend): cp = m.changepoints_t assert cp.shape[0] == 15 - @pytest.mark.parametrize( - "expected", - [5.656087514685135], - ) - def test_without_negative_predictions(self, subdaily_univariate_ts, backend, expected): + def test_without_negative_predictions(self, subdaily_univariate_ts, backend): test_days = 280 train, test = train_test_split(subdaily_univariate_ts, test_days) - forecaster = Prophet(stan_backend=backend, negative_prediction_values=False) + forecaster = Prophet(stan_backend=backend, negative_prediction_values=False, weekly_seasonality=True, yearly_seasonality=True) forecaster.fit(train, seed=1237861298) np.random.seed(876543987) future = forecaster.make_future_dataframe(test_days, include_history=False) future = forecaster.predict(future) - res = 
rmse(future["yhat"], test["y"]) - tolerance = 1e-5 - assert res == pytest.approx(expected, rel=tolerance), "backend: {}".format(forecaster.stan_backend) + assert (future['yhat'].values >= 0).all() class TestProphetSeasonalComponent: From 62f83fe10cd04ee25cb5a93c60e5ff6c22eaa2a2 Mon Sep 17 00:00:00 2001 From: Murat Ergul Date: Fri, 1 Nov 2024 23:23:39 +0300 Subject: [PATCH 8/8] seasonality upper and lower values can be negative --- python/prophet/forecaster.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/prophet/forecaster.py b/python/prophet/forecaster.py index 089a90a81..e51d83b08 100644 --- a/python/prophet/forecaster.py +++ b/python/prophet/forecaster.py @@ -1461,8 +1461,11 @@ def predict_seasonal_components(self, df): data[component] = np.nanmean(comp, axis=1) if self.uncertainty_samples: - self.calculate_and_clip_percentile( - data, component, comp, lower_p, upper_p + data[component + '_lower'] = self.percentile( + comp, lower_p, axis=1, + ) + data[component + '_upper'] = self.percentile( + comp, upper_p, axis=1, ) return pd.DataFrame(data)