ITMO-NSS-team · Lopa10ko · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 20, 2024
diff --git a/pytsbe/models/automl_forecasters/automl.py b/pytsbe/models/automl_forecasters/automl.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
-from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
+from fedot.core.repository.metrics_repository import RegressionMetricsEnum
 from golem.core.tuning.simultaneous import SimultaneousTuner
 
 from pytsbe.data.forecast_output import ForecastResults

diff --git a/pytsbe/models/chronos/__init__.py b/pytsbe/models/chronos/__init__.py
diff --git a/pytsbe/models/chronos/chronos_forecaster.py b/pytsbe/models/chronos/chronos_forecaster.py
@@ -0,0 +1,51 @@
+import copy
+import numpy as np
+import pandas as pd
+import torch
+
+from pytsbe.data.forecast_output import ForecastResults
+from pytsbe.models.forecast import Forecaster
+
+try:
+    from chronos import ChronosPipeline
+except ImportError:
+    print('Try installing Chronos via pip install '
+          'git+https://github.com/amazon-science/chronos-forecasting.git')
+
+
+class ChronosForecaster(Forecaster):
+    """
+    Class for time series forecasting with Chronos pretrained models
+    Source code: https://github.com/amazon-science/chronos-forecasting
+
+    The pretrained pipeline is loaded once in the constructor and no training is performed by
+    this wrapper. Only univariate forecasting is implemented; both multivariate methods raise
+    NotImplementedError.
+    """
+
+    def __init__(self, **params):
+        """
+        :param params: passed to the base Forecaster as is; the optional 'hf_model' key is the
+            model identifier given to ChronosPipeline.from_pretrained
+            ('amazon/chronos-t5-tiny' when the key is absent)
+        """
+        super().__init__(**params)
+        # Name of the column with the series values in the incoming dataframes
+        self.target = 'value'
+        self.forecaster = self.__load_pretrained_pipeline(params.get('hf_model', 'amazon/chronos-t5-tiny'))
+
+    def fit_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
+        """ Do nothing: the pretrained pipeline loaded in the constructor is used without fitting """
+        pass
+
+    def fit_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, target_column: str,
+                            predictors_columns: list, **kwargs):
+        """ Not supported, always raises NotImplementedError """
+        raise NotImplementedError('Chronos does not support fit for multivariate time series forecasting')
+
+    def predict_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
+        """
+        Forecast forecast_horizon steps ahead using the 'value' column as context
+
+        :param historical_values: dataframe that contains the column named by self.target
+        :param forecast_horizon: passed to the pipeline as prediction_length
+        :return: ForecastResults whose predictions are the median over axis 0 of the first element
+            of the pipeline output (presumably the sample axis of the single input series -
+            confirm against the Chronos API)
+        """
+        forecast = self.forecaster.predict(
+            torch.tensor(historical_values[self.target].values),
+            prediction_length=forecast_horizon
+        )
+        return ForecastResults(predictions=np.median(forecast[0].numpy(), axis=0))
+
+    def predict_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, target_column: str,
+                                predictors_columns: list, **kwargs):
+        """ Not supported, always raises NotImplementedError """
+        raise NotImplementedError('Chronos does not support predict for multivariate time series forecasting')
+
+    @staticmethod
+    def __load_pretrained_pipeline(hf_model: str) -> 'ChronosPipeline':
+        """
+        Load the pretrained Chronos pipeline, on CUDA when available and on CPU otherwise
+
+        The return annotation is a string on purpose: the module-level import of ChronosPipeline
+        is optional, and an unquoted annotation is evaluated while the class body is executed,
+        which would fail with NameError as soon as the package is missing.
+
+        :param hf_model: model identifier passed to ChronosPipeline.from_pretrained
+        :raises ImportError: if the chronos package could not be imported
+        """
+        try:
+            pipeline_class = ChronosPipeline
+        except NameError as ex:
+            raise ImportError('Chronos is not installed. Try installing Chronos via pip install '
+                              'git+https://github.com/amazon-science/chronos-forecasting.git') from ex
+
+        return pipeline_class.from_pretrained(
+            hf_model,
+            torch_dtype=torch.bfloat16,
+            device_map="cuda" if torch.cuda.is_available() else "cpu"
+        )
diff --git a/pytsbe/models/fedot_industrial_forecaster.py b/pytsbe/models/fedot_industrial_forecaster.py
@@ -0,0 +1,83 @@
+import pandas as pd
+import numpy as np
+import shutil
+
+
+try:
+    from fedot_ind.api.main import FedotIndustrial
+except ImportError:
+    print('Does not found Fedot.Industrial library. Continue...')
+
+
+from pytsbe.data.forecast_output import ForecastResults
+from pytsbe.models.forecast import Forecaster
+
+import logging
+logging.raiseExceptions = False
+
+
+class FedotIndustrialForecaster(Forecaster):
+    """
+    Class for time series forecasting with FEDOT.Industrial framework
+    Source code: https://github.com/aimclub/Fedot.Industrial
+
+    Only univariate forecasting is implemented; both multivariate methods raise
+    NotImplementedError.
+    """
+
+    def __init__(self, **params):
+        """
+        :param params: passed to the base Forecaster and merged over the defaults below;
+            the merged dictionary is later unpacked into the FedotIndustrial constructor
+        """
+        super().__init__(**params)
+        default_params = {
+            'timeout': 6,
+            'n_jobs': 1,
+            'metric': 'smape',
+            'pop_size': 10,
+            'with_tuning': False,
+            'industrial_strategy': 'forecasting_assumptions'
+        }
+        # User-supplied values take precedence over the defaults
+        self.init_params = {**default_params, **params}
+        self.obtained_model = None
+
+    def fit_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
+        """ Train FEDOT.Industrial framework (launch AutoML algorithm) """
+        input_data = prepare_input_ts_data(historical_values, forecast_horizon)
+
+        model = FedotIndustrial(problem='ts_forecasting',
+                                task_params={'forecast_length': forecast_horizon},
+                                **self.init_params)
+        model.fit(input_data)
+        self.obtained_model = model
+
+        # TODO: remove when composition history managing becomes a responsibility of Fedot.Industrial
+        # Cleanup is best effort: a missing key or an already removed directory must not turn
+        # a successfully finished fit into a failure
+        history_dir = model.config_dict.get('history_dir')
+        if history_dir:
+            shutil.rmtree(history_dir, ignore_errors=True)
+
+    def fit_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int,
+                            target_column: str, predictors_columns: list, **kwargs):
+        """ Create pipeline for multivariate time series forecasting """
+        raise NotImplementedError()
+
+    def predict_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
+        """
+        Use obtained pipeline to make predictions
+
+        The fitted model returns a mapping with several candidate forecasts. Every candidate is
+        scored with the configured metric against the last forecast_horizon values of
+        historical_values, and the candidate with the lowest score is returned.
+
+        :return: ForecastResults with the selected forecast as a numpy array
+        :raises ValueError: if the model returned no candidate forecasts at all
+        """
+        input_data = prepare_input_ts_data(historical_values, forecast_horizon)
+        auto_labels = self.obtained_model.predict(input_data)
+
+        metric = self.init_params.get('metric', 'smape')
+        min_metric = float('inf')
+        forecast = None
+        first_candidate = None
+        for predict in auto_labels.values():
+            if first_candidate is None:
+                first_candidate = predict
+
+            # get_metrics evaluates whatever is stored in predicted_labels
+            self.obtained_model.predicted_labels = predict
+            current_metric = float(self.obtained_model.get_metrics(target=input_data[1],
+                                                                   metric_names=(metric,))[metric][0])
+
+            if current_metric < min_metric:
+                min_metric = current_metric
+                forecast = predict
+
+        if forecast is None:
+            # No candidate scored below infinity (e.g. every metric value is NaN):
+            # use the first candidate instead of failing on an unbound variable
+            if first_candidate is None:
+                raise ValueError('FEDOT.Industrial returned no candidate forecasts')
+            forecast = first_candidate
+
+        return ForecastResults(predictions=np.array(forecast))
+
+    def predict_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int,
+                                target_column: str, predictors_columns: list, **kwargs):
+        """ Not implemented, always raises NotImplementedError """
+        raise NotImplementedError()
+
+
+def prepare_input_ts_data(historical_values: pd.DataFrame, forecast_horizon: int):
+    """
+    Return the series from the 'value' column and its tail of forecast_horizon elements
+
+    :param historical_values: dataframe with the 'value' column
+    :param forecast_horizon: number of last elements to return as the second array
+    :return: tuple of two flat numpy arrays: the whole series and its last forecast_horizon
+        elements (the whole series when the horizon exceeds its length, an empty array when
+        the horizon is not positive)
+    """
+    time_series_label = 'value'
+    series = np.array(historical_values[time_series_label]).flatten()
+    # A plain series[-forecast_horizon:] returns the WHOLE series for a zero horizon, because -0 == 0
+    tail = series[-forecast_horizon:] if forecast_horizon > 0 else series[:0]
+    return series, tail.flatten()
diff --git a/pytsbe/models/lagllama/__init__.py b/pytsbe/models/lagllama/__init__.py
diff --git a/pytsbe/models/lagllama/lag_llama/__init__.py b/pytsbe/models/lagllama/lag_llama/__init__.py
diff --git a/pytsbe/models/lagllama/lag_llama/data/__init__.py b/pytsbe/models/lagllama/lag_llama/data/__init__.py
diff --git a/pytsbe/models/lagllama/lag_llama/data/augmentations/__init__.py b/pytsbe/models/lagllama/lag_llama/data/augmentations/__init__.py
diff --git a/pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mask.py b/pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mask.py
@@ -0,0 +1,40 @@
+import torch
+
+
+@torch.no_grad()
+def freq_mask(x, y, rate=0.1, dim=1):
+    """
+    Randomly zero out frequency components of the concatenation of x and y.
+
+    :param x: tensor with the past targets
+    :param y: tensor with the future targets; must be concatenable with x along dim
+    :param rate: probability for each frequency component to be zeroed
+    :param dim: dimension along which x and y are concatenated and transformed
+    :return: tuple of two tensors with the sizes of x and y along dim
+    """
+    # Get lengths of the input tensors along the specified dimension.
+    x_len = x.shape[dim]
+    y_len = y.shape[dim]
+
+    # Concatenate x and y along the specified dimension.
+    # x and y represent past and future targets respectively.
+    xy = torch.cat([x, y], dim=dim)
+
+    # Perform a real-valued fast Fourier transform (RFFT) on the concatenated tensor.
+    # This transforms the time series data into the frequency domain.
+    xy_f = torch.fft.rfft(xy, dim=dim)
+
+    # Create a random mask with a probability defined by 'rate'.
+    # This mask will be used to randomly select frequencies to be zeroed out.
+    m = torch.rand_like(xy_f, dtype=xy.dtype) < rate
+
+    # Apply the mask to the real and imaginary parts of the frequency data,
+    # setting the selected frequencies to zero. This 'masks' those frequencies.
+    freal = xy_f.real.masked_fill(m, 0)
+    fimag = xy_f.imag.masked_fill(m, 0)
+
+    # Combine the masked real and imaginary parts back into complex frequency data.
+    xy_f = torch.complex(freal, fimag)
+
+    # Perform an inverse RFFT to transform the data back to the time domain.
+    # The masked frequencies will affect the reconstructed time series.
+    xy = torch.fft.irfft(xy_f, dim=dim)
+
+    # Without an explicit length the inverse RFFT returns an even number of samples, so for an odd
+    # total length the result is one sample short. The first sample of x is prepended to restore
+    # the length. The padding follows 'dim' (the axis used to be hard-coded to 1, which failed
+    # with a size mismatch for any other dim).
+    if x_len + y_len != xy.shape[dim]:
+        xy = torch.cat([x.narrow(dim, 0, 1), xy], dim=dim)
+
+    # Split the reconstructed data back into two parts corresponding to the original x and y.
+    return torch.split(xy, [x_len, y_len], dim=dim)
diff --git a/pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mix.py b/pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mix.py
@@ -0,0 +1,63 @@
+import numpy as np
+import torch
+
+
+@torch.no_grad()
+def freq_mix(x, y, rate=0.1, dim=1):
+    """
+    Replace randomly selected frequency components of [x, y] with the components of another
+    sequence taken from the same batch.
+
+    :param x: tensor with the past targets; the first dimension is shuffled to pick the partner
+        sequences
+    :param y: tensor with the future targets; must be concatenable with x along dim
+    :param rate: probability for each frequency component to be selected for replacement
+    :param dim: dimension along which x and y are concatenated and transformed
+    :return: tuple of two tensors with the sizes of x and y along dim
+    """
+    # Get lengths of the input tensors along the specified dimension.
+    x_len = x.shape[dim]
+    y_len = y.shape[dim]
+
+    # Concatenate x and y along the specified dimension.
+    # x and y represent past and future targets respectively.
+    xy = torch.cat([x, y], dim=dim)
+
+    # Perform a real-valued fast Fourier transform (RFFT) on the concatenated tensor.
+    xy_f = torch.fft.rfft(xy, dim=dim)
+
+    # Create a random mask with a probability defined by 'rate'.
+    # This mask will be used to select which frequencies to manipulate.
+    m = torch.rand_like(xy_f, dtype=xy.dtype) < rate
+
+    # Calculate the amplitude of the frequency components.
+    amp = abs(xy_f)
+
+    # Sort the amplitudes in descending order; 'index' holds, for every rank position, the index
+    # of the frequency bin found there. Positions whose bin index is 2 or less are excluded
+    # from the mask.
+    # NOTE(review): dominant_mask is laid out in rank order while m is in bin order - confirm
+    # that combining them element-wise is the intended way to protect dominant frequencies.
+    _, index = amp.sort(dim=dim, descending=True)
+    dominant_mask = index > 2
+    m = torch.bitwise_and(m, dominant_mask)
+
+    # Apply the mask to the real and imaginary parts of the frequency data,
+    # setting masked frequencies to zero.
+    freal = xy_f.real.masked_fill(m, 0)
+    fimag = xy_f.imag.masked_fill(m, 0)
+
+    # Shuffle the batches in x and y to mix data from different sequences.
+    b_idx = np.arange(x.shape[0])
+    np.random.shuffle(b_idx)
+    x2, y2 = x[b_idx], y[b_idx]
+
+    # Concatenate the shuffled tensors and perform RFFT.
+    xy2 = torch.cat([x2, y2], dim=dim)
+    xy2_f = torch.fft.rfft(xy2, dim=dim)
+
+    # Invert the mask and apply it to the shuffled frequency data, so that the shuffled
+    # sequences contribute only the components that were zeroed above.
+    m = torch.bitwise_not(m)
+    freal2 = xy2_f.real.masked_fill(m, 0)
+    fimag2 = xy2_f.imag.masked_fill(m, 0)
+
+    # Combine the original and shuffled frequency data.
+    freal += freal2
+    fimag += fimag2
+
+    # Reconstruct the complex frequency data and perform an inverse RFFT.
+    xy_f = torch.complex(freal, fimag)
+    xy = torch.fft.irfft(xy_f, dim=dim)
+
+    # Without an explicit length the inverse RFFT returns an even number of samples, so for an odd
+    # total length the result is one sample short. The first sample of x is prepended to restore
+    # the length. The padding follows 'dim' (the axis used to be hard-coded to 1, which failed
+    # with a size mismatch for any other dim).
+    if x_len + y_len != xy.shape[dim]:
+        xy = torch.cat([x.narrow(dim, 0, 1), xy], dim=dim)
+
+    # Split the reconstructed data back into two parts corresponding to the original x and y.
+    return torch.split(xy, [x_len, y_len], dim=dim)