Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add new wrappers (llm and fedot.industrial forecasters) #23

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pytsbe/models/automl_forecasters/automl.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import numpy as np
import pandas as pd
from fedot.core.pipelines.tuning.tuner_builder import TunerBuilder
from fedot.core.repository.quality_metrics_repository import RegressionMetricsEnum
from fedot.core.repository.metrics_repository import RegressionMetricsEnum
from golem.core.tuning.simultaneous import SimultaneousTuner

from pytsbe.data.forecast_output import ForecastResults
Expand Down
Empty file.
51 changes: 51 additions & 0 deletions pytsbe/models/chronos/chronos_forecaster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import copy
import numpy as np
import pandas as pd
import torch

from pytsbe.data.forecast_output import ForecastResults
from pytsbe.models.forecast import Forecaster

try:
from chronos import ChronosPipeline
except ImportError:
print('Try installing Chronos via pip install '
'git+https://github.com/amazon-science/chronos-forecasting.git')


class ChronosForecaster(Forecaster):
    """
    Class for time series forecasting with Chronos pretrained models
    Source code: https://github.com/amazon-science/chronos-forecasting

    The pretrained pipeline is loaded once in the constructor and used as is:
    ``fit_univariate_ts`` does nothing, and multivariate methods are not supported.
    """

    def __init__(self, **params):
        """
        :param params: forwarded to the base ``Forecaster``. The optional key
            ``hf_model`` is the model identifier handed to
            ``ChronosPipeline.from_pretrained`` ('amazon/chronos-t5-tiny' when absent)
        """
        super().__init__(**params)
        # Name of the column that holds the series values in the incoming dataframes
        self.target = 'value'
        self.forecaster = self.__load_pretrained_pipeline(params.get('hf_model', 'amazon/chronos-t5-tiny'))

    def fit_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """ No-op: the pretrained pipeline is used without any fitting """
        pass

    def fit_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, target_column: str,
                            predictors_columns: list, **kwargs):
        """ Not supported, always raises NotImplementedError """
        raise NotImplementedError('Chronos does not support fit for multivariate time series forecasting')

    def predict_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """
        Forecast ``forecast_horizon`` steps ahead from the ``self.target`` column.

        :param historical_values: dataframe with the observed series in column ``self.target``
        :param forecast_horizon: number of steps to predict (passed as ``prediction_length``)
        :return: ForecastResults whose predictions are the median over axis 0 of the
            first element of the pipeline output
        """
        forecast = self.forecaster.predict(
            torch.tensor(historical_values[self.target].values),
            prediction_length=forecast_horizon
        )
        # forecast[0] is the output for the single series passed in; axis 0 of it is
        # presumably the sample-path axis (TODO confirm against the Chronos docs)
        return ForecastResults(predictions=np.median(forecast[0].numpy(), axis=0))

    def predict_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, target_column: str,
                                predictors_columns: list, **kwargs):
        """ Not supported, always raises NotImplementedError """
        raise NotImplementedError('Chronos does not support predict for multivariate time series forecasting')

    @staticmethod
    def __load_pretrained_pipeline(hf_model: str) -> 'ChronosPipeline':
        """
        Load a pretrained Chronos pipeline on CUDA when available, otherwise on CPU.

        The return annotation is a string on purpose: the module-level import of
        ``ChronosPipeline`` is optional, and an evaluated annotation would raise
        NameError while the class body is executed if the package is missing.

        :param hf_model: model identifier passed to ``ChronosPipeline.from_pretrained``
        """
        return ChronosPipeline.from_pretrained(
            hf_model,
            torch_dtype=torch.bfloat16,
            device_map="cuda" if torch.cuda.is_available() else "cpu"
        )
83 changes: 83 additions & 0 deletions pytsbe/models/fedot_industrial_forecaster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import pandas as pd
import numpy as np
import shutil


try:
from fedot_ind.api.main import FedotIndustrial
except ImportError:
print('Does not found Fedot.Industrial library. Continue...')


from pytsbe.data.forecast_output import ForecastResults
from pytsbe.models.forecast import Forecaster

import logging
# Process-wide switch of the standard logging module: with False, errors raised
# while a handler emits a record are silently dropped instead of being reported.
logging.raiseExceptions = False


class FedotIndustrialForecaster(Forecaster):
    """
    Class for time series forecasting with FEDOT.Industrial framework
    Source code: https://github.com/aimclub/Fedot.Industrial
    """

    def __init__(self, **params):
        """
        :param params: forwarded to the base ``Forecaster`` and merged over the
            default FedotIndustrial settings below (user values win key by key)
        """
        super().__init__(**params)
        default_params = {
            'timeout': 6,
            'n_jobs': 1,
            'metric': 'smape',
            'pop_size': 10,
            'with_tuning': False,
            'industrial_strategy': 'forecasting_assumptions'
        }
        self.init_params = {**default_params, **params}
        # Fitted FedotIndustrial instance; stays None until fit_univariate_ts is called
        self.obtained_model = None

    def fit_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """ Train FEDOT.Industrial framework (launch AutoML algorithm) """
        input_data = prepare_input_ts_data(historical_values, forecast_horizon)

        model = FedotIndustrial(problem='ts_forecasting',
                                task_params={'forecast_length': forecast_horizon},
                                **self.init_params)
        model.fit(input_data)
        self.obtained_model = model

        # TODO: remove when composition history managing becomes a responsibility of Fedot.Industrial
        # Cleanup is best-effort: a missing key or an already removed directory
        # must not fail a fit that has just succeeded.
        history_dir = model.config_dict.get('history_dir')
        if history_dir is not None:
            try:
                shutil.rmtree(history_dir)
            except FileNotFoundError:
                pass

    def fit_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int,
                            target_column: str, predictors_columns: list, **kwargs):
        """ Not supported, always raises NotImplementedError """
        raise NotImplementedError()

    def predict_univariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int, **kwargs):
        """
        Use obtained pipeline to make predictions

        The fitted model returns a mapping of candidate forecasts; each candidate is
        scored with the configured metric and the one with the lowest value is returned.

        :return: ForecastResults with the selected candidate as a numpy array
        :raises ValueError: if the model returned no candidate forecasts
        """
        input_data = prepare_input_ts_data(historical_values, forecast_horizon)
        auto_labels = self.obtained_model.predict(input_data)

        min_metric = float('inf')
        metric = self.init_params.get('metric', 'smape')
        forecast = None
        first_candidate = None
        # NOTE(review): candidates are scored against input_data[1], i.e. the last
        # `forecast_horizon` values of the history that is also the model input - confirm intent.
        for predict in auto_labels.values():
            if first_candidate is None:
                first_candidate = predict
            # get_metrics reads the prediction from the model attribute, so set it first
            self.obtained_model.predicted_labels = predict
            current_metric = float(self.obtained_model.get_metrics(target=input_data[1],
                                                                   metric_names=(metric,))[metric][0])

            if current_metric < min_metric:
                min_metric = current_metric
                forecast = predict

        if forecast is None:
            # No candidate compared below infinity (e.g. every metric was NaN or inf)
            if first_candidate is None:
                raise ValueError('FEDOT.Industrial returned no forecasts to choose from')
            forecast = first_candidate

        return ForecastResults(predictions=np.array(forecast))

    def predict_multivariate_ts(self, historical_values: pd.DataFrame, forecast_horizon: int,
                                target_column: str, predictors_columns: list, **kwargs):
        """ Not supported, always raises NotImplementedError """
        raise NotImplementedError()


def prepare_input_ts_data(historical_values: pd.DataFrame, forecast_horizon: int):
    """
    Extract the series from the 'value' column as flat numpy arrays.

    :param historical_values: dataframe with the series stored in column 'value'
    :param forecast_horizon: number of trailing observations to put in the second array
    :return: tuple (whole series, last ``forecast_horizon`` values of the series)
    """
    values = np.array(historical_values['value'])
    whole_series = values.flatten()
    horizon_tail = values[-forecast_horizon:].flatten()
    return whole_series, horizon_tail
Empty file.
Empty file.
Empty file.
Empty file.
40 changes: 40 additions & 0 deletions pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mask.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import torch


@torch.no_grad()
def freq_mask(x, y, rate=0.1, dim=1):
    """
    Frequency-masking augmentation: randomly zero out frequency components of
    the concatenation of ``x`` and ``y`` and transform the result back.

    :param x: tensor, first part of the series (past targets)
    :param y: tensor, second part of the series (future targets); must be
        concatenable with ``x`` along ``dim``
    :param rate: probability with which each frequency component is zeroed
    :param dim: dimension along which the series run
    :return: tuple of two tensors with the same sizes along ``dim`` as ``x`` and ``y``
    """
    # Get lengths of the input tensors along the specified dimension.
    x_len = x.shape[dim]
    y_len = y.shape[dim]

    # Concatenate x and y along the specified dimension and move to the frequency domain.
    xy = torch.cat([x, y], dim=dim)
    xy_f = torch.fft.rfft(xy, dim=dim)

    # Random boolean mask: True marks a frequency component to be zeroed out.
    m = torch.rand_like(xy_f, dtype=xy.dtype) < rate

    # Zero the selected components in both the real and the imaginary parts.
    freal = xy_f.real.masked_fill(m, 0)
    fimag = xy_f.imag.masked_fill(m, 0)

    # Rebuild the complex spectrum and transform back to the time domain.
    xy_f = torch.complex(freal, fimag)
    xy = torch.fft.irfft(xy_f, dim=dim)

    # irfft without an explicit length returns an even number of samples, so an
    # odd-length input comes back one sample short. Restore the length by
    # prepending the first sample of x along `dim` (not along a fixed axis 1,
    # which broke any call with dim != 1).
    if x_len + y_len != xy.shape[dim]:
        xy = torch.cat([x.narrow(dim, 0, 1), xy], dim=dim)

    # Split the reconstructed data back into two parts corresponding to the original x and y.
    return torch.split(xy, [x_len, y_len], dim=dim)
63 changes: 63 additions & 0 deletions pytsbe/models/lagllama/lag_llama/data/augmentations/freq_mix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
import numpy as np
import torch


@torch.no_grad()
def freq_mix(x, y, rate=0.1, dim=1):
    """
    Frequency-mixing augmentation: replace randomly selected frequency
    components of each series with the components of another series taken
    from a random permutation of the batch (first dimension).

    :param x: tensor, first part of the series (past targets)
    :param y: tensor, second part of the series (future targets); must be
        concatenable with ``x`` along ``dim``
    :param rate: probability with which a frequency component is selected for replacement
    :param dim: dimension along which the series run
    :return: tuple of two tensors with the same sizes along ``dim`` as ``x`` and ``y``
    """
    # Get lengths of the input tensors along the specified dimension.
    x_len = x.shape[dim]
    y_len = y.shape[dim]

    # Concatenate x and y along the specified dimension and move to the frequency domain.
    xy = torch.cat([x, y], dim=dim)
    xy_f = torch.fft.rfft(xy, dim=dim)

    # Random boolean mask: True marks a frequency component to be replaced.
    m = torch.rand_like(xy_f, dtype=xy.dtype) < rate

    # Calculate the amplitude of the frequency components.
    amp = abs(xy_f)

    # Restrict the mask using the sort indices of the amplitudes.
    # NOTE(review): `index` holds original positions along `dim` in descending
    # amplitude order, so `index > 2` compares positions, not ranks - confirm
    # this is the intended way to protect the dominant frequencies.
    _, index = amp.sort(dim=dim, descending=True)
    dominant_mask = index > 2
    m = torch.bitwise_and(m, dominant_mask)

    # Zero the selected components of the original spectrum.
    freal = xy_f.real.masked_fill(m, 0)
    fimag = xy_f.imag.masked_fill(m, 0)

    # Shuffle along the first dimension to pair each series with a donor series.
    b_idx = np.arange(x.shape[0])
    np.random.shuffle(b_idx)
    x2, y2 = x[b_idx], y[b_idx]

    # Spectrum of the donor series.
    xy2 = torch.cat([x2, y2], dim=dim)
    xy2_f = torch.fft.rfft(xy2, dim=dim)

    # Keep from the donor only the components that were removed from the original.
    m = torch.bitwise_not(m)
    freal2 = xy2_f.real.masked_fill(m, 0)
    fimag2 = xy2_f.imag.masked_fill(m, 0)

    # Combine the original and donor frequency data.
    freal += freal2
    fimag += fimag2

    # Rebuild the complex spectrum and transform back to the time domain.
    xy_f = torch.complex(freal, fimag)
    xy = torch.fft.irfft(xy_f, dim=dim)

    # irfft without an explicit length returns an even number of samples, so an
    # odd-length input comes back one sample short. Restore the length by
    # prepending the first sample of x along `dim` (not along a fixed axis 1,
    # which broke any call with dim != 1).
    if x_len + y_len != xy.shape[dim]:
        xy = torch.cat([x.narrow(dim, 0, 1), xy], dim=dim)

    # Split the reconstructed data back into two parts corresponding to the original x and y.
    return torch.split(xy, [x_len, y_len], dim=dim)
Loading