diff --git a/vaep/sklearn/ae_transformer.py b/vaep/sklearn/ae_transformer.py index c77d9ab38..a64af1a31 100644 --- a/vaep/sklearn/ae_transformer.py +++ b/vaep/sklearn/ae_transformer.py @@ -36,9 +36,10 @@ class AETransformer(TransformerMixin, BaseEstimator): """Autoencoder transformer (Denoising or Variational). Autoencoder transformer which can be used to impute missing values - in a dataset it is fitted to. Currently the data is standard normalized - for fitting the model, but imputations are provided the original scale + in a dataset it is fitted to. The data is standard normalized + for fitting the model, but imputations are provided on the original scale after internally fitting the model. + The data uses the wide data format with samples as rows and features as columns. diff --git a/vaep/sklearn/cf_transformer.py b/vaep/sklearn/cf_transformer.py index 36e0334f3..1ffca9a6c 100644 --- a/vaep/sklearn/cf_transformer.py +++ b/vaep/sklearn/cf_transformer.py @@ -36,6 +36,8 @@ class CollaborativeFilteringTransformer(TransformerMixin, BaseEstimator): three columns. The sample and feature identifiers are embedded into a space which is then used to predict the quantitative value. + The data is expected as a Series with a MultiIndex of the sample and feature identifiers, + and the quantitative value as its values. Parameters ---------- @@ -78,15 +80,17 @@ def fit(self, X: pd.Series, y: pd.Series = None, Parameters ---------- - X : Series, shape (n_samples, ) - The training data as a Series with the target_column as entries and name, - which has the item_column and sample_column set in a MultiIndex. - Is of shape (n_samples, ) + X : Series, shape (n_values, ) + The training data as a Series with the target_column as its values + and target_column as its name. The Series has a MultiIndex defined by the + item_column and sample_column. 
+ Is of shape (n_values, ) y : Series, optional - The validation data as a Series with the target_column as entries and name, - which has the item_column and sample_column set in a MultiIndex. - Is of shape (n_samples, ), by default None + The validation data as a Series with the target_column as its values + and target_column as its name. The Series has a MultiIndex defined by the + item_column and sample_column. + Is of shape (n_values, ), by default None epochs_max : int, optional Maximal number of epochs to train, by default 100