diff --git a/README.md b/README.md
index 6a5dce1ef..bd6e88a02 100644
--- a/README.md
+++ b/README.md
@@ -33,6 +33,7 @@
   * [Uninstall](#Uninstall)
 * [Developer manual](#Developer-manual)
   * [How to prepare working environment](#How-to-prepare-working-environment)
+  * [How to update packages](#How-to-update-packages)
   * [How to run tests locally](#How-to-run-tests-locally)
     * [Tests system requirements](#Tests-system-requirements)
     * [Run tests](#Run-tests)
@@ -73,16 +74,34 @@ This section will explain how to start development of Gordo.
 # then:
 pip install --upgrade pip
 pip install --upgrade pip-tools
-pip install -r requirements/full_requirements.txt
+# Some of the packages live in a private PyPI (Azure Artifacts), so you have to specify its URL.
+# When running the next command you will be prompted for credentials for that PyPI URL.
+# You can get a PAT (personal access token) by following [this instruction](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=preview-page#create-a-pat)
+# in Azure DevOps. The PAT should only have the "Packaging -> Read" scope.
+pip install --extra-index-url <private-pypi-url> -r requirements/full_requirements.txt
 pip install -r requirements/test_requirements.txt
 ```
+
+#### How to update packages
+Note: you have to install a `pip-tools` version higher than `6` so the compiled requirements keep the same multi-line output format.
+
+To update a package in `full_requirements.txt`:
+- change its version in the `requirements.in` file;
+- (once) get credentials to access the private PyPI
+(for more details see the [How to prepare working environment](#How-to-prepare-working-environment) section);
+- compile the requirements:
+```shell
+# This command might change over time, so it is better to take it from the top of the `full_requirements.txt` file.
+pip-compile --extra-index-url=<private-pypi-url> --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
+```
+
 ### How to run tests locally
 #### Tests system requirements
 To run tests it's required for your system to has (note: commands might differ from your OS):
 - running docker process;
-- available 5432 port for postgres container.
+- available 5432 port for postgres container
+(a `postgresql` container is used, so it is better to stop your local instance before running the tests).
 
 #### Run tests
 List of commands to run tests can be found [here](/setup.cfg).
@@ -96,4 +115,6 @@ python3.7 -m pytest ...
 
 #### How to run tests in debug mode
 Note: this example is for Pycharm IDE to use `breakpoints` in the code of the tests.
-On the configuration setup for test running add to `Additional arguments:` in `pytest` section following string: `--ignore benchmarks --cov-report= --no-cov `
+In the run configuration for the tests, add the following string to `Additional arguments:`
+in the `pytest` section: `--ignore benchmarks --cov-report= --no-cov `,
+or TEMPORARILY remove `--cov-report=xml` and `--cov=gordo` from the `pytest.ini` file.
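Reviewer note (not part of the diff): the debug-mode arguments above can also be passed programmatically via `pytest.main()`, which accepts the same flags as the CLI. A minimal sketch; the `tests/` path is illustrative, not taken from the repo:

```python
# Run pytest from Python with coverage disabled, so IDE breakpoints are hit.
import pytest

exit_code = pytest.main(
    ["--ignore", "benchmarks", "--cov-report=", "--no-cov", "tests/"]
)
```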
diff --git a/gordo/machine/model/anomaly/diff.py b/gordo/machine/model/anomaly/diff.py
index 051cd3ceb..bcdebd21c 100644
--- a/gordo/machine/model/anomaly/diff.py
+++ b/gordo/machine/model/anomaly/diff.py
@@ -634,7 +634,7 @@ def cross_validate(
 
     def _calculate_feature_thresholds(
         self, y_true: pd.DataFrame, y_pred: pd.DataFrame
-    ) -> np.ndarray:
+    ) -> Union[float, pd.Series]:
         absolute_error = self._absolute_error(y_true, y_pred)
         return self._calculate_threshold(absolute_error)
 
diff --git a/gordo/machine/model/models.py b/gordo/machine/model/models.py
index 065deb0b4..fc727cd0d 100644
--- a/gordo/machine/model/models.py
+++ b/gordo/machine/model/models.py
@@ -556,7 +556,9 @@ def _validate_and_fix_size_of_X(self, X):
         )
         return X
 
-    def fit(self, X: np.ndarray, y: np.ndarray, **kwargs) -> "KerasLSTMForecast":
+    def fit(  # type: ignore
+        self, X: np.ndarray, y: np.ndarray, **kwargs
+    ) -> "KerasLSTMForecast":
         """
         This fits a one step forecast LSTM architecture.
 
diff --git a/gordo/machine/model/transformers/imputer.py b/gordo/machine/model/transformers/imputer.py
index 0ee0e256b..85bb448a8 100644
--- a/gordo/machine/model/transformers/imputer.py
+++ b/gordo/machine/model/transformers/imputer.py
@@ -86,7 +86,7 @@ def fit(self, X: Union[pd.DataFrame, np.ndarray], y=None):
 
     def transform(self, X: Union[pd.DataFrame, np.ndarray], y=None):
         # Ensure we're dealing with numpy array if it's a dataframe or similar
-        X = X.values if hasattr(X, "values") else X
+        X = X.values if isinstance(X, pd.DataFrame) else X
 
         # Apply specific fill values if provided.
         if self.inf_fill_value is not None:
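Reviewer note (not part of the diff): `hasattr(X, "values")` is true for many objects besides DataFrames (for example, `dict` exposes a `values` method), and mypy cannot narrow the type through it, whereas `isinstance` is both stricter and checkable. A rough sketch of the narrowed behaviour; the helper name is illustrative:

```python
import numpy as np
import pandas as pd

def _to_ndarray(X):
    # Only an actual DataFrame is unwrapped to its underlying ndarray;
    # ndarrays pass through untouched. The old hasattr(X, "values") check
    # matched any object with a `values` attribute.
    return X.values if isinstance(X, pd.DataFrame) else X

assert isinstance(_to_ndarray(pd.DataFrame(np.zeros((2, 2)))), np.ndarray)
assert isinstance(_to_ndarray(np.zeros((2, 2))), np.ndarray)
```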
diff --git a/gordo/machine/model/utils.py b/gordo/machine/model/utils.py
index 671cc74ca..0a6ddcdf0 100644
--- a/gordo/machine/model/utils.py
+++ b/gordo/machine/model/utils.py
@@ -69,10 +69,10 @@ def make_base_dataframe(
     model_output: np.ndarray
         Raw model output
     target_tag_list: Optional[Union[List[SensorTag], List[str]]]
-        Tags to be assigned to ``model-output`` if not assinged but model output matches
+        Tags to be assigned to ``model-output``; if not assigned but model output matches
         model input, ``tags`` will be used.
     index: Optional[np.ndarray]
-        The index which should be assinged to the resulting dataframe, will be clipped
+        The index which should be assigned to the resulting dataframe, will be clipped
         to the length of ``model_output``, should the model output less than its input.
     frequency: Optional[datetime.timedelta]
         The spacing of the time between points.
@@ -92,16 +92,16 @@ def make_base_dataframe(
     names_n_values = (("model-input", model_input), ("model-output", model_output))
 
     # Define the index which all series/dataframes will share
-    index = (
+    normalised_index = (
         index[-len(model_output) :] if index is not None else range(len(model_output))
     )
 
     # Series to hold the start times for each point or just 'None' values
     start_series = pd.Series(
-        index
-        if isinstance(index, pd.DatetimeIndex)
-        else (None for _ in range(len(index))),
-        index=index,
+        normalised_index
+        if isinstance(normalised_index, pd.DatetimeIndex)
+        else (None for _ in range(len(normalised_index))),
+        index=normalised_index,
     )
 
     # Calculate the end times if possible, or also all 'None's
@@ -122,7 +122,7 @@ def make_base_dataframe(
     data: pd.DataFrame = pd.DataFrame(
         {("start", ""): start_series, ("end", ""): end_series},
         columns=columns,
-        index=index,
+        index=normalised_index,
     )
 
     # Begin looping over the model-input and model-output; mapping them into
@@ -150,7 +150,9 @@ def make_base_dataframe(
     )
 
     # Pass valudes, offsetting any differences in length compared to index, as set by model-output size
-    other = pd.DataFrame(values[-len(model_output) :], columns=columns, index=index)
+    other = pd.DataFrame(
+        values[-len(model_output) :], columns=columns, index=normalised_index
+    )
 
     data = data.join(other)
     return data
diff --git a/gordo/serializer/from_definition.py b/gordo/serializer/from_definition.py
index 7a6898c3d..0209ccc3a 100644
--- a/gordo/serializer/from_definition.py
+++ b/gordo/serializer/from_definition.py
@@ -205,7 +205,7 @@ def _build_callbacks(definitions: list):
     --------
    >>> callbacks=_build_callbacks([{'tensorflow.keras.callbacks.EarlyStopping': {'monitor': 'val_loss,', 'patience': 10}}])
     >>> type(callbacks[0])
-    <class 'tensorflow.python.keras.callbacks.EarlyStopping'>
+    <class 'keras.callbacks.EarlyStopping'>
 
     Returns
     -------
diff --git a/mypy.ini b/mypy.ini
index 72053b3e6..17ed96ecd 100644
--- a/mypy.ini
+++ b/mypy.ini
@@ -3,3 +3,4 @@
 [mypy]
 python_version = 3.7
 ignore_missing_imports = True
+plugins = numpy.typing.mypy_plugin
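Reviewer note (not part of the diff): `numpy.typing.mypy_plugin` ships with recent numpy (the `numpy==1.21.0` bump below provides it) and lets mypy resolve numpy's dtype-parametrised annotations instead of treating them as `Any`. An illustrative annotation, not taken from the codebase, that the plugin understands:

```python
import numpy as np
import numpy.typing as npt

def absolute_error(
    y_true: npt.NDArray[np.float64], y_pred: npt.NDArray[np.float64]
) -> npt.NDArray[np.float64]:
    # With the plugin enabled in mypy.ini, mypy checks the dtype parameter
    # of NDArray rather than silently widening it to Any.
    return np.abs(y_true - y_pred)
```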
diff --git a/requirements/full_requirements.txt b/requirements/full_requirements.txt
index 4f1ef44c0..939157a7f 100644
--- a/requirements/full_requirements.txt
+++ b/requirements/full_requirements.txt
@@ -2,7 +2,7 @@
 # This file is autogenerated by pip-compile with python 3.7
 # To update, run:
 #
-#    pip-compile --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
+#    pip-compile --extra-index-url= --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
 #
 absl-py==0.11.0
     # via
@@ -54,7 +54,9 @@ azure-mgmt-resource==8.0.0
 azure-mgmt-storage==7.1.0
     # via azureml-core
 azure-storage-blob==12.9.0
-    # via azure-storage-file-datalake
+    # via
+    #   azure-storage-file-datalake
+    #   gordo-client
 azure-storage-file-datalake==12.3.1
     # via gordo-dataset
 azureml-contrib-run==1.0.85
@@ -67,6 +69,8 @@ backports.tempfile==1.0
     # via azureml-core
 backports.weakref==1.0.post1
     # via backports.tempfile
+cached-property==1.5.2
+    # via h5py
 cachetools==4.1.1
     # via
     #   google-auth
@@ -147,9 +151,9 @@ google-auth-oauthlib==0.4.1
     # via tensorboard
 google-pasta==0.2.0
     # via tensorflow
-gordo-client==4.1.1
+gordo-client==4.1.3
     # via -r requirements.in
-gordo-dataset==3.2.2
+gordo-dataset==3.3.0.dev1
     # via
     #   -r requirements.in
     #   gordo-client
@@ -165,7 +169,7 @@ gunicorn==20.0.4
     # via
     #   -r requirements.in
     #   mlflow
-h5py==2.10.0
+h5py==3.1.0
     # via
     #   -r requirements.in
     #   tensorflow
@@ -201,10 +205,14 @@ jsonpickle==1.2
     # via
     #   azureml-core
     #   azureml-mlflow
 jsonschema==3.2.0
     # via flask-restplus
+keras==2.7.0
+    # via tensorflow
 keras-preprocessing==1.1.2
     # via tensorflow
 kiwisolver==1.1.0
     # via matplotlib
+libclang==12.0.0
+    # via tensorflow
 mako==1.1.1
     # via alembic
 markdown==3.1.1
@@ -265,11 +273,10 @@ numexpr==2.7.1
     # via
     #   -r requirements.in
     #   gordo-dataset
-numpy==1.19.5
+numpy==1.21.0
     # via
     #   -r requirements.in
     #   catboost
-    #   gordo-client
     #   gordo-dataset
     #   h5py
     #   keras-preprocessing
@@ -319,10 +326,8 @@ protobuf==3.11.2
     #   tensorflow
 psycopg2-binary==2.8.4
     # via -r postgres_requirements.in
-pyarrow==0.17.1
-    # via
-    #   gordo-client
-    #   gordo-dataset
+pyarrow==6.0.1
+    # via gordo-dataset
 pyasn1==0.4.8
     # via
     #   ndg-httpsclient
@@ -406,7 +411,6 @@ ruamel.yaml==0.15.89
 scikit-learn==0.23.2
     # via
     #   -r requirements.in
-    #   gordo-client
     #   gordo-dataset
 scipy==1.4.1
     # via
@@ -434,7 +438,6 @@ six==1.15.0
     #   google-auth
     #   google-pasta
     #   grpcio
-    #   h5py
     #   influxdb
     #   isodate
     #   jsonschema
@@ -447,7 +450,6 @@ six==1.15.0
     #   python-dateutil
     #   querystring-parser
     #   retrying
-    #   tensorboard
     #   tensorflow
     #   websocket-client
 smmap2==2.0.5
@@ -462,13 +464,17 @@ stringcase==1.2.0
     # via dataclasses-json
 tabulate==0.8.6
     # via databricks-cli
-tensorboard==2.4.1
+tensorboard==2.7.0
     # via tensorflow
+tensorboard-data-server==0.6.1
+    # via tensorboard
 tensorboard-plugin-wit==1.8.0
     # via tensorboard
-tensorflow==2.4.4
+tensorflow==2.7.0
     # via -r requirements.in
-tensorflow-estimator==2.4.0
+tensorflow-estimator==2.7.0
+    # via tensorflow
+tensorflow-io-gcs-filesystem==0.23.1
     # via tensorflow
 termcolor==1.1.0
     # via tensorflow
diff --git a/requirements/requirements.in b/requirements/requirements.in
index 7554c0645..511233b5e 100644
--- a/requirements/requirements.in
+++ b/requirements/requirements.in
@@ -3,7 +3,7 @@ Click~=7.0
 dictdiffer~=0.8
 dataclasses-json~=0.3
 gunicorn~=20.0
-h5py~=2.8
+h5py~=3.1.0
 jinja2~=2.11
 numpy~=1.18
 pandas~=1.0
@@ -12,7 +12,7 @@ python-dateutil~=2.8
 pyyaml~=5.3
 requests~=2.25
 scikit-learn~=0.23
-tensorflow~=2.4.4
+tensorflow~=2.7.0
 Flask~=1.0
 flask-restplus~=0.12
 Werkzeug==0.16.1 # flask-restplus requires Werkzeug, but is incompatible with 1.0.0. When that is fixed this explicit dependency can be dropped
@@ -24,8 +24,8 @@ typing_extensions~=3.7
 prometheus_client~=0.7.1
 azure-identity~=1.4.0
 PyYAML~=5.4
-gordo-dataset~=3.2.2
+gordo-dataset~=3.3.0dev1
 jeepney>=0.6
 packaging~=20.7
 pydantic>=1.7.4
-gordo-client~=4.1.1
+gordo-client~=4.1.3
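Reviewer note (not part of the diff): the `h5py~=2.8` to `h5py~=3.1.0` bump carries a behavioural change: h5py 3.x reads string datasets back as `bytes`, where 2.x returned `str`, and `Dataset.asstr()` gives a decoded view. A small sketch of the difference; the file name is illustrative:

```python
import h5py

# Write a scalar string dataset, then read it back under h5py 3.x semantics.
with h5py.File("example.h5", "w") as f:
    f["name"] = "gordo"

with h5py.File("example.h5", "r") as f:
    assert f["name"][()] == b"gordo"         # bytes under h5py 3.x
    assert f["name"].asstr()[()] == "gordo"  # decoded via .asstr()
```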