31230 update tensorflow and numpy versions (vulnerabilities) (#1214)
* 31230 update tensorflow and numpy versions (vulnerabilities)

* black

* black

* doctest
devProdigy authored Jan 13, 2022
1 parent 927a3be commit 3b89867
Showing 9 changed files with 69 additions and 37 deletions.
27 changes: 24 additions & 3 deletions README.md
@@ -33,6 +33,7 @@
* [Uninstall](#Uninstall)
* [Developer manual](#Developer-manual)
* [How to prepare working environment](#How-to-prepare-working-environment)
* [How to update packages](#How-to-update-packages)
* [How to run tests locally](#How-to-run-tests-locally)
* [Tests system requirements](#Tests-system-requirements)
* [Run tests](#Run-tests)
@@ -73,16 +74,34 @@ This section will explain how to start development of Gordo.
# then:
pip install --upgrade pip
pip install --upgrade pip-tools
# Some of the packages live in a private PyPI (Azure Artifacts), so you have to specify its URL.
# After running the next command you will be prompted for the <PAT name> and <PAT password> for that PyPI URL.
# You can get a PAT (personal access token) by following [this instruction](https://docs.microsoft.com/en-us/azure/devops/organizations/accounts/use-personal-access-tokens-to-authenticate?view=azure-devops&tabs=preview-page#create-a-pat)
# in Azure DevOps. The PAT should only have the "Packaging -> Read" scope.
pip install --extra-index-url <https://private-pypi-repo-url/> -r requirements/full_requirements.txt
pip install -r requirements/test_requirements.txt
```
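If you prefer not to be prompted, pip can also read the credentials from the index URL itself or from an environment variable. A minimal sketch, assuming the placeholder repository URL above and a PAT with the "Packaging -> Read" scope:

```shell
# Hypothetical placeholders; substitute your real Azure Artifacts feed URL and PAT.
# Option 1: embed the PAT name and password directly in the extra index URL:
pip install --extra-index-url "https://<PAT-name>:<PAT-password>@private-pypi-repo-url/" -r requirements/full_requirements.txt

# Option 2: export it once so every pip invocation in this shell picks it up:
export PIP_EXTRA_INDEX_URL="https://<PAT-name>:<PAT-password>@private-pypi-repo-url/"
pip install -r requirements/full_requirements.txt
```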

#### How to update packages
Note: you have to install a `pip-tools` version higher than `6` for the requirements to keep the same multi-line output format.

To update a package in `full_requirements.txt`:
- change its version in the `requirements.in` file;
- (one-time setup) get credentials to access the private PyPI
(for more details see the [How to prepare working environment](#How-to-prepare-working-environment) section);
- compile the requirements:
```shell
# This command might change over time, so it's better to take it from the top of the `full_requirements.txt` file.
pip-compile --extra-index-url <https://private-pypi-repo-url/> --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
```
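To bump only a single pin without re-resolving everything else, `pip-compile` also accepts `--upgrade-package`. A sketch, assuming the same placeholder index URL; check `pip-compile --help` for your installed pip-tools version:

```shell
# -P/--upgrade-package upgrades just the named package (plus whatever its new
# version requires), leaving the other pins in full_requirements.txt untouched.
pip-compile --extra-index-url <https://private-pypi-repo-url/> --no-emit-index-url \
    --upgrade-package tensorflow \
    --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
```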

### How to run tests locally

#### Tests system requirements
To run the tests, your system needs the following (note: the commands may differ depending on your OS; a quick check is sketched below):
- a running Docker daemon;
- port 5432 available for the postgres container
(a `postgresql` container is used, so it's better to stop your local instance before running the tests).
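A way to verify both prerequisites, sketched for Linux/macOS:

```shell
# Check that the Docker daemon is reachable:
docker info > /dev/null && echo "docker is running"

# Check that nothing is already listening on port 5432:
lsof -i :5432 || echo "port 5432 is free"
```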

#### Run tests
The list of commands to run the tests can be found [here](/setup.cfg).
@@ -96,4 +115,6 @@ python3.7 -m pytest ...

#### How to run tests in debug mode
Note: this example is for the PyCharm IDE, to use `breakpoints` in the test code.
In the run configuration for the tests, add the following string to `Additional arguments:` in the `pytest`
section: `--ignore benchmarks --cov-report= --no-cov`,
or TEMPORARILY remove `--cov-report=xml` and `--cov=gordo` from the `pytest.ini` file.
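For reference, the change to `pytest.ini` might look like the sketch below; the actual file contents are not shown in this diff, so the `addopts` line is hypothetical:

```ini
[pytest]
# TEMPORARILY comment out the coverage flags while debugging:
# addopts = --ignore benchmarks --cov=gordo --cov-report=xml
addopts = --ignore benchmarks
```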
2 changes: 1 addition & 1 deletion gordo/machine/model/anomaly/diff.py
@@ -634,7 +634,7 @@ def cross_validate(

def _calculate_feature_thresholds(
self, y_true: pd.DataFrame, y_pred: pd.DataFrame
) -> np.ndarray:
) -> Union[float, pd.Series]:
absolute_error = self._absolute_error(y_true, y_pred)
return self._calculate_threshold(absolute_error)

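The widened return annotation matches what pandas reductions actually produce: reducing a DataFrame column-wise yields a `pd.Series`, while reducing all the way down yields a scalar. A minimal sketch, assuming `_calculate_threshold` ends in a pandas reduction such as `.mean()` (its body is not shown in this diff):

```python
import numpy as np
import pandas as pd

absolute_error = pd.DataFrame(np.abs(np.random.randn(10, 3)), columns=["a", "b", "c"])

per_feature = absolute_error.mean(axis=0)  # pd.Series: one threshold per feature
overall = per_feature.mean()               # numpy.float64: a single scalar threshold

print(type(per_feature))  # <class 'pandas.core.series.Series'>
print(type(overall))      # <class 'numpy.float64'>
```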
4 changes: 3 additions & 1 deletion gordo/machine/model/models.py
@@ -556,7 +556,9 @@ def _validate_and_fix_size_of_X(self, X):
)
return X

def fit(self, X: np.ndarray, y: np.ndarray, **kwargs) -> "KerasLSTMForecast":
def fit( # type: ignore
self, X: np.ndarray, y: np.ndarray, **kwargs
) -> "KerasLSTMForecast":

"""
This fits a one-step forecast LSTM architecture.
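The added `# type: ignore` is the usual escape hatch when an override is stricter than the method it replaces. A hypothetical sketch of the pattern (not the actual gordo class hierarchy):

```python
from typing import Optional

import numpy as np


class BaseModel:
    def fit(self, X: np.ndarray, y: Optional[np.ndarray] = None, **kwargs) -> "BaseModel":
        return self


class ForecastModel(BaseModel):
    # Making `y` mandatory is stricter than the base signature (which allows
    # calls without y), so mypy flags the override as incompatible; the
    # `# type: ignore` silences exactly that complaint.
    def fit(  # type: ignore
        self, X: np.ndarray, y: np.ndarray, **kwargs
    ) -> "ForecastModel":
        return self
```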
2 changes: 1 addition & 1 deletion gordo/machine/model/transformers/imputer.py
@@ -86,7 +86,7 @@ def fit(self, X: Union[pd.DataFrame, np.ndarray], y=None):
def transform(self, X: Union[pd.DataFrame, np.ndarray], y=None):

# Ensure we're dealing with numpy array if it's a dataframe or similar
X = X.values if hasattr(X, "values") else X
X = X.values if isinstance(X, pd.DataFrame) else X

# Apply specific fill values if provided.
if self.inf_fill_value is not None:
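The switch from `hasattr` to `isinstance` keeps the same runtime behaviour for DataFrames but gives the type checker something it can narrow on. A minimal sketch of the difference:

```python
from typing import Union

import numpy as np
import pandas as pd


def to_array(X: Union[pd.DataFrame, np.ndarray]) -> np.ndarray:
    # `hasattr(X, "values")` also matches objects mypy knows nothing about,
    # so `X.values` is inferred as `Any`; after an isinstance check mypy
    # narrows X to pd.DataFrame and knows `.values` is an ndarray.
    if isinstance(X, pd.DataFrame):
        return X.values
    return X


print(to_array(pd.DataFrame({"a": [1, 2]})).shape)  # (2, 1)
print(to_array(np.zeros((2, 2))).shape)             # (2, 2)
```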
20 changes: 11 additions & 9 deletions gordo/machine/model/utils.py
@@ -69,10 +69,10 @@ def make_base_dataframe(
model_output: np.ndarray
Raw model output
target_tag_list: Optional[Union[List[SensorTag], List[str]]]
Tags to be assigned to ``model-output`` if not assinged but model output matches
Tags to be assigned to ``model-output`` if not assigned but model output matches
model input, ``tags`` will be used.
index: Optional[np.ndarray]
The index which should be assinged to the resulting dataframe, will be clipped
The index which should be assigned to the resulting dataframe, will be clipped
to the length of ``model_output``, should the model output be shorter than its input.
frequency: Optional[datetime.timedelta]
The spacing of the time between points.
@@ -92,16 +92,16 @@
names_n_values = (("model-input", model_input), ("model-output", model_output))

# Define the index which all series/dataframes will share
index = (
normalised_index = (
index[-len(model_output) :] if index is not None else range(len(model_output))
)

# Series to hold the start times for each point or just 'None' values
start_series = pd.Series(
index
if isinstance(index, pd.DatetimeIndex)
else (None for _ in range(len(index))),
index=index,
normalised_index
if isinstance(normalised_index, pd.DatetimeIndex)
else (None for _ in range(len(normalised_index))),
index=normalised_index,
)

# Calculate the end times if possible, or also all 'None's
@@ -122,7 +122,7 @@
data: pd.DataFrame = pd.DataFrame(
{("start", ""): start_series, ("end", ""): end_series},
columns=columns,
index=index,
index=normalised_index,
)

# Begin looping over the model-input and model-output; mapping them into
@@ -150,7 +150,9 @@
)

# Pass values, offsetting any differences in length compared to index, as set by model-output size
other = pd.DataFrame(values[-len(model_output) :], columns=columns, index=index)
other = pd.DataFrame(
values[-len(model_output) :], columns=columns, index=normalised_index
)
data = data.join(other)

return data
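Renaming the local to `normalised_index` avoids re-binding the `index` parameter (annotated `Optional[np.ndarray]`) to a `range`, which mypy, particularly with the numpy plugin enabled in this commit, rejects as an incompatible assignment. A minimal sketch of the pattern, not the actual function:

```python
from typing import Optional

import numpy as np


def clip_bad(index: Optional[np.ndarray], n: int):
    # mypy: Incompatible types in assignment (got "range", expected "Optional[ndarray]")
    index = index[-n:] if index is not None else range(n)
    return index


def clip_good(index: Optional[np.ndarray], n: int):
    # A separate name may hold either type without fighting the annotation.
    normalised_index = index[-n:] if index is not None else range(n)
    return normalised_index
```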
2 changes: 1 addition & 1 deletion gordo/serializer/from_definition.py
@@ -205,7 +205,7 @@ def _build_callbacks(definitions: list):
--------
>>> callbacks=_build_callbacks([{'tensorflow.keras.callbacks.EarlyStopping': {'monitor': 'val_loss', 'patience': 10}}])
>>> type(callbacks[0])
<class 'tensorflow.python.keras.callbacks.EarlyStopping'>
<class 'keras.callbacks.EarlyStopping'>
Returns
-------
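The doctest output changes because TensorFlow 2.7 ships Keras as the standalone `keras` package instead of bundling it under `tensorflow.python.keras`, so the classes' reprs moved. A quick check, assuming a local TensorFlow >= 2.7 install:

```python
import tensorflow as tf

cb = tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=10)
print(type(cb))  # <class 'keras.callbacks.EarlyStopping'> on TensorFlow 2.7
```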
1 change: 1 addition & 0 deletions mypy.ini
@@ -3,3 +3,4 @@
[mypy]
python_version = 3.7
ignore_missing_imports = True
plugins = numpy.typing.mypy_plugin
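The plugin comes with numpy's typing support (part of the numpy 1.21 pin above) and helps mypy handle numpy's dtype-sensitive types. A small sketch of the style of annotation used with it:

```python
import numpy as np
from numpy.typing import NDArray


def absolute_error(
    y_true: NDArray[np.float64], y_pred: NDArray[np.float64]
) -> NDArray[np.float64]:
    # With `plugins = numpy.typing.mypy_plugin` in mypy.ini, mypy can reason
    # about these dtype-parameterised annotations precisely.
    return np.abs(y_true - y_pred)


print(absolute_error(np.array([1.0, 2.0]), np.array([1.5, 1.5])))  # [0.5 0.5]
```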
40 changes: 23 additions & 17 deletions requirements/full_requirements.txt
@@ -2,7 +2,7 @@
# This file is autogenerated by pip-compile with python 3.7
# To update, run:
#
# pip-compile --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
# pip-compile --extra-index-url=<https://private-pypi-repo-url/> --no-emit-index-url --output-file=full_requirements.txt mlflow_requirements.in postgres_requirements.in requirements.in
#
absl-py==0.11.0
# via
@@ -54,7 +54,9 @@ azure-mgmt-resource==8.0.0
azure-mgmt-storage==7.1.0
# via azureml-core
azure-storage-blob==12.9.0
# via azure-storage-file-datalake
# via
# azure-storage-file-datalake
# gordo-client
azure-storage-file-datalake==12.3.1
# via gordo-dataset
azureml-contrib-run==1.0.85
@@ -67,6 +69,8 @@ backports.tempfile==1.0
# via azureml-core
backports.weakref==1.0.post1
# via backports.tempfile
cached-property==1.5.2
# via h5py
cachetools==4.1.1
# via
# google-auth
@@ -147,9 +151,9 @@ google-auth-oauthlib==0.4.1
# via tensorboard
google-pasta==0.2.0
# via tensorflow
gordo-client==4.1.1
gordo-client==4.1.3
# via -r requirements.in
gordo-dataset==3.2.2
gordo-dataset==3.3.0.dev1
# via
# -r requirements.in
# gordo-client
@@ -165,7 +169,7 @@ gunicorn==20.0.4
# via
# -r requirements.in
# mlflow
h5py==2.10.0
h5py==3.1.0
# via
# -r requirements.in
# tensorflow
@@ -201,10 +205,14 @@ jsonpickle==1.2
# azureml-mlflow
jsonschema==3.2.0
# via flask-restplus
keras==2.7.0
# via tensorflow
keras-preprocessing==1.1.2
# via tensorflow
kiwisolver==1.1.0
# via matplotlib
libclang==12.0.0
# via tensorflow
mako==1.1.1
# via alembic
markdown==3.1.1
@@ -265,11 +273,10 @@ numexpr==2.7.1
# via
# -r requirements.in
# gordo-dataset
numpy==1.19.5
numpy==1.21.0
# via
# -r requirements.in
# catboost
# gordo-client
# gordo-dataset
# h5py
# keras-preprocessing
@@ -319,10 +326,8 @@ protobuf==3.11.2
# tensorflow
psycopg2-binary==2.8.4
# via -r postgres_requirements.in
pyarrow==0.17.1
# via
# gordo-client
# gordo-dataset
pyarrow==6.0.1
# via gordo-dataset
pyasn1==0.4.8
# via
# ndg-httpsclient
@@ -406,7 +411,6 @@ ruamel.yaml==0.15.89
scikit-learn==0.23.2
# via
# -r requirements.in
# gordo-client
# gordo-dataset
scipy==1.4.1
# via
@@ -434,7 +438,6 @@ six==1.15.0
# google-auth
# google-pasta
# grpcio
# h5py
# influxdb
# isodate
# jsonschema
@@ -447,7 +450,6 @@
# python-dateutil
# querystring-parser
# retrying
# tensorboard
# tensorflow
# websocket-client
smmap2==2.0.5
@@ -462,13 +464,17 @@
# via dataclasses-json
tabulate==0.8.6
# via databricks-cli
tensorboard==2.4.1
tensorboard==2.7.0
# via tensorflow
tensorboard-data-server==0.6.1
# via tensorboard
tensorboard-plugin-wit==1.8.0
# via tensorboard
tensorflow==2.4.4
tensorflow==2.7.0
# via -r requirements.in
tensorflow-estimator==2.4.0
tensorflow-estimator==2.7.0
# via tensorflow
tensorflow-io-gcs-filesystem==0.23.1
# via tensorflow
termcolor==1.1.0
# via tensorflow
8 changes: 4 additions & 4 deletions requirements/requirements.in
@@ -3,7 +3,7 @@ Click~=7.0
dictdiffer~=0.8
dataclasses-json~=0.3
gunicorn~=20.0
h5py~=2.8
h5py~=3.1.0
jinja2~=2.11
numpy~=1.18
pandas~=1.0
@@ -12,7 +12,7 @@ python-dateutil~=2.8
pyyaml~=5.3
requests~=2.25
scikit-learn~=0.23
tensorflow~=2.4.4
tensorflow~=2.7.0
Flask~=1.0
flask-restplus~=0.12
Werkzeug==0.16.1 # flask-restplus requires Werkzeug, but is incompatible with 1.0.0. When that is fixed this explicit dependency can be dropped
@@ -24,8 +24,8 @@ typing_extensions~=3.7
prometheus_client~=0.7.1
azure-identity~=1.4.0
PyYAML~=5.4
gordo-dataset~=3.2.2
gordo-dataset~=3.3.0dev1
jeepney>=0.6
packaging~=20.7
pydantic>=1.7.4
gordo-client~=4.1.1
gordo-client~=4.1.3
