[Mlflow_utils] - mlflow model server (#811)

* mlflow server * small fix to test * small fixes to ms and nb * small fixes to mlrun version * update requirements lightgbm * added req * Added xgboost to req --------- Co-authored-by: Avi Asulin <[email protected]>
mlrun · Jul 17, 2024 · cb119a1 · cb119a1
1 parent ce8f612
commit cb119a1
Show file tree

Hide file tree

Showing 6 changed files with 1,644 additions and 0 deletions.
diff --git a/mlflow_utils/function.yaml b/mlflow_utils/function.yaml
@@ -0,0 +1,32 @@
+metadata:
+  name: mlflow-utils
+  categories:
+  - genai
+  - model-serving
+  - machine-learning
+  - mlflow
+  tag: ''
+spec:
+  default_handler: ''
+  image: mlrun/mlrun
+  command: ''
+  base_image_pull: false
+  default_class: MLFlowModelServer
+  function_handler: mlflow-utils:handler
+  disable_auto_mount: false
+  build:
+    origin_filename: ''
+    code_origin: ''
+    requirements:
+    - mlflow==2.12.2
+    - lightgbm
+    - xgboost
+    functionSourceCode: aW1wb3J0IHppcGZpbGUKZnJvbSB0eXBpbmcgaW1wb3J0IEFueSwgRGljdAppbXBvcnQgbWxmbG93CmZyb20gbWxydW4uc2VydmluZy52Ml9zZXJ2aW5nIGltcG9ydCBWMk1vZGVsU2VydmVyCmltcG9ydCBwYW5kYXMgYXMgcGQKCgpjbGFzcyBNTEZsb3dNb2RlbFNlcnZlcihWMk1vZGVsU2VydmVyKToKICAgICIiIgogICAgTUxGbG93IHRyYWNrZXIgTW9kZWwgc2VydmluZyBjbGFzcywgaW5oZXJpdGluZyB0aGUgVjJNb2RlbFNlcnZlciBjbGFzcyBmb3IgYmVpbmcgaW5pdGlhbGl6ZWQgYXV0b21hdGljYWxseSBieSB0aGUgbW9kZWwKICAgIHNlcnZlciBhbmQgYmUgYWJsZSB0byBydW4gbG9jYWxseSBhcyBwYXJ0IG9mIGEgbnVjbGlvIHNlcnZlcmxlc3MgZnVuY3Rpb24sIG9yIGFzIHBhcnQgb2YgYSByZWFsLXRpbWUgcGlwZWxpbmUuCiAgICAiIiIKCiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkcyBhbiBtb2RlbCB0aGF0IHdhcyBsb2dnZWQgYnkgdGhlIE1MRmxvdyB0cmFja2VyIG1vZGVsCiAgICAgICAgIiIiCiAgICAgICAgIyBVbnppcCB0aGUgbW9kZWwgZGlyIGFuZCB0aGVuIHVzZSBtbGZsb3cncyBsb2FkIGZ1bmN0aW9uCiAgICAgICAgbW9kZWxfZmlsZSwgXyA9IHNlbGYuZ2V0X21vZGVsKCIuemlwIikKICAgICAgICBtb2RlbF9wYXRoX3VuemlwID0gbW9kZWxfZmlsZS5yZXBsYWNlKCIuemlwIiwgIiIpCgogICAgICAgIHdpdGggemlwZmlsZS5aaXBGaWxlKG1vZGVsX2ZpbGUsICJyIikgYXMgemlwX3JlZjoKICAgICAgICAgICAgemlwX3JlZi5leHRyYWN0YWxsKG1vZGVsX3BhdGhfdW56aXApCgogICAgICAgIHNlbGYubW9kZWwgPSBtbGZsb3cucHlmdW5jLmxvYWRfbW9kZWwobW9kZWxfcGF0aF91bnppcCkKCiAgICBkZWYgcHJlZGljdChzZWxmLCByZXF1ZXN0OiBEaWN0W3N0ciwgQW55XSkgLT4gbGlzdDoKICAgICAgICAiIiIKICAgICAgICBJbmZlciB0aGUgaW5wdXRzIHRocm91Z2ggdGhlIG1vZGVsLiBUaGUgaW5mZXJyZWQgZGF0YSB3aWxsCiAgICAgICAgYmUgcmVhZCBmcm9tIHRoZSAiaW5wdXRzIiBrZXkgb2YgdGhlIHJlcXVlc3QuCgogICAgICAgIDpwYXJhbSByZXF1ZXN0OiBUaGUgcmVxdWVzdCB0byB0aGUgbW9kZWwgdXNpbmcgeGdib29zdCdzIHByZWRpY3QuCiAgICAgICAgICAgICAgICBUaGUgaW5wdXQgdG8gdGhlIG1vZGVsIHdpbGwgYmUgcmVhZCBmcm9tIHRoZSAiaW5wdXRzIiBrZXkuCgogICAgICAgIDpyZXR1cm46IFRoZSBtb2RlbCdzIHByZWRpY3Rpb24gb24gdGhlIGdpdmVuIGlucHV0LgogICAgICAgICIiIgoKICAgICAgICAjIEdldCB0aGUgaW5wdXRzIGFuZCBzZXQgdG8gYWNjZXB0ZWQgdHlwZToKICAgICAgICBpbnB1dHMgPSBwZC5EYXRhRnJhbWUocmVxdWVzdFsiaW5wdXRzIl0pCgogICAgICAgICMgUHJlZGljdCB1c2luZyB0aGUgbW9kZWwncyBwcmVkaWN0IGZ1bmN0aW9uOgogICAgICAgIHByZWRpY3Rpb25zID0gc2VsZi5tb2R
lbC5wcmVkaWN0KGlucHV0cykKCiAgICAgICAgIyBSZXR1cm4gYXMgbGlzdDoKICAgICAgICByZXR1cm4gcHJlZGljdGlvbnMudG9saXN0KCkKCmZyb20gbWxydW4ucnVudGltZXMgaW1wb3J0IG51Y2xpb19pbml0X2hvb2sKZGVmIGluaXRfY29udGV4dChjb250ZXh0KToKICAgIG51Y2xpb19pbml0X2hvb2soY29udGV4dCwgZ2xvYmFscygpLCAnc2VydmluZ192MicpCgpkZWYgaGFuZGxlcihjb250ZXh0LCBldmVudCk6CiAgICByZXR1cm4gY29udGV4dC5tbHJ1bl9oYW5kbGVyKGNvbnRleHQsIGV2ZW50KQo=
+  min_replicas: 1
+  description: Mlflow model server, and additional utils.
+  max_replicas: 4
+  source: ''
+  function_kind: serving_v2
+  env:
+  - name: MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK
+    value: enabled
+verbose: false
+kind: serving
diff --git a/mlflow_utils/item.yaml b/mlflow_utils/item.yaml
@@ -0,0 +1,32 @@
+apiVersion: v1
+categories:
+- genai
+- model-serving
+- machine-learning
+- mlflow
+description: Mlflow model server, and additional utils.
+doc: ''
+example: mlflow_utils.ipynb
+generationDate: 2024-05-23:12-00
+hidden: false
+icon: ''
+labels:
+  author: zeevr
+maintainers: []
+marketplaceType: ''
+mlrunVersion: 1.7.0-rc17
+name: mlflow_utils
+platformVersion: ''
+spec:
+  customFields:
+    default_class: MLFlowModelServer
+  filename: mlflow_utils.py
+  handler: handler
+  image: mlrun/mlrun
+  kind: serving
+  requirements:
+  - mlflow==2.12.2
+  - lightgbm
+  - xgboost
+url: ''
+version: 1.0.0
diff --git a/mlflow_utils/mlflow_utils.ipynb b/mlflow_utils/mlflow_utils.ipynb
diff --git a/mlflow_utils/mlflow_utils.py b/mlflow_utils/mlflow_utils.py
@@ -0,0 +1,45 @@
+import zipfile
+from typing import Any, Dict
+import mlflow
+from mlrun.serving.v2_serving import V2ModelServer
+import pandas as pd
+
+
+class MLFlowModelServer(V2ModelServer):
+    """
+    MLFlow tracker Model serving class, inheriting the V2ModelServer class for being initialized automatically by the model
+    server and be able to run locally as part of a nuclio serverless function, or as part of a real-time pipeline.
+    """
+
+    def load(self):
+        """
+        loads a model that was logged by the MLFlow tracker model
+        """
+        # Unzip the model dir and then use mlflow's load function
+        model_file, _ = self.get_model(".zip")
+        model_path_unzip = model_file.replace(".zip", "")
+
+        with zipfile.ZipFile(model_file, "r") as zip_ref:
+            zip_ref.extractall(model_path_unzip)
+
+        self.model = mlflow.pyfunc.load_model(model_path_unzip)
+
+    def predict(self, request: Dict[str, Any]) -> list:
+        """
+        Infer the inputs through the model. The inferred data will
+        be read from the "inputs" key of the request.
+
+        :param request: The request to the model using xgboost's predict.
+                The input to the model will be read from the "inputs" key.
+
+        :return: The model's prediction on the given input.
+        """
+
+        # Get the inputs and set to accepted type:
+        inputs = pd.DataFrame(request["inputs"])
+
+        # Predict using the model's predict function:
+        predictions = self.model.predict(inputs)
+
+        # Return as list:
+        return predictions.tolist()
diff --git a/mlflow_utils/requirements.txt b/mlflow_utils/requirements.txt
@@ -0,0 +1,3 @@
+mlflow==2.12.2
+lightgbm
+xgboost
diff --git a/mlflow_utils/test_mlflow_utils.py b/mlflow_utils/test_mlflow_utils.py
@@ -0,0 +1,179 @@
+# Copyright 2018 Iguazio
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import tempfile
+
+import lightgbm as lgb
+import mlflow
+import mlflow.environment_variables
+import mlflow.xgboost
+import pytest
+import xgboost as xgb
+from sklearn import datasets
+from sklearn.metrics import accuracy_score, log_loss
+from sklearn.model_selection import train_test_split
+
+import os
+# os.environ["MLRUN_IGNORE_ENV_FILE"] = "True"  #TODO remove before push
+
+import mlrun
+import mlrun.launcher.local
+#  Important:
+#  unlike mlconf, which resets back to default after each test run, the mlflow configurations
+#  and env vars don't, so at the end of each test we need to undo anything we set in that test.
+#  What we cover in these tests: logging "regular" runs with experiment name, run id and context
+#  name (the last two using mlconf), failing a run mid-way, and a run with no handler.
+#  We also test here importing of runs, artifacts and models from a previous run.
+
+# simple mlflow example of lgb logging
+def lgb_run():
+    # prepare train and test data
+    iris = datasets.load_iris()
+    X = iris.data
+    y = iris.target
+    X_train, X_test, y_train, y_test = train_test_split(
+        X, y, test_size=0.2, random_state=42
+    )
+
+    # enable auto logging
+    mlflow.lightgbm.autolog()
+
+    train_set = lgb.Dataset(X_train, label=y_train)
+
+    with mlflow.start_run():
+        # train model
+        params = {
+            "objective": "multiclass",
+            "num_class": 3,
+            "learning_rate": 0.1,
+            "metric": "multi_logloss",
+            "colsample_bytree": 1.0,
+            "subsample": 1.0,
+            "seed": 42,
+        }
+        # model and training data are being logged automatically
+        model = lgb.train(
+            params,
+            train_set,
+            num_boost_round=10,
+            valid_sets=[train_set],
+            valid_names=["train"],
+        )
+
+        # evaluate model
+        y_proba = model.predict(X_test)
+        y_pred = y_proba.argmax(axis=1)
+        loss = log_loss(y_test, y_proba)
+        acc = accuracy_score(y_test, y_pred)
+
+        # log metrics
+        mlflow.log_metrics({"log_loss": loss, "accuracy": acc})
+
+
+# simple mlflow example of xgb logging
+def xgb_run():
+    # prepare train and test data
+    iris = datasets.load_iris()
+    x = iris.data
+    y = iris.target
+    x_train, x_test, y_train, y_test = train_test_split(
+        x, y, test_size=0.2, random_state=42
+    )
+
+    # enable auto logging
+    mlflow.xgboost.autolog()
+
+    dtrain = xgb.DMatrix(x_train, label=y_train)
+    dtest = xgb.DMatrix(x_test, label=y_test)
+
+    with mlflow.start_run():
+        # train model
+        params = {
+            "objective": "multi:softprob",
+            "num_class": 3,
+            "learning_rate": 0.3,
+            "eval_metric": "mlogloss",
+            "colsample_bytree": 1.0,
+            "subsample": 1.0,
+            "seed": 42,
+        }
+        # model and training data are being logged automatically
+        model = xgb.train(params, dtrain, evals=[(dtrain, "train")])
+        # evaluate model
+        y_proba = model.predict(dtest)
+        y_pred = y_proba.argmax(axis=1)
+        loss = log_loss(y_test, y_proba)
+        acc = accuracy_score(y_test, y_pred)
+        # log metrics
+        mlflow.log_metrics({"log_loss": loss, "accuracy": acc})
+
+
+@pytest.mark.parametrize("handler", ["xgb_run", "lgb_run"])
+def test_track_run_with_experiment_name(handler):
+    """
+    This test is for tracking a run logged by mlflow into mlrun while it's running using the experiment name.
+    first activate the tracking option in mlconf, then we name the mlflow experiment,
+    then we run some code that is being logged by mlflow using mlrun,
+    and finally compare the mlrun we tracked with the original mlflow run using the validate func
+    """
+    # Enable general tracking
+    mlrun.mlconf.external_platform_tracking.enabled = True
+    # Set the mlflow experiment name
+    mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.set(f"{handler}_test_track")
+    with tempfile.TemporaryDirectory() as test_directory:
+        mlflow.set_tracking_uri(test_directory)  # Tell mlflow where to save logged data
+
+        # Create a project for this tester:
+        project = mlrun.get_or_create_project(name="default", context=test_directory)
+
+        # Create a MLRun function using the tester source file (all the functions must be located in it):
+        func = project.set_function(
+            func=__file__,
+            name=f"{handler}-test",
+            kind="job",
+            image="mlrun/mlrun",
+            requirements=["mlflow"],
+        )
+        # mlflow creates a dir to log the run, this makes it in the tmpdir we create
+        trainer_run = func.run(
+            local=True,
+            handler=handler,
+            artifact_path=test_directory,
+        )
+
+        serving_func = project.set_function(
+            func=os.path.abspath("function.yaml"),
+            name=f"{handler}-server",
+        )
+        model_name = f"{handler}-model"
+        # Add the model
+        upper_handler = handler.replace("_", "-")
+        model_path = test_directory + f"/{upper_handler}-test-{upper_handler}/0/model/"
+        serving_func.add_model(
+            model_name,
+            class_name="MLFlowModelServer",
+            model_path=model_path,
+        )
+
+        # Create a mock server
+        server = serving_func.to_mock_server()
+
+        # An example taken randomly
+        result = server.test(f"/v2/models/{model_name}/predict", {"inputs": [[5.1, 3.5, 1.4, 0.2]]})
+    print(result)
+    assert result
+    # unset mlflow experiment name to default
+    mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.unset()
+
+