From 639bb270ac1a8f12f7d0d1222b94f0b213c2a0ac Mon Sep 17 00:00:00 2001 From: Eyal Danieli Date: Wed, 25 Sep 2024 09:41:02 +0300 Subject: [PATCH] Align to master branch (#826) * [Category] Fix and add categories to functions (#808) * [Category] Fix and add categories to functions * bump version in structured * test is not valid in huggingface_serving * Fix duplicated footer * Fix duplicated footer * revert python version change as it will be done in another PR * comments * comments * Bump python:3.6 to python:3.9 (#810) * [Describe] Align describe to new pandas version (#812) * [Describe] Align describe to new pandas version * minor test fix * update mlrun version * add dask to requirements * remove dask * update numpy version * debug * debug * debug * remove dask tests * remove debug code * [get_offline_features] Updated to mlrun 1.6.3 (#813) * [Feature-selection] Replace matplotlib with plotly (#815) * Iguazio-cicd user token updated Iguazio-cicd user token updated in repo secrets: https://github.com/mlrun/functions/settings/secrets/actions MARKETPLACE_ACCESS_TOKEN_V3 new token gh...Zmf was set around April * forcing iguazio-cicd auth forcing iguazio-cicd to deal with Author identity unknown * checkout@v3 to v4 and echo * [Mlflow_utils] - mlflow model server (#811) * mlflow server * small fix to test * small fixes to ms and nb * small fixes to mlrun version * update requirements lightgbm * added req * Added xgboost to req --------- Co-authored-by: Avi Asulin <34214569+aviaIguazio@users.noreply.github.com> * [Mlflow] Remove mlflow tag (#825) * remove mlflow tag * remove mlflow tag --------- Co-authored-by: Avi Asulin <34214569+aviaIguazio@users.noreply.github.com> * align feature_selection yaml --------- Co-authored-by: Avi Asulin <34214569+aviaIguazio@users.noreply.github.com> Co-authored-by: Yonatan Shelach <92271540+yonishelach@users.noreply.github.com> Co-authored-by: rokatyy Co-authored-by: Katerina Molchanova <35141662+rokatyy@users.noreply.github.com> 
Co-authored-by: nashpaz123 <44337075+nashpaz123@users.noreply.github.com> Co-authored-by: ZeevRispler <73653682+ZeevRispler@users.noreply.github.com> --- .github/workflows/test-all.yaml | 23 +- churn_server/churn_server.py | 10 - churn_server/function.yaml | 4 +- churn_server/item.yaml | 2 +- describe/describe.py | 39 +- describe/function.yaml | 96 +- describe/item.yaml | 4 +- describe/requirements.txt | 1 - describe/test_describe.py | 76 -- feature_selection/feature_selection.py | 52 +- feature_selection/function.yaml | 4 +- feature_selection/requirements.txt | 4 +- feature_selection/test_feature_selection.py | 57 +- hugging_face_serving/function.yaml | 41 +- hugging_face_serving/item.yaml | 5 +- mlflow_utils/function.yaml | 31 + mlflow_utils/item.yaml | 31 + mlflow_utils/mlflow_utils.ipynb | 1353 +++++++++++++++++++ mlflow_utils/mlflow_utils.py | 45 + mlflow_utils/requirements.txt | 3 + mlflow_utils/test_mlflow_utils.py | 179 +++ model_server/function.yaml | 2 +- pii_recognizer/function.yaml | 3 +- pii_recognizer/item.yaml | 3 +- pyannote_audio/function.yaml | 6 +- pyannote_audio/item.yaml | 6 +- question_answering/function.yaml | 4 +- question_answering/item.yaml | 4 +- silero_vad/function.yaml | 6 +- silero_vad/item.yaml | 6 +- structured_data_generator/function.yaml | 4 +- structured_data_generator/item.yaml | 4 +- text_to_audio_generator/function.yaml | 3 +- text_to_audio_generator/item.yaml | 3 +- tf2_serving/function.yaml | 2 +- transcribe/function.yaml | 5 +- transcribe/item.yaml | 4 +- translate/function.yaml | 24 +- translate/item.yaml | 3 +- v2_model_server/function.yaml | 4 +- v2_model_server/item.yaml | 2 +- v2_model_server/v2_model_server.py | 11 - 42 files changed, 1860 insertions(+), 309 deletions(-) create mode 100644 mlflow_utils/function.yaml create mode 100644 mlflow_utils/item.yaml create mode 100644 mlflow_utils/mlflow_utils.ipynb create mode 100644 mlflow_utils/mlflow_utils.py create mode 100644 mlflow_utils/requirements.txt create mode 
100644 mlflow_utils/test_mlflow_utils.py diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml index 4832c6456..a09ba17a2 100644 --- a/.github/workflows/test-all.yaml +++ b/.github/workflows/test-all.yaml @@ -15,7 +15,7 @@ jobs: run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}" id: myref - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - id: set-matrix # This is very hacky, but it goes like that: # 1) Associate base_ref with origin/base_ref since actions/checkout doesn't do it, if we don't do that we won't be able to check the actual diff @@ -63,7 +63,7 @@ jobs: steps: # Source - name: Checkout current repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: path: functions # Install python 3.9 @@ -106,11 +106,11 @@ jobs: run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}" id: branch - name: Checkout current repo - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: path: functions - name: Checkout Marketplace - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: repository: mlrun/marketplace path: marketplace @@ -136,6 +136,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.MARKETPLACE_ACCESS_TOKEN_V3 }} USERNAME: iguazio-cicd + USEREMAIL: iguaziocicd@gmail.com REPO_PATH: marketplace BASE_REPO: mlrun BASE_BRANCH: master @@ -153,24 +154,30 @@ jobs: exit 1; }; git config --local user.name $USERNAME + git config --local user.email $USEREMAIL git branch --set-upstream-to origin/master git remote -v - echo "Checking out [$BRANCH_NAME]..." + echo "1. Checking out [$BRANCH_NAME]..." git checkout -b $BRANCH_NAME - echo "Checking out [$BASE_BRANCH]..." + echo "2. Checking out [$BASE_BRANCH]..." git checkout $BASE_BRANCH git pull - echo "Checking out [$BRANCH_NAME]..." + echo "3. Checking out [$BRANCH_NAME]..." git checkout $BRANCH_NAME + echo "3a. merging" git merge $BASE_BRANCH + echo "3b. status" git status git status --ignored find . -type f | xargs ls -artl + echo "3b. 
add" git add --all git status git status --ignored - echo "Commiting changes..." + echo "4. Commiting changes..." + echo "4a. git rev-parse" git rev-parse --show-toplevel + echo "4b. git commit" git commit -a -m "Automatically generated by github-worflow[bot] for commit: $COMMIT_SHA" git status git status --ignored diff --git a/churn_server/churn_server.py b/churn_server/churn_server.py index 55f37f280..def2850da 100644 --- a/churn_server/churn_server.py +++ b/churn_server/churn_server.py @@ -43,13 +43,3 @@ def predict(self, body): except Exception as e: raise Exception("Failed to predict %s" % e) - -from mlrun.runtimes import nuclio_init_hook - - -def init_context(context): - nuclio_init_hook(context, globals(), "serving_v2") - - -def handler(context, event): - return context.mlrun_handler(context, event) diff --git a/churn_server/function.yaml b/churn_server/function.yaml index 7a73c11a4..14f6c8cef 100644 --- a/churn_server/function.yaml +++ b/churn_server/function.yaml @@ -29,14 +29,14 @@ spec: annotations: nuclio.io/generated_by: function generated from /User/functions/churn_server/churn_server.py spec: - runtime: python:3.6 + runtime: python:3.9 handler: churn_server:handler env: [] volumes: [] build: commands: [] noBaseImagesPull: true - functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKCgppbXBvcnQgbWxydW4KCgpjbGFzcyBDaHVybk1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkIG11bHRpcGxlIG1vZGVscyBpbiBuZXN0ZWQgZm9sZGVycywgY2h1cm4gbW9kZWwgb25seQogICAgICAgICIiIgogICAgICAgIGNsZl9tb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoIi5wa2wiKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNsZl9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgaWYgImNveCIgaW4gZXh0cmFfZGF0YS5rZXlzKCk6CiAgICAgICAgICAgIGNveF9tb2RlbF9maWxlID0gZXh0cmFfZGF0YVsiY294Il0KICAgICAgICAgICAgc2VsZi5jb3hfbW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNveF9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgICAgIGlmICJjb3gva20iIGluIGV4dHJhX2RhdGEua2V5cygpOgogICAgICAgICAgICAgICAga21fbW9kZWxfZmlsZSA9IGV4dHJhX2RhdGFbImNveC9rbSJdCiAgICAgICAgICAgICAgICBzZWxmLmttX21vZGVsID0gbG9hZChvcGVuKHN0cihrbV9tb2RlbF9maWxlKSwgInJiIikpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keSk6CiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsiaW5wdXRzIl0sIGR0eXBlPW5wLmZsb2F0MzIpLnJlc2hhcGUoLTEsIDIzKQogICAgICAgICAgICByZXN1bHQgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMsIHZhbGlkYXRlX2ZlYXR1cmVzPUZhbHNlKQogICAgICAgICAgICByZXR1cm4gcmVzdWx0LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oIkZhaWxlZCB0byBwcmVkaWN0ICVzIiAlIGUpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawoKCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgInNlcnZpbmdfdjIiKQoKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK + functionSourceCode: 
IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKIyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKCgppbXBvcnQgbWxydW4KCgpjbGFzcyBDaHVybk1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkIG11bHRpcGxlIG1vZGVscyBpbiBuZXN0ZWQgZm9sZGVycywgY2h1cm4gbW9kZWwgb25seQogICAgICAgICIiIgogICAgICAgIGNsZl9tb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoIi5wa2wiKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNsZl9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgaWYgImNveCIgaW4gZXh0cmFfZGF0YS5rZXlzKCk6CiAgICAgICAgICAgIGNveF9tb2RlbF9maWxlID0gZXh0cmFfZGF0YVsiY294Il0KICAgICAgICAgICAgc2VsZi5jb3hfbW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNveF9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgICAgIGlmICJjb3gva20iIGluIGV4dHJhX2RhdGEua2V5cygpOgogICAgICAgICAgICAgICAga21fbW9kZWxfZmlsZSA9IGV4dHJhX2RhdGFbImNveC9rbSJdCiAgICAgICAgICAgICAgICBzZWxmLmttX21vZGVsID0gbG9hZChvcGVuKHN0cihrbV9tb2RlbF9maWxlKSwgInJiIikpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keSk6CiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsiaW5wdXRzIl0sIGR0eXBlPW5wLmZsb2F0MzIpLnJlc2hhcGUoLTEsIDIzKQogICAgICAgICAgICByZXN1bHQgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMsIHZhbGlkYXRlX2ZlYXR1cmVzPUZhbHNlKQogICAgICAgICAgICByZXR1cm4gcmVzdWx0
LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oIkZhaWxlZCB0byBwcmVkaWN0ICVzIiAlIGUpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nX3YyJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg== source: '' function_kind: serving_v2 default_class: ChurnModel diff --git a/churn_server/item.yaml b/churn_server/item.yaml index 3a3b4b6ba..09ba9b713 100644 --- a/churn_server/item.yaml +++ b/churn_server/item.yaml @@ -29,4 +29,4 @@ spec: - xgboost==1.3.1 - lifelines==0.22.8 url: '' -version: 1.1.0 +version: 1.2.0 diff --git a/describe/describe.py b/describe/describe.py index def92782b..27d789f5b 100644 --- a/describe/describe.py +++ b/describe/describe.py @@ -36,7 +36,7 @@ ) from mlrun.datastore import DataItem from mlrun.execution import MLClientCtx -from mlrun.feature_store import FeatureSet, FeatureVector +from mlrun.feature_store import FeatureSet from plotly.subplots import make_subplots pd.set_option("display.float_format", lambda x: "%.2f" % x) @@ -234,24 +234,24 @@ def _create_features_histogram_artifacts( if label_column is not None and problem_type == "classification": all_labels = df[label_column].unique() visible = True - for (columnName, _) in df.iteritems(): - if columnName == label_column: + for column_name in df.columns: + if column_name == label_column: continue if label_column is not None and problem_type == "classification": for label in all_labels: sub_fig = go.Histogram( histfunc="count", - x=df.loc[df[label_column] == label][columnName], + x=df.loc[df[label_column] == label][column_name], name=str(label), visible=visible, ) - figs[f"{columnName}@?@{label}"] = sub_fig + figs[f"{column_name}@?@{label}"] = sub_fig else: - sub_fig = go.Histogram(histfunc="count", x=df[columnName], visible=visible) - figs[f"{columnName}@?@{1}"] = sub_fig + 
sub_fig = go.Histogram(histfunc="count", x=df[column_name], visible=visible) + figs[f"{column_name}@?@{1}"] = sub_fig if visible: - first_feature_name = columnName + first_feature_name = column_name visible = False fig = go.Figure() @@ -338,7 +338,7 @@ def _create_features_2d_scatter_artifacts( Create and log a scatter-2d artifact for each couple of features """ features = [ - columnName for (columnName, _) in df.iteritems() if columnName != label_column + column_name for column_name in df.columns if column_name != label_column ] max_feature_len = float(max(len(elem) for elem in features)) if label_column is not None: @@ -450,11 +450,12 @@ def _create_violin_artifact( plot_num = 0 - for (columnName, columnData) in df.iteritems(): + for column_name in df.columns: + column_data = df[column_name] violin = go.Violin( - x=[columnName] * columnData.shape[0], - y=columnData, - name=columnName, + x=[column_name] * column_data.shape[0], + y=column_data, + name=column_name, ) fig.add_trace( @@ -491,15 +492,15 @@ def _create_imbalance_artifact( """ if label_column: if problem_type == "classification": + values_column = "count" labels_count = df[label_column].value_counts().sort_index() df_labels_count = pd.DataFrame(labels_count) - df_labels_count.rename(columns={label_column: "Total"}, inplace=True) df_labels_count[label_column] = labels_count.index - df_labels_count["weights"] = df_labels_count["Total"] / sum( - df_labels_count["Total"] + df_labels_count.rename(columns={"": values_column}, inplace=True) + df_labels_count[values_column] = df_labels_count[values_column] / sum( + df_labels_count[values_column] ) - - fig = px.pie(df_labels_count, names=label_column, values="Total") + fig = px.pie(df_labels_count, names=label_column, values=values_column) else: fig = px.histogram( histfunc="count", @@ -532,7 +533,7 @@ def _create_corr_artifact( """ if label_column is not None: df = df.drop([label_column], axis=1) - tblcorr = df.corr() + tblcorr = df.corr(numeric_only=True) 
extra_data["correlation-matrix-csv"] = context.log_artifact( TableArtifact("correlation-matrix-csv", df=tblcorr, visible=True), local_path=f"{plots_dest}/correlation-matrix.csv", diff --git a/describe/function.yaml b/describe/function.yaml index 6f518bbfa..f989c6ec7 100644 --- a/describe/function.yaml +++ b/describe/function.yaml @@ -1,54 +1,12 @@ +verbose: false kind: job -metadata: - name: describe - tag: '' - hash: 38ac49fa67c647c7defc230c9853a22657690a9e - project: '' - labels: - author: Davids - categories: - - data-analysis spec: - command: '' - args: [] - image: mlrun/mlrun - build: - functionSourceCode:  - commands: [] - code_origin: https://github.com/davesh0812/functions.git#6c5f9ed5f39ccb1e0f478eee7b4aa10994dfd22b:/Users/davids/Projects/functions/describe/describe.py - origin_filename: /Users/davids/Projects/functions/describe/describe.py entry_points: analyze: - name: analyze - doc: 'The function will output the following artifacts per - - column within the data frame (based on data types) - - If the data has more than 500,000 sample we - - sample randomly 500,000 samples: - - - describe csv - - histograms - - scatter-2d - - violin chart - - correlation-matrix chart - - correlation-matrix csv - - imbalance pie chart - - imbalance-weights-vec csv' parameters: - name: context type: MLClientCtx doc: The function context - default: '' - name: name type: str doc: Key of dataset to database ("dataset" for default) @@ -86,15 +44,47 @@ spec: - name: dask_client doc: Dask client object default: null - outputs: - - default: '' + has_varargs: false lineno: 46 - description: describe and visualizes dataset stats - default_handler: analyze + outputs: + - type: None + name: analyze + has_kwargs: false + doc: 'The function will output the following artifacts per + + column within the data frame (based on data types) + + If the data has more than 500,000 sample we + + sample randomly 500,000 samples: + + + describe csv + + histograms + + scatter-2d + + violin chart + + 
correlation-matrix chart + + correlation-matrix csv + + imbalance pie chart + + imbalance-weights-vec csv' disable_auto_mount: false - env: [] - priority_class_name: '' - preemption_mode: prevent - affinity: null - tolerations: null -verbose: false + default_handler: analyze + description: describe and visualizes dataset stats + build: + functionSourceCode:  + origin_filename: '' + code_origin: '' + image: mlrun/mlrun + command: '' +metadata: + tag: '' + name: describe + categories: + - data-analysis diff --git a/describe/item.yaml b/describe/item.yaml index 4703771b7..47f36787f 100644 --- a/describe/item.yaml +++ b/describe/item.yaml @@ -11,7 +11,7 @@ labels: author: Davids maintainers: [] marketplaceType: '' -mlrunVersion: 1.4.1 +mlrunVersion: 1.6.0 name: describe platformVersion: 3.5.3 spec: @@ -21,4 +21,4 @@ spec: kind: job requirements: [] url: '' -version: 1.2.0 +version: 1.3.0 diff --git a/describe/requirements.txt b/describe/requirements.txt index 8dbc3e68b..a96b6ff1b 100644 --- a/describe/requirements.txt +++ b/describe/requirements.txt @@ -1,6 +1,5 @@ scikit-learn~=1.0.2 plotly~=5.16.1 pytest~=7.0.1 -pandas~=1.3.5 matplotlib~=3.5.1 seaborn~=0.11.2 diff --git a/describe/test_describe.py b/describe/test_describe.py index 1a2270a86..9ffe39abb 100644 --- a/describe/test_describe.py +++ b/describe/test_describe.py @@ -271,79 +271,3 @@ def _create_data(n_samples, n_features, n_classes, n_informative, reg=False): df["timestamp"] = [pd.Timestamp("2022").now()] * n_samples df.to_parquet("artifacts/random_dataset.parquet") return df - - -def _create_dask_func(uri): - dask_cluster_name = "dask-cluster" - dask_cluster = new_function(dask_cluster_name, kind="dask", image="mlrun/ml-models") - dask_cluster.spec.remote = False - dask_uri = uri - dask_cluster.export(dask_uri) - - -def test_import_function_describe_dask(): - dask_uri = "dask_func.yaml" - _create_dask_func(dask_uri) - describe_func = import_function("function.yaml") - is_test_passed = True - 
_create_data(n_samples=100, n_features=5, n_classes=3, n_informative=3) - describe_func.spec.command = "describe_dask.py" - - try: - describe_run = describe_func.run( - name="task-describe", - handler="analyze", - inputs={"table": DATA_PATH}, - params={ - "label_column": "label", - "dask_function": dask_uri, - "dask_flag": True, - }, - artifact_path=os.path.abspath("./artifacts"), - local=True, - ) - - except Exception as exception: - print(f"- The test failed - raised the following error:\n- {exception}") - is_test_passed = False - _validate_paths( - { - "imbalance.html", - "imbalance-weights-vec.csv", - } - ) - assert is_test_passed - - -def test_code_to_function_describe_dask(): - dask_uri = "dask_func.yaml" - _create_dask_func(dask_uri) - describe_func = code_to_function(filename="describe.py", kind="local") - is_test_passed = True - _create_data(n_samples=100, n_features=5, n_classes=3, n_informative=3) - describe_func.spec.command = "describe_dask.py" - - try: - describe_run = describe_func.run( - name="task-describe", - handler="analyze", - inputs={"table": DATA_PATH}, - params={ - "label_column": "label", - "dask_function": dask_uri, - "dask_flag": True, - }, - artifact_path=os.path.abspath("./artifacts"), - local=True, - ) - - except Exception as exception: - print(f"- The test failed - raised the following error:\n- {exception}") - is_test_passed = False - _validate_paths( - { - "imbalance.html", - "imbalance-weights-vec.csv", - } - ) - assert is_test_passed diff --git a/feature_selection/feature_selection.py b/feature_selection/feature_selection.py index 630a09694..30fa8f904 100644 --- a/feature_selection/feature_selection.py +++ b/feature_selection/feature_selection.py @@ -13,17 +13,15 @@ # limitations under the License. 
# import json -import os -import matplotlib.pyplot as plt import mlrun import mlrun.datastore -import mlrun.utils import mlrun.feature_store as fs +import mlrun.utils import numpy as np import pandas as pd -import seaborn as sns -from mlrun.artifacts import PlotArtifact +import plotly.express as px +from mlrun.artifacts import PlotlyArtifact from mlrun.datastore.targets import ParquetTarget # MLRun utils from mlrun.utils.helpers import create_class @@ -42,15 +40,6 @@ } -def _clear_current_figure(): - """ - Clear matplotlib current figure. - """ - plt.cla() - plt.clf() - plt.close() - - def show_values_on_bars(axs, h_v="v", space=0.4): def _show_on_single_plot(ax_): if h_v == "v": @@ -74,33 +63,18 @@ def _show_on_single_plot(ax_): def plot_stat(context, stat_name, stat_df): - _clear_current_figure() - - # Add chart - ax = plt.axes() - stat_chart = sns.barplot( + sorted_df = stat_df.sort_values(stat_name) + fig = px.bar( + data_frame=sorted_df, x=stat_name, - y="index", - data=stat_df.sort_values(stat_name, ascending=False).reset_index(), - ax=ax, + y=sorted_df.index, + title=f"{stat_name} feature scores", + color=stat_name, ) - plt.tight_layout() - - for p in stat_chart.patches: - width = p.get_width() - plt.text( - 5 + p.get_width(), - p.get_y() + 0.55 * p.get_height(), - "{:1.2f}".format(width), - ha="center", - va="center", - ) - context.log_artifact( - PlotArtifact(f"{stat_name}", body=plt.gcf()), - local_path=os.path.join("plots", "feature_selection", f"{stat_name}.html"), + item=PlotlyArtifact(key=stat_name, figure=fig), + local_path=f"{stat_name}.html", ) - _clear_current_figure() def feature_selection( @@ -115,7 +89,6 @@ def feature_selection( sample_ratio: float = None, output_vector_name: float = None, ignore_type_errors: bool = False, - is_feature_vector: bool = False, ): """ Applies selected feature selection statistical functions or models on our 'df_artifact'. @@ -138,10 +111,9 @@ def feature_selection( model name (ex. 
LinearSVC), formalized json (contains 'CLASS', 'FIT', 'META') or a path to such json file. :param max_scaled_scores: produce feature scores table scaled with max_scaler. - :param sample_ratio: percentage of the dataset the user whishes to compute the feature selection process on. + :param sample_ratio: percentage of the dataset the user wishes to compute the feature selection process on. :param output_vector_name: creates a new feature vector containing only the identifies features. :param ignore_type_errors: skips datatypes that are neither float nor int within the feature vector. - :param is_feature_vector: bool stating if the data is passed as a feature vector. """ stat_filters = stat_filters or DEFAULT_STAT_FILTERS model_filters = model_filters or DEFAULT_MODEL_FILTERS diff --git a/feature_selection/function.yaml b/feature_selection/function.yaml index f1bf53b8a..44cdd9894 100644 --- a/feature_selection/function.yaml +++ b/feature_selection/function.yaml @@ -73,7 +73,7 @@ spec: default: true - name: sample_ratio type: float - doc: percentage of the dataset the user whishes to compute the feature selection + doc: percentage of the dataset the user wishes to compute the feature selection process on. 
default: null - name: output_vector_name @@ -95,7 +95,7 @@ spec: command: '' build: origin_filename: '' - functionSourceCode:  + functionSourceCode:  code_origin: '' default_handler: feature_selection image: mlrun/mlrun diff --git a/feature_selection/requirements.txt b/feature_selection/requirements.txt index 70a079c7d..e4d79d180 100644 --- a/feature_selection/requirements.txt +++ b/feature_selection/requirements.txt @@ -1,5 +1,3 @@ scikit-learn -matplotlib -seaborn scikit-plot - +plotly~=5.4.0 diff --git a/feature_selection/test_feature_selection.py b/feature_selection/test_feature_selection.py index 9cb5ca621..6ae949aab 100644 --- a/feature_selection/test_feature_selection.py +++ b/feature_selection/test_feature_selection.py @@ -12,14 +12,31 @@ # See the License for the specific language governing permissions and # limitations under the License. # -from mlrun import code_to_function -from pathlib import Path +import os import shutil +from pathlib import Path + +import mlrun -METRICS_PATH = 'data/metrics.pq' -ARTIFACTS_PATH = 'artifacts' -RUNS_PATH = 'runs' -SCHEDULES_PATH = 'schedules' +METRICS_PATH = "data/metrics.pq" +ARTIFACTS_PATH = "artifacts" +RUNS_PATH = "runs" +SCHEDULES_PATH = "schedules" +PLOTS_PATH = os.path.abspath("./artifacts/feature-selection-feature-selection/0") + + +def _validate_paths(paths): + """ + Check if all the expected plot are saved + """ + base_folder = PLOTS_PATH + for path in paths: + full_path = os.path.join(base_folder, path) + if Path(full_path).is_file(): + print(f"{path} exist") + else: + raise FileNotFoundError(f"{path} not found!") + return True def _delete_outputs(paths): @@ -29,20 +46,24 @@ def _delete_outputs(paths): def test_run_local_feature_selection(): - fn = code_to_function(name='test_run_local_feature_selection', - filename="feature_selection.py", - handler="feature_selection", - kind="local", - ) - fn.spec.command = "feature_selection.py" + fn = mlrun.import_function("function.yaml") run = fn.run( params={ - 'k': 2, 
- 'min_votes': 0.3, - 'label_column': 'is_error', + "k": 2, + "min_votes": 0.3, + "label_column": "is_error", }, - inputs={'df_artifact': 'data/metrics.pq'}, - artifact_path='artifacts/', + inputs={"df_artifact": "data/metrics.pq"}, + artifact_path="artifacts/", + local=True, + ) + assert _validate_paths( + [ + "chi2.html", + "f_classif.html", + "f_regression.html", + "mutual_info_classif.html", + ] ) - assert run.outputs['feature_scores'] and run.outputs['selected_features'] _delete_outputs({ARTIFACTS_PATH, RUNS_PATH, SCHEDULES_PATH}) + assert run.outputs['feature_scores'] and run.outputs['selected_features'] diff --git a/hugging_face_serving/function.yaml b/hugging_face_serving/function.yaml index e1bb3b0ce..764fc1cfe 100644 --- a/hugging_face_serving/function.yaml +++ b/hugging_face_serving/function.yaml @@ -2,11 +2,13 @@ kind: serving metadata: name: hugging-face-serving tag: '' - hash: 39bfca7b639022fa03f5ca87f85f9e17fc837b70 + hash: 1a489a57da861f129eb26e933f34e58927e41195 project: '' labels: author: yonish categories: + - huggingface + - genai - model-serving - machine-learning spec: @@ -14,37 +16,28 @@ spec: args: [] image: mlrun/ml-models build: - commands: - - python -m pip install transformers==4.21.3 tensorflow==2.9.2 - code_origin: https://github.com/mlrun/functions.git#250244b2527c5ce8a82438b4340df34de6e19dc3:/Users/yonatanshelach/yoni/projects/functions/hugging_face_serving/hugging_face_serving.py - origin_filename: /Users/yonatanshelach/yoni/projects/functions/hugging_face_serving/hugging_face_serving.py + functionSourceCode: 
IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKCmZyb20gYWJjIGltcG9ydCBBQkMKZnJvbSBpbXBvcnRsaWIgaW1wb3J0IGltcG9ydF9tb2R1bGUKZnJvbSB0eXBpbmcgaW1wb3J0IExpc3QKCmZyb20gdHJhbnNmb3JtZXJzIGltcG9ydCBwaXBlbGluZQoKaW1wb3J0IG1scnVuLnNlcnZpbmcKClBBQ0tBR0VfTU9EVUxFID0gInRyYW5zZm9ybWVycyIKU0VSSUFMSVpBQkxFX1RZUEVTID0gW2RpY3QsIGxpc3QsIHR1cGxlLCBzdHIsIGludCwgZmxvYXRdCgoKY2xhc3MgSHVnZ2luZ0ZhY2VNb2RlbFNlcnZlcihtbHJ1bi5zZXJ2aW5nLlYyTW9kZWxTZXJ2ZXIsIEFCQyk6CiAgICAiIiIKICAgIEh1Z2dpbmcgRmFjZSBNb2RlbCBzZXJ2aW5nIGNsYXNzLCBpbmhlcml0aW5nIHRoZSBWMk1vZGVsU2VydmVyIGNsYXNzIGZvciBiZWluZyBpbml0aWFsaXplZCBhdXRvbWF0aWNhbGx5IGJ5IHRoZQogICAgbW9kZWwgc2VydmVyIGFuZCBiZSBhYmxlIHRvIHJ1biBsb2NhbGx5IGFzIHBhcnQgb2YgYSBudWNsaW8gc2VydmVybGVzcyBmdW5jdGlvbiwgb3IgYXMgcGFydCBvZiBhIHJlYWwtdGltZSBwaXBlbGluZS4KICAgICIiIgoKICAgIGRlZiBfX2luaXRfXygKICAgICAgICBzZWxmLAogICAgICAgIGNvbnRleHQ6IG1scnVuLk1MQ2xpZW50Q3R4LAogICAgICAgIG5hbWU6IHN0ciwKICAgICAgICB0YXNrOiBzdHIsCiAgICAgICAgbW9kZWxfcGF0aDogc3RyID0gTm9uZSwKICAgICAgICBtb2RlbF9uYW1lOiBzdHIgPSBOb25lLAogICAgICAgIG1vZGVsX2NsYXNzOiBzdHIgPSBOb25lLAogICAgICAgIHRva2VuaXplcl9uYW1lOiBzdHIgPSBOb25lLAogICAgICAgIHRva2VuaXplcl9jbGFzczogc3RyID0gTm9uZSwKICAgICAgICBmcmFtZXdvcms6IHN0ciA9IE5vbmUsCiAgICAgICAgKipjbGFzc19hcmdzLAogICAgKToKICAgICAgICAiIiIKICAgICAgICBJbml0aWFsaXplIGEgc2VydmluZyBj
bGFzcyBmb3IgYSBIdWdnaW5nIGZhY2UgbW9kZWwuCgogICAgICAgIDpwYXJhbSBjb250ZXh0OiAgICAgICAgIFRoZSBtbHJ1biBjb250ZXh0IHRvIHdvcmsgd2l0aAogICAgICAgIDpwYXJhbSBuYW1lOiAgICAgICAgICAgIFRoZSBuYW1lIG9mIHRoaXMgc2VydmVyIHRvIGJlIGluaXRpYWxpemVkCiAgICAgICAgOnBhcmFtIG1vZGVsX3BhdGg6ICAgICAgTm90IGluIHVzZS4gV2hlbiBhZGRpbmcgYSBtb2RlbCBwYXNzIGFueSBzdHJpbmcgdmFsdWUKICAgICAgICA6cGFyYW0gbW9kZWxfbmFtZTogICAgICBUaGUgbW9kZWwncyBuYW1lIGluIHRoZSBIdWdnaW5nIEZhY2UgaHViCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZS5nLiwgYG5scHRvd24vYmVydC1iYXNlLW11bHRpbGluZ3VhbC11bmNhc2VkLXNlbnRpbWVudGAKICAgICAgICA6cGFyYW0gbW9kZWxfY2xhc3M6ICAgICBUaGUgbW9kZWwncyBjbGFzcyB0eXBlIG9iamVjdCB3aGljaCBjYW4gYmUgcGFzc2VkIGFzIHRoZSBjbGFzcydzIG5hbWUgKHN0cmluZykuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgTXVzdCBiZSBwcm92aWRlZCBhbmQgdG8gYmUgbWF0Y2hlZCB3aXRoIGBtb2RlbF9uYW1lYC4KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlLmcuLCBgQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbmAKICAgICAgICA6cGFyYW0gdG9rZW5pemVyX25hbWU6ICBUaGUgdG9rZW5pemVyJ3MgbmFtZSBpbiB0aGUgSHVnZ2luZyBGYWNlIGh1YgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGUuZy4sIGBubHB0b3duL2JlcnQtYmFzZS1tdWx0aWxpbmd1YWwtdW5jYXNlZC1zZW50aW1lbnRgCiAgICAgICAgOnBhcmFtIHRva2VuaXplcl9jbGFzczogVGhlIG1vZGVsJ3MgY2xhc3MgdHlwZSBvYmplY3Qgd2hpY2ggY2FuIGJlIHBhc3NlZCBhcyB0aGUgY2xhc3MncyBuYW1lIChzdHJpbmcpLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIE11c3QgYmUgcHJvdmlkZWQgYW5kIHRvIGJlIG1hdGNoZWQgd2l0aCBgbW9kZWxfbmFtZWAuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZS5nLiwgYEF1dG9Ub2tlbml6ZXJgCiAgICAgICAgOnBhcmFtIGZyYW1ld29yazogICAgICAgVGhlIGZyYW1ld29yayB0byB1c2UsIGVpdGhlciBgInB0ImAgZm9yIFB5VG9yY2ggb3IgYCJ0ZiJgIGZvciBUZW5zb3JGbG93LiBUaGUgc3BlY2lmaWVkCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZnJhbWV3b3JrIG11c3QgYmUgaW5zdGFsbGVkLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIElmIG5vIGZyYW1ld29yayBpcyBzcGVjaWZpZWQsIHdpbGwgZGVmYXVsdCB0byB0aGUgb25lIGN1cnJlbnRseSBpbnN0YWxsZWQuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgSWYgbm8gZnJhbWV3b3JrIGlzIHNwZWNpZmllZCBhbmQgYm90aCBmcmFtZXdvcmtzIGFyZSBpbnN0YWxsZWQsIHdpbGwgZGVm
YXVsdCB0byB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmcmFtZXdvcmsgb2YgdGhlIGBtb2RlbGAsIG9yIHRvIFB5VG9yY2ggaWYgbm8gbW9kZWwgaXMgcHJvdmlkZWQuCiAgICAgICAgOnBhcmFtIGNsYXNzX2FyZ3M6ICAgICAgLQogICAgICAgICIiIgogICAgICAgIHN1cGVyKEh1Z2dpbmdGYWNlTW9kZWxTZXJ2ZXIsIHNlbGYpLl9faW5pdF9fKAogICAgICAgICAgICBjb250ZXh0PWNvbnRleHQsCiAgICAgICAgICAgIG5hbWU9bmFtZSwKICAgICAgICAgICAgbW9kZWxfcGF0aD1tb2RlbF9wYXRoLAogICAgICAgICAgICAqKmNsYXNzX2FyZ3MsCiAgICAgICAgKQogICAgICAgIHNlbGYudGFzayA9IHRhc2sKICAgICAgICBzZWxmLm1vZGVsID0gTm9uZQogICAgICAgIHNlbGYudG9rZW5pemVyID0gTm9uZQogICAgICAgIHNlbGYubW9kZWxfbmFtZSA9IG1vZGVsX25hbWUKICAgICAgICBzZWxmLnRva2VuaXplcl9uYW1lID0gdG9rZW5pemVyX25hbWUKICAgICAgICBzZWxmLm1vZGVsX2NsYXNzID0gbW9kZWxfY2xhc3MKICAgICAgICBzZWxmLnRva2VuaXplcl9jbGFzcyA9IHRva2VuaXplcl9jbGFzcwogICAgICAgIHNlbGYuZnJhbWV3b3JrID0gZnJhbWV3b3JrCiAgICAgICAgc2VsZi5waXBlID0gTm9uZQoKICAgIGRlZiBsb2FkKHNlbGYpOgogICAgICAgICIiImxvYWQgYW5kIGluaXRpYWxpemUgdGhlIG1vZGVsIGFuZC9vciBvdGhlciBlbGVtZW50cyIiIgogICAgICAgIGlmIHNlbGYubW9kZWxfY2xhc3M6CiAgICAgICAgICAgIG1vZGVsX29iamVjdCA9IGdldGF0dHIoaW1wb3J0X21vZHVsZShQQUNLQUdFX01PRFVMRSksIHNlbGYubW9kZWxfY2xhc3MpCiAgICAgICAgICAgIHNlbGYubW9kZWwgPSBtb2RlbF9vYmplY3QuZnJvbV9wcmV0cmFpbmVkKHNlbGYubW9kZWxfbmFtZSkKICAgICAgICBpZiBzZWxmLnRva2VuaXplcl9jbGFzczoKICAgICAgICAgICAgdG9rZW5pemVyX29iamVjdCA9IGdldGF0dHIoCiAgICAgICAgICAgICAgICBpbXBvcnRfbW9kdWxlKFBBQ0tBR0VfTU9EVUxFKSwgc2VsZi50b2tlbml6ZXJfY2xhc3MKICAgICAgICAgICAgKQogICAgICAgICAgICBzZWxmLnRva2VuaXplciA9IHRva2VuaXplcl9vYmplY3QuZnJvbV9wcmV0cmFpbmVkKHNlbGYudG9rZW5pemVyX25hbWUpCiAgICAgICAgc2VsZi5waXBlID0gcGlwZWxpbmUoCiAgICAgICAgICAgIHRhc2s9c2VsZi50YXNrLAogICAgICAgICAgICBtb2RlbD1zZWxmLm1vZGVsIG9yIHNlbGYubW9kZWxfbmFtZSwKICAgICAgICAgICAgdG9rZW5pemVyPXNlbGYudG9rZW5pemVyLAogICAgICAgICAgICBmcmFtZXdvcms9c2VsZi5mcmFtZXdvcmssCiAgICAgICAgKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIGJvZHk6IGRpY3QpIC0+IExpc3Q6CiAgICAgICAgIiIiR2VuZXJhdGUgbW9kZWwgcHJlZGljdGlvbnMgZnJvbSBzYW1wbGUuIiIiCiAgICAgICAgaWYgc2VsZi5waXBlIGlzIE5vbmU6CiAgICAgICAgICAgIHJhaXNlIFZhbHVlRXJyb3Io
IlBsZWFzZSB1c2UgYC5sb2FkKClgIikKICAgICAgICB0cnk6CiAgICAgICAgICAgIGlmIGlzaW5zdGFuY2UoYm9keVsiaW5wdXRzIl1bMF0sIGRpY3QpOgogICAgICAgICAgICAgICAgcmVzdWx0ID0gW3NlbGYucGlwZSgqKl9pbnB1dCkgZm9yIF9pbnB1dCBpbiBib2R5WyJpbnB1dHMiXV0KICAgICAgICAgICAgZWxzZToKICAgICAgICAgICAgICAgIHJlc3VsdCA9IHNlbGYucGlwZShib2R5WyJpbnB1dHMiXSkKICAgICAgICAgICAgIyByZXBsYWNlIGxpc3Qgb2YgbGlzdHMgb2YgZGljdHMgaW50byBhIGxpc3Qgb2YgZGljdHM6CiAgICAgICAgICAgIGlmIGFsbChpc2luc3RhbmNlKHJlcywgbGlzdCkgZm9yIHJlcyBpbiByZXN1bHQpOgogICAgICAgICAgICAgICAgbmV3X3Jlc3VsdCA9IFtyZXNbMF0gZm9yIHJlcyBpbiByZXN1bHRdCiAgICAgICAgICAgICAgICByZXN1bHQgPSBuZXdfcmVzdWx0CgogICAgICAgICAgICBub25fc2VyaWFsaXphYmxlX3R5cGVzID0gW10KICAgICAgICAgICAgZm9yIHJlcyBpbiByZXN1bHQ6CiAgICAgICAgICAgICAgICBmb3Iga2V5LCB2YWwgaW4gcmVzLml0ZW1zKCk6CiAgICAgICAgICAgICAgICAgICAgaWYgdHlwZSh2YWwpIG5vdCBpbiBTRVJJQUxJWkFCTEVfVFlQRVM6CiAgICAgICAgICAgICAgICAgICAgICAgIG5vbl9zZXJpYWxpemFibGVfdHlwZXMuYXBwZW5kKHN0cih0eXBlKHZhbCkpKQogICAgICAgICAgICAgICAgICAgICAgICByZXNba2V5XSA9IHN0cih2YWwpCiAgICAgICAgICAgIGlmIG5vbl9zZXJpYWxpemFibGVfdHlwZXM6CiAgICAgICAgICAgICAgICBzZWxmLmNvbnRleHQubG9nZ2VyLmluZm8oCiAgICAgICAgICAgICAgICAgICAgZiJOb24tc2VyaWFsaXphYmxlIHR5cGVzOiB7bm9uX3NlcmlhbGl6YWJsZV90eXBlc30gd2VyZSBjYXN0ZWQgdG8gc3RyaW5ncyIKICAgICAgICAgICAgICAgICkKICAgICAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGU6CiAgICAgICAgICAgIHJhaXNlIEV4Y2VwdGlvbigiRmFpbGVkIHRvIHByZWRpY3QgJXMiICUgZSkKICAgICAgICByZXR1cm4gcmVzdWx0Cgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK + commands: [] + code_origin: '' + origin_filename: '' + requirements: + - transformers==4.21.3 + - tensorflow==2.9.2 description: Generic Hugging Face model server. 
- default_handler: handler + default_handler: '' disable_auto_mount: false - env: [] + clone_target_dir: '' + env: + - name: MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK + value: enabled priority_class_name: '' preemption_mode: prevent min_replicas: 1 max_replicas: 4 - base_spec: - apiVersion: nuclio.io/v1 - kind: Function - metadata: - name: hugging-face-serving - labels: {} - annotations: - nuclio.io/generated_by: function generated from /Users/yonatanshelach/yoni/projects/functions/hugging_face_serving/hugging_face_serving.py - spec: - runtime: python - handler: hugging_face_serving:handler - env: [] - volumes: [] - build: - commands: [] - noBaseImagesPull: true - functionSourceCode: IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKCmZyb20gYWJjIGltcG9ydCBBQkMKZnJvbSBpbXBvcnRsaWIgaW1wb3J0IGltcG9ydF9tb2R1bGUKZnJvbSB0eXBpbmcgaW1wb3J0IExpc3QKCmZyb20gdHJhbnNmb3JtZXJzIGltcG9ydCBwaXBlbGluZQoKaW1wb3J0IG1scnVuLnNlcnZpbmcKClBBQ0tBR0VfTU9EVUxFID0gInRyYW5zZm9ybWVycyIKU0VSSUFMSVpBQkxFX1RZUEVTID0gW2RpY3QsIGxpc3QsIHR1cGxlLCBzdHIsIGludCwgZmxvYXRdCgoKY2xhc3MgSHVnZ2luZ0ZhY2VNb2RlbFNlcnZlcihtbHJ1bi5zZXJ2aW5nLlYyTW9kZWxTZXJ2ZXIsIEFCQyk6CiAgICAiIiIKICAgIEh1Z2dpbmcgRmFjZSBNb2RlbCBzZXJ2aW5nIGNsYXNzLCBpbmhlcml0aW5nIHRoZSBWMk1vZGVsU2VydmVyIGNsYXNzIGZvciBiZWluZyBpbml0aWFsaXplZCBhdXRvbWF0aWNhbGx5IG
J5IHRoZQogICAgbW9kZWwgc2VydmVyIGFuZCBiZSBhYmxlIHRvIHJ1biBsb2NhbGx5IGFzIHBhcnQgb2YgYSBudWNsaW8gc2VydmVybGVzcyBmdW5jdGlvbiwgb3IgYXMgcGFydCBvZiBhIHJlYWwtdGltZSBwaXBlbGluZS4KICAgICIiIgoKICAgIGRlZiBfX2luaXRfXygKICAgICAgICBzZWxmLAogICAgICAgIGNvbnRleHQ6IG1scnVuLk1MQ2xpZW50Q3R4LAogICAgICAgIG5hbWU6IHN0ciwKICAgICAgICB0YXNrOiBzdHIsCiAgICAgICAgbW9kZWxfcGF0aDogc3RyID0gTm9uZSwKICAgICAgICBtb2RlbF9uYW1lOiBzdHIgPSBOb25lLAogICAgICAgIG1vZGVsX2NsYXNzOiBzdHIgPSBOb25lLAogICAgICAgIHRva2VuaXplcl9uYW1lOiBzdHIgPSBOb25lLAogICAgICAgIHRva2VuaXplcl9jbGFzczogc3RyID0gTm9uZSwKICAgICAgICBmcmFtZXdvcms6IHN0ciA9IE5vbmUsCiAgICAgICAgKipjbGFzc19hcmdzLAogICAgKToKICAgICAgICAiIiIKICAgICAgICBJbml0aWFsaXplIGEgc2VydmluZyBjbGFzcyBmb3IgYSBIdWdnaW5nIGZhY2UgbW9kZWwuCgogICAgICAgIDpwYXJhbSBjb250ZXh0OiAgICAgICAgIFRoZSBtbHJ1biBjb250ZXh0IHRvIHdvcmsgd2l0aAogICAgICAgIDpwYXJhbSBuYW1lOiAgICAgICAgICAgIFRoZSBuYW1lIG9mIHRoaXMgc2VydmVyIHRvIGJlIGluaXRpYWxpemVkCiAgICAgICAgOnBhcmFtIG1vZGVsX3BhdGg6ICAgICAgTm90IGluIHVzZS4gV2hlbiBhZGRpbmcgYSBtb2RlbCBwYXNzIGFueSBzdHJpbmcgdmFsdWUKICAgICAgICA6cGFyYW0gbW9kZWxfbmFtZTogICAgICBUaGUgbW9kZWwncyBuYW1lIGluIHRoZSBIdWdnaW5nIEZhY2UgaHViCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZS5nLiwgYG5scHRvd24vYmVydC1iYXNlLW11bHRpbGluZ3VhbC11bmNhc2VkLXNlbnRpbWVudGAKICAgICAgICA6cGFyYW0gbW9kZWxfY2xhc3M6ICAgICBUaGUgbW9kZWwncyBjbGFzcyB0eXBlIG9iamVjdCB3aGljaCBjYW4gYmUgcGFzc2VkIGFzIHRoZSBjbGFzcydzIG5hbWUgKHN0cmluZykuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgTXVzdCBiZSBwcm92aWRlZCBhbmQgdG8gYmUgbWF0Y2hlZCB3aXRoIGBtb2RlbF9uYW1lYC4KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBlLmcuLCBgQXV0b01vZGVsRm9yU2VxdWVuY2VDbGFzc2lmaWNhdGlvbmAKICAgICAgICA6cGFyYW0gdG9rZW5pemVyX25hbWU6ICBUaGUgdG9rZW5pemVyJ3MgbmFtZSBpbiB0aGUgSHVnZ2luZyBGYWNlIGh1YgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIGUuZy4sIGBubHB0b3duL2JlcnQtYmFzZS1tdWx0aWxpbmd1YWwtdW5jYXNlZC1zZW50aW1lbnRgCiAgICAgICAgOnBhcmFtIHRva2VuaXplcl9jbGFzczogVGhlIG1vZGVsJ3MgY2xhc3MgdHlwZSBvYmplY3Qgd2hpY2ggY2FuIGJlIHBhc3NlZCBhcyB0aGUgY2xhc3MncyBuYW1lIChzdHJpbmcpLgogICAgICAgICAgICAgICAgIC
AgICAgICAgICAgICAgIE11c3QgYmUgcHJvdmlkZWQgYW5kIHRvIGJlIG1hdGNoZWQgd2l0aCBgbW9kZWxfbmFtZWAuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZS5nLiwgYEF1dG9Ub2tlbml6ZXJgCiAgICAgICAgOnBhcmFtIGZyYW1ld29yazogICAgICAgVGhlIGZyYW1ld29yayB0byB1c2UsIGVpdGhlciBgInB0ImAgZm9yIFB5VG9yY2ggb3IgYCJ0ZiJgIGZvciBUZW5zb3JGbG93LiBUaGUgc3BlY2lmaWVkCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgZnJhbWV3b3JrIG11c3QgYmUgaW5zdGFsbGVkLgogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIElmIG5vIGZyYW1ld29yayBpcyBzcGVjaWZpZWQsIHdpbGwgZGVmYXVsdCB0byB0aGUgb25lIGN1cnJlbnRseSBpbnN0YWxsZWQuCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgSWYgbm8gZnJhbWV3b3JrIGlzIHNwZWNpZmllZCBhbmQgYm90aCBmcmFtZXdvcmtzIGFyZSBpbnN0YWxsZWQsIHdpbGwgZGVmYXVsdCB0byB0aGUKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBmcmFtZXdvcmsgb2YgdGhlIGBtb2RlbGAsIG9yIHRvIFB5VG9yY2ggaWYgbm8gbW9kZWwgaXMgcHJvdmlkZWQuCiAgICAgICAgOnBhcmFtIGNsYXNzX2FyZ3M6ICAgICAgLQogICAgICAgICIiIgogICAgICAgIHN1cGVyKEh1Z2dpbmdGYWNlTW9kZWxTZXJ2ZXIsIHNlbGYpLl9faW5pdF9fKAogICAgICAgICAgICBjb250ZXh0PWNvbnRleHQsCiAgICAgICAgICAgIG5hbWU9bmFtZSwKICAgICAgICAgICAgbW9kZWxfcGF0aD1tb2RlbF9wYXRoLAogICAgICAgICAgICAqKmNsYXNzX2FyZ3MsCiAgICAgICAgKQogICAgICAgIHNlbGYudGFzayA9IHRhc2sKICAgICAgICBzZWxmLm1vZGVsID0gTm9uZQogICAgICAgIHNlbGYudG9rZW5pemVyID0gTm9uZQogICAgICAgIHNlbGYubW9kZWxfbmFtZSA9IG1vZGVsX25hbWUKICAgICAgICBzZWxmLnRva2VuaXplcl9uYW1lID0gdG9rZW5pemVyX25hbWUKICAgICAgICBzZWxmLm1vZGVsX2NsYXNzID0gbW9kZWxfY2xhc3MKICAgICAgICBzZWxmLnRva2VuaXplcl9jbGFzcyA9IHRva2VuaXplcl9jbGFzcwogICAgICAgIHNlbGYuZnJhbWV3b3JrID0gZnJhbWV3b3JrCiAgICAgICAgc2VsZi5waXBlID0gTm9uZQoKICAgIGRlZiBsb2FkKHNlbGYpOgogICAgICAgICIiImxvYWQgYW5kIGluaXRpYWxpemUgdGhlIG1vZGVsIGFuZC9vciBvdGhlciBlbGVtZW50cyIiIgogICAgICAgIGlmIHNlbGYubW9kZWxfY2xhc3M6CiAgICAgICAgICAgIG1vZGVsX29iamVjdCA9IGdldGF0dHIoaW1wb3J0X21vZHVsZShQQUNLQUdFX01PRFVMRSksIHNlbGYubW9kZWxfY2xhc3MpCiAgICAgICAgICAgIHNlbGYubW9kZWwgPSBtb2RlbF9vYmplY3QuZnJvbV9wcmV0cmFpbmVkKHNlbGYubW9kZWxfbmFtZSkKICAgICAgICBpZiBzZWxmLnRva2VuaXplcl9jbGFzczoKICAgICAgICAgICAgdG9rZW5pemVyX29iamVjdCA9IGdldGF0dHIoCi
AgICAgICAgICAgICAgICBpbXBvcnRfbW9kdWxlKFBBQ0tBR0VfTU9EVUxFKSwgc2VsZi50b2tlbml6ZXJfY2xhc3MKICAgICAgICAgICAgKQogICAgICAgICAgICBzZWxmLnRva2VuaXplciA9IHRva2VuaXplcl9vYmplY3QuZnJvbV9wcmV0cmFpbmVkKHNlbGYudG9rZW5pemVyX25hbWUpCiAgICAgICAgc2VsZi5waXBlID0gcGlwZWxpbmUoCiAgICAgICAgICAgIHRhc2s9c2VsZi50YXNrLAogICAgICAgICAgICBtb2RlbD1zZWxmLm1vZGVsIG9yIHNlbGYubW9kZWxfbmFtZSwKICAgICAgICAgICAgdG9rZW5pemVyPXNlbGYudG9rZW5pemVyLAogICAgICAgICAgICBmcmFtZXdvcms9c2VsZi5mcmFtZXdvcmssCiAgICAgICAgKQoKICAgIGRlZiBwcmVkaWN0KHNlbGYsIGJvZHk6IGRpY3QpIC0+IExpc3Q6CiAgICAgICAgIiIiR2VuZXJhdGUgbW9kZWwgcHJlZGljdGlvbnMgZnJvbSBzYW1wbGUuIiIiCiAgICAgICAgaWYgc2VsZi5waXBlIGlzIE5vbmU6CiAgICAgICAgICAgIHJhaXNlIFZhbHVlRXJyb3IoIlBsZWFzZSB1c2UgYC5sb2FkKClgIikKICAgICAgICB0cnk6CiAgICAgICAgICAgIGlmIGlzaW5zdGFuY2UoYm9keVsiaW5wdXRzIl1bMF0sIGRpY3QpOgogICAgICAgICAgICAgICAgcmVzdWx0ID0gW3NlbGYucGlwZSgqKl9pbnB1dCkgZm9yIF9pbnB1dCBpbiBib2R5WyJpbnB1dHMiXV0KICAgICAgICAgICAgZWxzZToKICAgICAgICAgICAgICAgIHJlc3VsdCA9IHNlbGYucGlwZShib2R5WyJpbnB1dHMiXSkKICAgICAgICAgICAgIyByZXBsYWNlIGxpc3Qgb2YgbGlzdHMgb2YgZGljdHMgaW50byBhIGxpc3Qgb2YgZGljdHM6CiAgICAgICAgICAgIGlmIGFsbChpc2luc3RhbmNlKHJlcywgbGlzdCkgZm9yIHJlcyBpbiByZXN1bHQpOgogICAgICAgICAgICAgICAgbmV3X3Jlc3VsdCA9IFtyZXNbMF0gZm9yIHJlcyBpbiByZXN1bHRdCiAgICAgICAgICAgICAgICByZXN1bHQgPSBuZXdfcmVzdWx0CgogICAgICAgICAgICBub25fc2VyaWFsaXphYmxlX3R5cGVzID0gW10KICAgICAgICAgICAgZm9yIHJlcyBpbiByZXN1bHQ6CiAgICAgICAgICAgICAgICBmb3Iga2V5LCB2YWwgaW4gcmVzLml0ZW1zKCk6CiAgICAgICAgICAgICAgICAgICAgaWYgdHlwZSh2YWwpIG5vdCBpbiBTRVJJQUxJWkFCTEVfVFlQRVM6CiAgICAgICAgICAgICAgICAgICAgICAgIG5vbl9zZXJpYWxpemFibGVfdHlwZXMuYXBwZW5kKHN0cih0eXBlKHZhbCkpKQogICAgICAgICAgICAgICAgICAgICAgICByZXNba2V5XSA9IHN0cih2YWwpCiAgICAgICAgICAgIGlmIG5vbl9zZXJpYWxpemFibGVfdHlwZXM6CiAgICAgICAgICAgICAgICBzZWxmLmNvbnRleHQubG9nZ2VyLmluZm8oCiAgICAgICAgICAgICAgICAgICAgZiJOb24tc2VyaWFsaXphYmxlIHR5cGVzOiB7bm9uX3NlcmlhbGl6YWJsZV90eXBlc30gd2VyZSBjYXN0ZWQgdG8gc3RyaW5ncyIKICAgICAgICAgICAgICAgICkKICAgICAgICBleGNlcHQgRXhjZXB0aW9uIGFzIGU6CiAgICAgICAgICAgIHJhaX
NlIEV4Y2VwdGlvbigiRmFpbGVkIHRvIHByZWRpY3QgJXMiICUgZSkKICAgICAgICByZXR1cm4gcmVzdWx0Cgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK source: '' function_kind: serving_v2 + function_handler: hugging_face_serving:handler + base_image_pull: false default_class: HuggingFaceModelServer secret_sources: [] affinity: null diff --git a/hugging_face_serving/item.yaml b/hugging_face_serving/item.yaml index f7fa92637..d1f78769d 100644 --- a/hugging_face_serving/item.yaml +++ b/hugging_face_serving/item.yaml @@ -1,5 +1,7 @@ apiVersion: v1 categories: +- huggingface +- genai - model-serving - machine-learning description: Generic Hugging Face model server. @@ -26,4 +28,5 @@ spec: - transformers==4.21.3 - tensorflow==2.9.2 url: '' -version: 1.0.0 +version: 1.1.0 +test_valid: false \ No newline at end of file diff --git a/mlflow_utils/function.yaml b/mlflow_utils/function.yaml new file mode 100644 index 000000000..d2e2bffec --- /dev/null +++ b/mlflow_utils/function.yaml @@ -0,0 +1,31 @@ +metadata: + name: mlflow-utils + categories: + - genai + - model-serving + - machine-learning + tag: '' +spec: + default_handler: '' + image: mlrun/mlrun + command: '' + base_image_pull: false + default_class: MLFlowModelServer + function_handler: mlflow-utils:handler + disable_auto_mount: false + build: + origin_filename: '' + code_origin: '' + requirements: + - mlflow==2.12.2 + functionSourceCode: 
aW1wb3J0IHppcGZpbGUKZnJvbSB0eXBpbmcgaW1wb3J0IEFueSwgRGljdAppbXBvcnQgbWxmbG93CmZyb20gbWxydW4uc2VydmluZy52Ml9zZXJ2aW5nIGltcG9ydCBWMk1vZGVsU2VydmVyCmltcG9ydCBwYW5kYXMgYXMgcGQKCgpjbGFzcyBNTEZsb3dNb2RlbFNlcnZlcihWMk1vZGVsU2VydmVyKToKICAgICIiIgogICAgTUxGbG93IHRyYWNrZXIgTW9kZWwgc2VydmluZyBjbGFzcywgaW5oZXJpdGluZyB0aGUgVjJNb2RlbFNlcnZlciBjbGFzcyBmb3IgYmVpbmcgaW5pdGlhbGl6ZWQgYXV0b21hdGljYWxseSBieSB0aGUgbW9kZWwKICAgIHNlcnZlciBhbmQgYmUgYWJsZSB0byBydW4gbG9jYWxseSBhcyBwYXJ0IG9mIGEgbnVjbGlvIHNlcnZlcmxlc3MgZnVuY3Rpb24sIG9yIGFzIHBhcnQgb2YgYSByZWFsLXRpbWUgcGlwZWxpbmUuCiAgICAiIiIKCiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkcyBhbiBtb2RlbCB0aGF0IHdhcyBsb2dnZWQgYnkgdGhlIE1MRmxvdyB0cmFja2VyIG1vZGVsCiAgICAgICAgIiIiCiAgICAgICAgIyBVbnppcCB0aGUgbW9kZWwgZGlyIGFuZCB0aGVuIHVzZSBtbGZsb3cncyBsb2FkIGZ1bmN0aW9uCiAgICAgICAgbW9kZWxfZmlsZSwgXyA9IHNlbGYuZ2V0X21vZGVsKCIuemlwIikKICAgICAgICBtb2RlbF9wYXRoX3VuemlwID0gbW9kZWxfZmlsZS5yZXBsYWNlKCIuemlwIiwgIiIpCgogICAgICAgIHdpdGggemlwZmlsZS5aaXBGaWxlKG1vZGVsX2ZpbGUsICJyIikgYXMgemlwX3JlZjoKICAgICAgICAgICAgemlwX3JlZi5leHRyYWN0YWxsKG1vZGVsX3BhdGhfdW56aXApCgogICAgICAgIHNlbGYubW9kZWwgPSBtbGZsb3cucHlmdW5jLmxvYWRfbW9kZWwobW9kZWxfcGF0aF91bnppcCkKCiAgICBkZWYgcHJlZGljdChzZWxmLCByZXF1ZXN0OiBEaWN0W3N0ciwgQW55XSkgLT4gbGlzdDoKICAgICAgICAiIiIKICAgICAgICBJbmZlciB0aGUgaW5wdXRzIHRocm91Z2ggdGhlIG1vZGVsLiBUaGUgaW5mZXJyZWQgZGF0YSB3aWxsCiAgICAgICAgYmUgcmVhZCBmcm9tIHRoZSAiaW5wdXRzIiBrZXkgb2YgdGhlIHJlcXVlc3QuCgogICAgICAgIDpwYXJhbSByZXF1ZXN0OiBUaGUgcmVxdWVzdCB0byB0aGUgbW9kZWwgdXNpbmcgeGdib29zdCdzIHByZWRpY3QuCiAgICAgICAgICAgICAgICBUaGUgaW5wdXQgdG8gdGhlIG1vZGVsIHdpbGwgYmUgcmVhZCBmcm9tIHRoZSAiaW5wdXRzIiBrZXkuCgogICAgICAgIDpyZXR1cm46IFRoZSBtb2RlbCdzIHByZWRpY3Rpb24gb24gdGhlIGdpdmVuIGlucHV0LgogICAgICAgICIiIgoKICAgICAgICAjIEdldCB0aGUgaW5wdXRzIGFuZCBzZXQgdG8gYWNjZXB0ZWQgdHlwZToKICAgICAgICBpbnB1dHMgPSBwZC5EYXRhRnJhbWUocmVxdWVzdFsiaW5wdXRzIl0pCgogICAgICAgICMgUHJlZGljdCB1c2luZyB0aGUgbW9kZWwncyBwcmVkaWN0IGZ1bmN0aW9uOgogICAgICAgIHByZWRpY3Rpb25zID0gc2VsZi5tb2RlbC5wcmVkaWN0KGlucHV0cykK
CiAgICAgICAgIyBSZXR1cm4gYXMgbGlzdDoKICAgICAgICByZXR1cm4gcHJlZGljdGlvbnMudG9saXN0KCkKCmZyb20gbWxydW4ucnVudGltZXMgaW1wb3J0IG51Y2xpb19pbml0X2hvb2sKZGVmIGluaXRfY29udGV4dChjb250ZXh0KToKICAgIG51Y2xpb19pbml0X2hvb2soY29udGV4dCwgZ2xvYmFscygpLCAnc2VydmluZ192MicpCgpkZWYgaGFuZGxlcihjb250ZXh0LCBldmVudCk6CiAgICByZXR1cm4gY29udGV4dC5tbHJ1bl9oYW5kbGVyKGNvbnRleHQsIGV2ZW50KQo= + min_replicas: 1 + description: Mlflow model server, and additional utils. + max_replicas: 4 + source: '' + function_kind: serving_v2 + env: + - name: MLRUN_HTTPDB__NUCLIO__EXPLICIT_ACK + value: enabled +verbose: false +kind: serving diff --git a/mlflow_utils/item.yaml b/mlflow_utils/item.yaml new file mode 100644 index 000000000..bda09c5bb --- /dev/null +++ b/mlflow_utils/item.yaml @@ -0,0 +1,31 @@ +apiVersion: v1 +categories: +- genai +- model-serving +- machine-learning +description: Mlflow model server, and additional utils. +doc: '' +example: mlflow_utils.ipynb +generationDate: 2024-05-23:12-00 +hidden: false +icon: '' +labels: + author: zeevr +maintainers: [] +marketplaceType: '' +mlrunVersion: 1.7.0-rc17 +name: mlflow_utils +platformVersion: '' +spec: + customFields: + default_class: MLFlowModelServer + filename: mlflow_utils.py + handler: handler + image: mlrun/mlrun + kind: serving + requirements: + - mlflow==2.12.2 + - lightgbm + - xgboost +url: '' +version: 1.0.0 diff --git a/mlflow_utils/mlflow_utils.ipynb b/mlflow_utils/mlflow_utils.ipynb new file mode 100644 index 000000000..165dafc6f --- /dev/null +++ b/mlflow_utils/mlflow_utils.ipynb @@ -0,0 +1,1353 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c478ebb2", + "metadata": {}, + "source": [ + "# MLflow tracker demo\n", + "\n", + "This demo demonstrates how to seamlessly integrate and transfer logs from MLflow to MLRun,
\n", + "creating a unified and powerful platform for your machine learning experiments.\n", + "\n", + "You can combine MLflow and MLRun for a comprehensive solution for managing, tracking, and deploying machine learning models. \n", + "\n", + "This notebook guides you through the process of:\n", + "\n", + "1. Setting up the integration between MLflow and MLRun.\n", + "2. Extracting data, metrics, and artifacts from MLflow experiments.\n", + "3. Creating MLRun artifacts and projects to organize and manage the transferred data.\n", + "4. Leveraging MLRun's capabilities for model deployment and data processing.\n", + "\n", + "By the end of this demo, you will have an understanding of how to establish a smooth flow of data between MLflow and MLRun.\n", + "\n", + "## MLRun installation and configuration\n", + "Before running this notebook make sure the mlrun package is installed (pip install mlrun) and that you have configured the access to MLRun service." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "ab49e1f1", + "metadata": {}, + "outputs": [], + "source": [ + "# Install MLRun and scikit-learn if not already installed. Run this only once. Restart the notebook after the install!\n", + "# %pip install mlrun scikit-learn~=1.3.0" + ] + }, + { + "cell_type": "markdown", + "id": "1770566a", + "metadata": {}, + "source": [ + "Then you can import the necessary packages." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0d2dfd8b-65c4-417b-b66e-99f44b015ee7", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import os\n", + "import mlrun\n", + "from mlrun.datastore.targets import ParquetTarget\n", + "import mlrun.feature_store as fstore" + ] + }, + { + "cell_type": "markdown", + "id": "7c4513d4", + "metadata": {}, + "source": [ + "Create a project for this demo:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "43ea863f-02d5-45f2-8143-306ce3bb6c58", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2024-03-27 15:34:40,940 [info] Project loaded successfully: {'project_name': 'mlflow-tracking-example-guy'}\n" + ] + } + ], + "source": [ + "# Create a project for this demo:\n", + "project = mlrun.get_or_create_project(name=\"mlflow-tracking-example\", context=\"./\")" + ] + }, + { + "cell_type": "markdown", + "id": "94413ee8", + "metadata": {}, + "source": [ + "Set all the necessary environment variables for the Databricks cluster:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "22f94f89-acce-442d-93ff-b2d08d3a35a4", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "DATABRICKS_HOST=\"add your host\"\n", + "DATABRICKS_TOKEN=\"add your token\"\n", + "DATABRICKS_CLUSTER_ID=\"add your cluster id\"" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "7af310da-fd02-444e-8619-43ba6dcdb0a4", + "metadata": {}, + "outputs": [], + "source": [ + "os.environ[\"DATABRICKS_HOST\"] = DATABRICKS_HOST\n", + "os.environ[\"DATABRICKS_TOKEN\"] = DATABRICKS_TOKEN\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "d98e823c-3a27-4532-9a2d-6398ea4e1778", + "metadata": {}, + "outputs": [], + "source": [ + "# Set the Databricks environment variables\n", + "job_env = {\n", + " \"DATABRICKS_HOST\": DATABRICKS_HOST,\n", + " \"DATABRICKS_CLUSTER_ID\": 
DATABRICKS_CLUSTER_ID\n", + "}\n", + "secrets = {\"DATABRICKS_TOKEN\": DATABRICKS_TOKEN}\n", + "\n", + "# Set the secrets in the project\n", + "project.set_secrets(secrets)" + ] + }, + { + "cell_type": "markdown", + "id": "37d75366", + "metadata": {}, + "source": [ + "## Create a feature set and ingest data\n", + "\n", + "This is a short example of how to create a feature set about music preferences." + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5701c04a-8442-4958-8f4c-265bf4c9b06a", + "metadata": {}, + "outputs": [], + "source": [ + "# create df\n", + "columns = [\"id\", \"name\", \"age\", \"gender\", \"favorite_music_type\"]\n", + "data = [\n", + " (1, \"Alice\", 20, \"f\", \"Pop\"),\n", + " (2, \"Bob\", 30, \"m\", \"Rock\"),\n", + " (3, \"Charlie\", 25, \"m\", \"Pop\"),\n", + " (4, \"David\", 40, \"m\", \"Classical\"),\n", + " (5, \"Eva\", 18, \"f\", \"Pop\"),\n", + " (6, \"Frank\", 32, \"m\", \"Rock\"),\n", + " (7, \"Grace\", 28, \"f\", \"Pop\"),\n", + " (8, \"Henry\", 45, \"m\", \"Classical\"),\n", + " (9, \"Ivy\", 22, \"f\", \"Pop\"),\n", + " (10, \"Jack\", 38, \"m\", \"Classical\"),\n", + " (11, \"Karen\", 27, \"f\", \"Pop\"),\n", + " (12, \"Liam\", 19, \"m\", \"Pop\"),\n", + " (13, \"Mia\", 27, \"f\", \"Rock\"),\n", + " (14, \"Nora\", 31, \"f\", \"Rock\"),\n", + " (15, \"Oliver\", 29, \"m\", \"Pop\"),\n", + " (16, \"Ben\", 38, \"m\", \"Pop\"),\n", + " (17, \"Alicia\", 20, \"f\", \"Pop\"),\n", + " (18, \"Bobby\", 30, \"m\", \"Rock\"),\n", + " (19, \"Charlien\", 22, \"f\", \"Pop\"),\n", + " (20, \"Davide\", 40, \"m\", \"Classical\"),\n", + " (21, \"Evans\", 19, \"m\", \"Pop\"),\n", + " (22, \"Franklin\", 34, \"m\", \"Rock\"),\n", + " (23, \"Grace\", 22, \"f\", \"Pop\"),\n", + " (24, \"Henrik\", 48, \"m\", \"Classical\"),\n", + " (25, \"eevee\", 29, \"f\", \"Pop\"),\n", + " (26, \"Jack\", 75, \"m\", \"Classical\"),\n", + " (27, \"Karen\", 26, \"f\", \"Pop\"),\n", + " (28, \"Lian\", 21, \"f\", \"Pop\"),\n", + " (29, \"kia\", 27, \"f\", 
\"Rock\"),\n", + " (30, \"Novak\", 30, \"m\", \"Rock\"),\n", + " (31, \"Olivia\", 29, \"f\", \"Pop\"),\n", + " (32, \"Benjamin\", 18, \"m\", \"Pop\")\n", + "]\n", + "df = pd.DataFrame(data, columns=columns)" + ] + }, + { + "cell_type": "markdown", + "id": "4b91576b", + "metadata": {}, + "source": [ + "Transfer the data to DataBricks." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8679b0bb-0da6-4c35-9345-6cf0e83e19b2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'dbfs:///demos/mlrun_databricks_demo/1711553684480_33/music.parquet'" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Where to save the data in DataBricks\n", + "target_path = f\"dbfs:///demos/mlrun_databricks_demo/music.parquet\"\n", + "output_path = f\"dbfs:///demos/mlrun_databricks_demo/music_output_new.parquet\"\n", + "\n", + "targets = [ParquetTarget(path=target_path)]\n", + "\n", + "# Create a feature set and ingest the data\n", + "fset = fstore.FeatureSet(name=\"music_fset\", entities=[fstore.Entity(\"name\")])\n", + "fstore.ingest(fset, df, targets=targets, overwrite=True)\n", + "\n", + "# Get the target path and check it\n", + "dbfs_data_path = fset.get_target_path()\n", + "dbfs_data_path" + ] + }, + { + "cell_type": "markdown", + "id": "fe173be8-18eb-40ec-9662-6639b0deaedb", + "metadata": {}, + "source": [ + "We can look and see how how our data is logged in the DataBricks cluster:\n", + "(only top 20 rows)" + ] + }, + { + "attachments": { + "f7ad0425-26fe-482c-b97c-c9493b05fbf2.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "c303d698-2f44-4f6f-8ce5-6a4f9f13534a", + "metadata": {}, + "source": [ + "![image.png](attachment:f7ad0425-26fe-482c-b97c-c9493b05fbf2.png)" + ] + }, + { + "cell_type": "markdown", + "id": "abd854e5", + "metadata": {}, + "source": [ + "## Create a data processing function\n", + "\n", + "The following code demonstrates how to create a simple 
data processing function using MLRun.
\n", + "The function will process the data and show some statistics.
\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4e759f9-7154-4397-8db3-93b808426bd1", + "metadata": {}, + "outputs": [], + "source": [ + "%%writefile process_data.py\n", + "\n", + "\n", + "# Here is an example of Spark processing.\n", + "from pyspark.sql import SparkSession\n", + "from pyspark.sql.functions import avg, min, max\n", + "import pandas as pd\n", + "import json\n", + "import fsspec\n", + "\n", + "def process_data(data_path: str, data_output_path: str):\n", + " spark = SparkSession.builder.appName(\"MusicDemo\").getOrCreate()\n", + " spark_df = spark.read.parquet(data_path, header=True)\n", + " spark_df = spark_df.drop(\"name\", \"id\")\n", + " \n", + " music_stats = spark_df.groupBy(\"favorite_music_type\").agg(\n", + " avg(\"age\").alias(\"avg_age\"),\n", + " min(\"age\").alias(\"min_age\"),\n", + " max(\"age\").alias(\"max_age\")\n", + " )\n", + " music_stats.show()\n", + " pandas_df = spark_df.toPandas()\n", + " pandas_df.to_parquet(data_output_path)\n", + " # spark_df.write.mode(\"overwrite\").parquet(data_output_path)\n", + "\n", + " return {\"music_data\": data_output_path}" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "13748b64-6a48-4500-a2a8-d9290dd082c5", + "metadata": {}, + "outputs": [], + "source": [ + "process_data_function = project.set_function(\n", + " func=\"./zeev-demos/mlflow-databricks/process_data.py\",\n", + " name=\"process-data\",\n", + " kind=\"databricks\",\n", + " image=\"mlrun/mlrun\",\n", + ")\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "2dbadf07-a32a-40da-b9bc-609070e4392d", + "metadata": {}, + "source": [ + "Set all parameters necessary for the function and run it." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5642aa15-e8c0-4a72-a0a8-4cacd34fb63c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2024-03-27 15:34:45,422 [info] Storing function: {'name': 'process-data-process-data', 'uid': 'a9c770f8377046bda3061e61a5c015c2', 'db': 'http://mlrun-api:8080'}\n", + "> 2024-03-27 15:34:45,675 [info] Job is running in the background, pod: process-data-process-data-89bhh\n", + "> 2024-03-27 15:34:49,272 [info] Running with an existing cluster: {'cluster_id': '0327-134616-43m7kfxk'}\n", + "> 2024-03-27 15:34:49,492 [info] Starting to poll: 493449112310004\n", + "> 2024-03-27 15:34:49,539 [info] Workflow intermediate status: mlrun_task__15_34_48_703046: RunLifeCycleState.PENDING\n", + "> 2024-03-27 15:34:50,947 [info] Workflow intermediate status: mlrun_task__15_34_48_703046: RunLifeCycleState.PENDING\n", + "> 2024-03-27 15:34:53,063 [info] Workflow intermediate status: mlrun_task__15_34_48_703046: RunLifeCycleState.RUNNING\n", + "> 2024-03-27 15:34:56,737 [info] Workflow intermediate status: mlrun_task__15_34_48_703046: RunLifeCycleState.RUNNING\n", + "> 2024-03-27 15:35:00,947 [info] Artifacts found. 
Run name: mlrun_task__15_34_48_703046\n", + "> 2024-03-27 15:35:01,881 [info] Job finished: https://dbc-94c947ab-feb9.cloud.databricks.com/?o=4658245941722457#job/499259196347814/run/493449112310004\n", + "> 2024-03-27 15:35:01,881 [info] Logs:\n", + "+-------------------+------------------+-------+-------+\n", + "|favorite_music_type| avg_age|min_age|max_age|\n", + "+-------------------+------------------+-------+-------+\n", + "| Rock| 30.125| 27| 34|\n", + "| Classical|47.666666666666664| 38| 75|\n", + "| Pop| 24.0| 18| 38|\n", + "+-------------------+------------------+-------+-------+\n", + "\n", + "2024-03-27 15:34:54,980 - mlrun_logger - INFO - successfully wrote artifact details to the artifact JSON file in DBFS - music_data : /dbfs/demos/mlrun_databricks_demo/music_output_new.parquet\n", + "> 2024-03-27 15:35:02,182 [info] To track results use the CLI: {'info_cmd': 'mlrun get run a9c770f8377046bda3061e61a5c015c2 -p mlflow-tracking-example-guy', 'logs_cmd': 'mlrun logs a9c770f8377046bda3061e61a5c015c2 -p mlflow-tracking-example-guy'}\n", + "> 2024-03-27 15:35:02,182 [info] Or click for UI: {'ui_url': 'https://dashboard.default-tenant.app.llm-dev.iguazio-cd1.com/mlprojects/mlflow-tracking-example-guy/jobs/monitor/a9c770f8377046bda3061e61a5c015c2/overview'}\n", + "> 2024-03-27 15:35:02,182 [info] Run execution finished: {'status': 'completed', 'name': 'process-data-process-data'}\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
mlflow-tracking-example-guy0Mar 27 15:34:48completedprocess-data-process-data
v3io_user=zeevr
kind=databricks
owner=zeevr
mlrun/client_version=1.6.1
mlrun/client_python_version=3.9.16
host=process-data-process-data-89bhh
task_parameters={'timeout_minutes': 15, 'spark_app_code': 'IAoKaW1wb3J0IG9zCmltcG9ydCBsb2dnaW5nCm1scnVuX2xvZ2dlciA9IGxvZ2dpbmcuZ2V0TG9nZ2VyKCdtbHJ1bl9sb2dnZXInKQptbHJ1bl9sb2dnZXIuc2V0TGV2ZWwobG9nZ2luZy5ERUJVRykKCm1scnVuX2NvbnNvbGVfaGFuZGxlciA9IGxvZ2dpbmcuU3RyZWFtSGFuZGxlcigpCm1scnVuX2NvbnNvbGVfaGFuZGxlci5zZXRMZXZlbChsb2dnaW5nLkRFQlVHKQptbHJ1bl9mb3JtYXR0ZXIgPSBsb2dnaW5nLkZvcm1hdHRlcignJShhc2N0aW1lKXMgLSAlKG5hbWUpcyAtICUobGV2ZWxuYW1lKXMgLSAlKG1lc3NhZ2UpcycpCm1scnVuX2NvbnNvbGVfaGFuZGxlci5zZXRGb3JtYXR0ZXIobWxydW5fZm9ybWF0dGVyKQptbHJ1bl9sb2dnZXIuYWRkSGFuZGxlcihtbHJ1bl9jb25zb2xlX2hhbmRsZXIpCgptbHJ1bl9kZWZhdWx0X2FydGlmYWN0X3RlbXBsYXRlID0gJ21scnVuX3JldHVybl92YWx1ZV8nCm1scnVuX2FydGlmYWN0X2luZGV4ID0gMAoKCmRlZiBtbHJ1bl9sb2dfYXJ0aWZhY3QobmFtZT0nJywgcGF0aD0nJyk6CiAgICBnbG9iYWwgbWxydW5fYXJ0aWZhY3RfaW5kZXgKICAgIG1scnVuX2FydGlmYWN0X2luZGV4Kz0xICAjICBieSBob3cgbWFueSBhcnRpZmFjdHMgd2UgdHJpZWQgdG8gbG9nLCBub3QgaG93IG1hbnkgc3VjY2VlZC4KICAgIGlmIG5hbWUgaXMgTm9uZSBvciBuYW1lID09ICcnOgogICAgICAgIG5hbWUgPSBmJ3ttbHJ1bl9kZWZhdWx0X2FydGlmYWN0X3RlbXBsYXRlfXttbHJ1bl9hcnRpZmFjdF9pbmRleH0nCiAgICBpZiBub3QgcGF0aDoKICAgICAgICBtbHJ1bl9sb2dnZXIuZXJyb3IoZidwYXRoIHJlcXVpcmVkIGZvciBsb2dnaW5nIGFuIG1scnVuIGFydGlmYWN0IC0ge25hbWV9IDoge3BhdGh9JykKICAgICAgICByZXR1cm4KICAgIGlmIG5vdCBpc2luc3RhbmNlKG5hbWUsIHN0cikgb3Igbm90IGlzaW5zdGFuY2UocGF0aCwgc3RyKToKICAgICAgICBtbHJ1bl9sb2dnZXIuZXJyb3IoZiduYW1lIGFuZCBwYXRoIG11c3QgYmUgaW4gc3RyaW5nIHR5cGUgZm9yIGxvZ2dpbmcgYW4gbWxydW4gYXJ0aWZhY3QgLSB7bmFtZX0gOiB7cGF0aH0nKQogICAgICAgIHJldHVybgogICAgaWYgbm90IHBhdGguc3RhcnRzd2l0aCgnL2RiZnMnKSBhbmQgbm90IHBhdGguc3RhcnRzd2l0aCgnZGJmczovJyk6CiAgICAgICAgbWxydW5fbG9nZ2VyLmVycm9yKGYncGF0aCBmb3IgYW4gbWxydW4gYXJ0aWZhY3QgbXVzdCBzdGFydCB3aXRoIC9kYmZzIG9yIGRiZnM6LyAtIHtuYW1lfSA6IHtwYXRofScpCiAgICAgICAgcmV0dXJuCiAgICBtbHJ1bl9hcnRpZmFjdHNfcGF0aCA9ICcvZGJmcy9tbHJ1bl9kYXRhYnJpY2tzX3J1bnRpbWUvYXJ0aWZhY3RzX2RpY3Rpb25hcmllcy9tbHJ1bl9hcnRpZmFjdF9hOWM3NzBmODM3NzA0NmJkYTMwNjFlNjFhNWMwMTVjMi5qc29uJwogICAgdHJ5OgogICAgICAgIG5ld19kYXRhID0ge25hbWU6cGF0aH0KICAgICAgI
CBpZiBvcy5wYXRoLmV4aXN0cyhtbHJ1bl9hcnRpZmFjdHNfcGF0aCk6CiAgICAgICAgICAgIHdpdGggb3BlbihtbHJ1bl9hcnRpZmFjdHNfcGF0aCwgJ3IrJykgYXMganNvbl9maWxlOgogICAgICAgICAgICAgICAgZXhpc3RpbmdfZGF0YSA9IGpzb24ubG9hZChqc29uX2ZpbGUpCiAgICAgICAgICAgICAgICBleGlzdGluZ19kYXRhLnVwZGF0ZShuZXdfZGF0YSkKICAgICAgICAgICAgICAgIGpzb25fZmlsZS5zZWVrKDApCiAgICAgICAgICAgICAgICBqc29uLmR1bXAoZXhpc3RpbmdfZGF0YSwganNvbl9maWxlKQogICAgICAgIGVsc2U6CiAgICAgICAgICAgIHBhcmVudF9kaXIgPSBvcy5wYXRoLmRpcm5hbWUobWxydW5fYXJ0aWZhY3RzX3BhdGgpCiAgICAgICAgICAgIGlmIHBhcmVudF9kaXIgIT0gJy9kYmZzJzoKICAgICAgICAgICAgICAgIG9zLm1ha2VkaXJzKHBhcmVudF9kaXIsIGV4aXN0X29rPVRydWUpCiAgICAgICAgICAgIHdpdGggb3BlbihtbHJ1bl9hcnRpZmFjdHNfcGF0aCwgJ3cnKSBhcyBqc29uX2ZpbGU6CiAgICAgICAgICAgICAgICBqc29uLmR1bXAobmV3X2RhdGEsIGpzb25fZmlsZSkKICAgICAgICBzdWNjZXNzX2xvZyA9IGYnc3VjY2Vzc2Z1bGx5IHdyb3RlIGFydGlmYWN0IGRldGFpbHMgdG8gdGhlIGFydGlmYWN0IEpTT04gZmlsZSBpbiBEQkZTIC0ge25hbWV9IDoge3BhdGh9JwogICAgICAgIG1scnVuX2xvZ2dlci5pbmZvKHN1Y2Nlc3NfbG9nKQogICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyB1bmtub3duX2V4Y2VwdGlvbjoKICAgICAgICBtbHJ1bl9sb2dnZXIuZXJyb3IoZidsb2cgbWxydW4gYXJ0aWZhY3QgZmFpbGVkIC0ge25hbWV9IDoge3BhdGh9LiBlcnJvcjoge3Vua25vd25fZXhjZXB0aW9ufScpCgoKCgppbXBvcnQgYXJncGFyc2UKaW1wb3J0IGpzb24KcGFyc2VyID0gYXJncGFyc2UuQXJndW1lbnRQYXJzZXIoKQpwYXJzZXIuYWRkX2FyZ3VtZW50KCdoYW5kbGVyX2FyZ3VtZW50cycpCmhhbmRsZXJfYXJndW1lbnRzID0gcGFyc2VyLnBhcnNlX2FyZ3MoKS5oYW5kbGVyX2FyZ3VtZW50cwpoYW5kbGVyX2FyZ3VtZW50cyA9IGpzb24ubG9hZHMoaGFuZGxlcl9hcmd1bWVudHMpCgoKZnJvbSBweXNwYXJrLnNxbCBpbXBvcnQgU3BhcmtTZXNzaW9uCmZyb20gcHlzcGFyay5zcWwuZnVuY3Rpb25zIGltcG9ydCBhdmcsIG1pbiwgbWF4CmltcG9ydCBwYW5kYXMgYXMgcGQKaW1wb3J0IGpzb24KaW1wb3J0IGZzc3BlYwoKZGVmIHByb2Nlc3NfZGF0YShkYXRhX3BhdGg6IHN0ciwgZGF0YV9vdXRwdXRfcGF0aDogc3RyKToKICAgIHNwYXJrID0gU3BhcmtTZXNzaW9uLmJ1aWxkZXIuYXBwTmFtZSgnTXVzaWNEZW1vJykuZ2V0T3JDcmVhdGUoKQogICAgc3BhcmtfZGYgPSBzcGFyay5yZWFkLnBhcnF1ZXQoZGF0YV9wYXRoLCBoZWFkZXI9VHJ1ZSkKICAgIHNwYXJrX2RmID0gc3BhcmtfZGYuZHJvcCgnbmFtZScsICdpZCcpCiAgICBtdXNpY19zdGF0cyA9IHNwYXJrX2RmLmdyb3VwQnkoJ2Zhdm9yaXRlX211c2ljX3R5c
GUnKS5hZ2coYXZnKCdhZ2UnKS5hbGlhcygnYXZnX2FnZScpLCBtaW4oJ2FnZScpLmFsaWFzKCdtaW5fYWdlJyksIG1heCgnYWdlJykuYWxpYXMoJ21heF9hZ2UnKSkKICAgIG11c2ljX3N0YXRzLnNob3coKQogICAgcGFuZGFzX2RmID0gc3BhcmtfZGYudG9QYW5kYXMoKQogICAgcGFuZGFzX2RmLnRvX3BhcnF1ZXQoZGF0YV9vdXRwdXRfcGF0aCkKICAgIHJldHVybiB7J211c2ljX2RhdGEnOiBkYXRhX291dHB1dF9wYXRofQpyZXN1bHQgPSBwcm9jZXNzX2RhdGEoKipoYW5kbGVyX2FyZ3VtZW50cykKCgppZiByZXN1bHQ6CiAgICBpZiBpc2luc3RhbmNlKHJlc3VsdCwgZGljdCk6CiAgICAgICAgZm9yIGtleSwgcGF0aCBpbiByZXN1bHQuaXRlbXMoKToKICAgICAgICAgICAgbWxydW5fbG9nX2FydGlmYWN0KG5hbWU9a2V5LCBwYXRoPXBhdGgpCiAgICBlbGlmIGlzaW5zdGFuY2UocmVzdWx0LCAobGlzdCwgdHVwbGUsIHNldCkpOgogICAgICAgIGZvciBhcnRpZmFjdF9wYXRoIGluIHJlc3VsdDoKICAgICAgICAgICAgbWxydW5fbG9nX2FydGlmYWN0KHBhdGg9YXJ0aWZhY3RfcGF0aCkKICAgIGVsaWYgaXNpbnN0YW5jZShyZXN1bHQsIHN0cik6CiAgICAgICAgbWxydW5fbG9nX2FydGlmYWN0KHBhdGg9cmVzdWx0KQogICAgZWxzZToKICAgICAgICBtbHJ1bl9sb2dnZXIud2FybmluZyhmJ2NhbiBub3QgbG9nIGFydGlmYWN0cyB3aXRoIHRoZSByZXN1bHQgb2YgaGFuZGxlciBmdW5jdGlvbiAtIHJlc3VsdCBpbiB1bnN1cHBvcnRlZCB0eXBlLiB7dHlwZShyZXN1bHQpfScpCg==', 'original_handler': 'process_data', 'artifact_json_path': '/mlrun_databricks_runtime/artifacts_dictionaries/mlrun_artifact_a9c770f8377046bda3061e61a5c015c2.json'}
data_path=dbfs:///demos/mlrun_databricks_demo/1711553684480_33/music.parquet
data_output_path=/dbfs/demos/mlrun_databricks_demo/music_output_new.parquet
music_data
databricks_run_metadata
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2024-03-27 15:35:07,910 [info] Run execution finished: {'status': 'completed', 'name': 'process-data-process-data'}\n" + ] + } + ], + "source": [ + "for name, val in job_env.items():\n", + " process_data_function.spec.env.append({\"name\": name, \"value\": val})\n", + "params = {\n", + " \"task_parameters\": {\"timeout_minutes\": 15},\n", + " \"data_path\": dbfs_data_path,\n", + " \"data_output_path\": output_path.replace(\"dbfs://\", \"/dbfs\"),\n", + "}\n", + "run = process_data_function.run(\n", + " handler=\"process_data\",\n", + " params=params,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "9a8db175-51f4-4218-afd1-752cc0e65216", + "metadata": { + "tags": [] + }, + "source": [ + "## Create an MLflow Xgboost function\n", + "\n", + "The following code demonstrates how to create a simple Xgboost model using MLflow and log the results.
\n", + "MLflow will log the model, parameters, metrics, and artifacts, and MLRun will track the run and collect the data." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "44a1e133-954d-47a3-9b0f-6e181fe12ea7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Overwriting training.py\n" + ] + } + ], + "source": [ + "%%writefile training.py\n", + "\n", + "import mlflow\n", + "import mlflow.xgboost\n", + "import xgboost as xgb\n", + "from mlflow import log_metric\n", + "from sklearn import datasets\n", + "from sklearn.metrics import accuracy_score, log_loss\n", + "from sklearn.model_selection import train_test_split\n", + "import pandas as pd\n", + "\n", + "def example_xgb_run(df: str):\n", + " df = pd.read_parquet(df)\n", + " \n", + " df = df.replace([\"f\", \"m\"], [0, 1])\n", + " df = df.replace([\"Pop\", \"Rock\", \"Classical\"], [0, 1, 2])\n", + " \n", + " # Prepare, train, and test data\n", + " y = df.pop('favorite_music_type')\n", + " X = df\n", + "\n", + " X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.2, random_state=42\n", + " )\n", + "\n", + " # Enable auto logging\n", + " mlflow.xgboost.autolog()\n", + "\n", + " dtrain = xgb.DMatrix(X_train, label=y_train)\n", + " dtest = xgb.DMatrix(X_test, label=y_test)\n", + "\n", + " with mlflow.start_run():\n", + " # Train model\n", + " params = {\n", + " \"objective\": \"multi:softprob\",\n", + " \"num_class\": 3,\n", + " \"learning_rate\": 0.3,\n", + " \"eval_metric\": \"mlogloss\",\n", + " \"colsample_bytree\": 1.0,\n", + " \"subsample\": 1.0,\n", + " \"seed\": 42,\n", + " }\n", + " model = xgb.train(params, dtrain, evals=[(dtrain, \"train\")])\n", + " \n", + " # Evaluate model\n", + " y_proba = model.predict(dtest)\n", + " y_pred = y_proba.argmax(axis=1)\n", + " loss = log_loss(y_test, y_proba)\n", + " acc = accuracy_score(y_test, y_pred)\n", + " \n", + " # Log metrics by hand\n", + " 
mlflow.log_metrics({\"log_loss\": loss, \"accuracy\": acc})" + ] + }, + { + "cell_type": "markdown", + "id": "1cf984c9-78a9-443f-9465-111263101dcd", + "metadata": {}, + "source": [ + "## Log the data from MLflow in MLRun " + ] + }, + { + "cell_type": "markdown", + "id": "365e4b39-9f39-40ae-aac4-7c4f42bce9bd", + "metadata": {}, + "source": [ + "### Change the MLRun configuration to use the tracker\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "0b194d04-e08f-4161-a65b-4f18d10fdbf0", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun\n", + "\n", + "mlrun.mlconf.external_platform_tracking.enabled = True" + ] + }, + { + "cell_type": "markdown", + "id": "b16bb4db-8a2a-4453-a42e-0e8e74ab8f53", + "metadata": {}, + "source": [ + "These are the three options to run tracking:\n", + "- Set: `mlrun.mlconf.external_platform_tracking.mlflow.match_experiment_to_runtime` to True. This determines the run id and is the safest method\n", + "- Set the experiment name at: `mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.set`. This determines the experiment mlrun will track and find the run added to it.\n", + "- Just run it, mlrun will look across all experiments and search for added run, this is not recommended."
+ ] + }, + { + "cell_type": "markdown", + "id": "8b7bc72a-bd1b-408a-afa8-e474d91c4a20", + "metadata": {}, + "source": [ + "### Create the mlrun function" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "3382b909-a8dc-41a3-afb1-b64df9bb7318", + "metadata": {}, + "outputs": [], + "source": [ + "# Use the first run option from above\n", + "mlrun.mlconf.external_platform_tracking.mlflow.match_experiment_to_runtime = True\n", + "\n", + "# Create a MLRun function using the example train file (all the functions must be located in it):\n", + "training_func = project.set_function(\n", + " func=\"training.py\",\n", + " name=\"example-xgb-run\",\n", + " kind=\"job\",\n", + " image=\"mlrun/mlrun\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "91597f57-364d-4d2a-b926-97b9d8afc81b", + "metadata": {}, + "source": [ + "### Run the function\n", + "\n", + "Run the function using MLRun. This will log the data from MLflow in MLRun.
\n", + "After running the function, you can look at the UI and see that all metrics and parameters are logged in MLRun." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "5a726ca8-8057-41ed-be4e-35e5e0582de9", + "metadata": {}, + "outputs": [], + "source": [ + "import mlrun.feature_store as fstore\n", + "\n", + "feature_set = fstore.get_feature_set(\"music_fset\", \"mlflow-tracking-example\")" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "4de1229a-cc59-4846-8473-3178e682efa6", + "metadata": {}, + "outputs": [], + "source": [ + "df = feature_set.to_dataframe()\n", + "df = df.drop(['id'], axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "8249a933-031c-4f2e-88c2-161dd4cfb7ed", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# df = project.list_().to_objects()[0].to_dataitem().as_df()\n", + "df_path = \"./music.parquet\"\n", + "df.to_parquet(df_path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "8ba452dd-1756-4bfb-af64-d741e234dba3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2024-03-27 15:37:22,829 [info] Storing function: {'name': 'example-xgb-run-example-xgb-run', 'uid': '6ff324dd21d64b6290d45a001957dda2', 'db': 'http://mlrun-api:8080'}\n", + "> 2024-03-27 15:37:22,912 [warning] `mlconf.external_platform_tracking.mlflow.match_experiment_to_runtime` is set to True but the MLFlow experiment name environment variable ('MLFLOW_EXPERIMENT_NAME') is set for using the name: 'example-xgb-run-example-xgb-run'. 
This name will be overriden with MLRun's runtime name as set in the MLRun configuration: 'example-xgb-run-example-xgb-run'.\n", + "[0]\ttrain-mlogloss:0.82467\n", + "[1]\ttrain-mlogloss:0.64706\n", + "[2]\ttrain-mlogloss:0.52480\n", + "[3]\ttrain-mlogloss:0.43768\n", + "[4]\ttrain-mlogloss:0.37410\n", + "[5]\ttrain-mlogloss:0.32686\n", + "[6]\ttrain-mlogloss:0.29057\n", + "[7]\ttrain-mlogloss:0.26192\n", + "[8]\ttrain-mlogloss:0.23885\n", + "[9]\ttrain-mlogloss:0.22004\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024/03/27 15:37:23 WARNING mlflow.utils.autologging_utils: MLflow autologging encountered a warning: \"/User/.pythonlibs/mlrun-base/lib/python3.9/site-packages/mlflow/types/utils.py:393: UserWarning: Hint: Inferred schema contains integer column(s). Integer columns in Python cannot represent missing values. If your input data contains missing values at inference time, it will be encoded as floats and will cause a schema enforcement error. The best way to avoid this problem is to infer the model schema based on a realistic data sample (training dataset) that includes missing values. Alternatively, you can declare integer columns as doubles (float64) whenever these columns may have missing values. See `Handling Integers With Missing Values `_ for more details.\"\n", + "2024/03/27 15:37:23 WARNING mlflow.utils.autologging_utils: MLflow autologging encountered a warning: \"/User/.pythonlibs/mlrun-base/lib/python3.9/site-packages/xgboost/core.py:160: UserWarning: [15:37:23] WARNING: /workspace/src/c_api/c_api.cc:1240: Saving into deprecated binary model format, please consider using `json` or `ubj`. Model format will default to JSON in XGBoost 2.2 if not specified.\"\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
projectuiditerstartstatenamelabelsinputsparametersresultsartifacts
mlflow-tracking-example-guy0Mar 27 15:37:22completedexample-xgb-run-example-xgb-run
v3io_user=zeevr
kind=local
owner=zeevr
host=jupyter-zeevr-9f4ffb7bb-8c4mf
mlflow-user=iguazio
mlflow-run-name=stately-cow-437
mlflow-run-id=f66d6149d54c4958a2485c941d86a538
mlflow-experiment-id=608717337209571124
df
colsample_bytree=1.0
custom_metric=None
early_stopping_rounds=None
eval_metric=mlogloss
learning_rate=0.3
maximize=None
num_boost_round=10
num_class=3
objective=multi:softprob
seed=42
subsample=1.0
verbose_eval=True
accuracy=0.7142857142857143
log_loss=0.9622776094122579
train-mlogloss=0.2200447738170624
feature_importance_weight_json
feature_importance_weight_png
model
\n", + "
\n", + "
\n", + "
\n", + " Title\n", + " ×\n", + "
\n", + " \n", + "
\n", + "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n" + ] + }, + { + "data": { + "text/html": [ + " > to track results use the .show() or .logs() methods or click here to open in UI" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2024-03-27 15:37:31,415 [info] Run execution finished: {'status': 'completed', 'name': 'example-xgb-run-example-xgb-run'}\n" + ] + } + ], + "source": [ + "# Run the example code using mlrun\n", + "train_run = training_func.run(\n", + " local=True,\n", + " handler=\"example_xgb_run\",\n", + " inputs={\"df\": df_path},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "655d5c46-2c0a-46f2-bbec-a58853260476", + "metadata": {}, + "source": [ + "### Examine the results\n", + "\n", + "You can examine the results using the UI or by looking at the outputs of the run.
\n", + "The outputs include the model, the metrics, and the artifacts, and are completely independent of MLflow." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d23beb02-e455-48dc-9d9f-9e3d4549ec71", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'accuracy': 0.7142857142857143,\n", + " 'log_loss': 0.9622776094122579,\n", + " 'train-mlogloss': 0.2200447738170624,\n", + " 'feature_importance_weight_json': 'store://artifacts/mlflow-tracking-example-guy/example-xgb-run-example-xgb-run_feature_importance_weight_json@6ff324dd21d64b6290d45a001957dda2',\n", + " 'feature_importance_weight_png': 'store://artifacts/mlflow-tracking-example-guy/example-xgb-run-example-xgb-run_feature_importance_weight_png@6ff324dd21d64b6290d45a001957dda2',\n", + " 'model': 'store://artifacts/mlflow-tracking-example-guy/example-xgb-run-example-xgb-run_model@6ff324dd21d64b6290d45a001957dda2'}" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_run.outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "b05f4c2a-5f2d-4d7c-9c21-39c0a949cfc3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'accuracy': 0.7142857142857143,\n", + " 'log_loss': 0.9622776094122579,\n", + " 'train-mlogloss': 0.2200447738170624}" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_run.status.results" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "925b3445-18b4-4497-9783-52b4cd069401", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAcwAAAFZCAYAAAAVcB92AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAVY0lEQVR4nO3debRsdXmn8efL5IBMAUKY5DqAiC1TR8UWBY3aGuzWXp0gCUFITCNqSExruzRtEofWoFnRGGyTEAfoaIiIkaB2KyTIjRhbBplEQAVBZkSmCwI28PYfex8pDnd4L9x7qrjn+ax1FrV37VP7V79DnefuXXWqUlVIkqSVW2/aA5Ak6dHAYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJa1VSV6X5M/X8G1elGT/5rZXJHnxatz2kUne/3DHpnWXwdRUjL/E7kpyx8TXdmvgNtu/GB+pJO9M8qmF2t/KJDksyRnTHsd8STYC3gH86Zq83ap6RlWd/khvJ8n+Sa6et/pvgIOT/PwjvX2tWwympuk/VNUTJr6uneZgkmwwzf0/XDM+7lcCl1TVNdMeSFdV3Q38H+A10x6LZovB1ExJslmSjye5Lsk1Sf5HkvXH656S5LQkP05yU5JPJ9l8vO5vgScCXxiPVt+6vKOHyaPQ8QjxxCSfSnI7cNjK9t8YeyV5Q5LvJVmW5D3jmP81ye1JThiPuH52ZJPkD8b7ckWSg+fNw/9K8qMkVyZ5R5L1xusOS/L1JB9K8mPgM8BfAc8d7/ut43YHJDl33PdVSd45cftLxvEemuSH4xj++8T1649ju2y8L+ck2XG8btckpya5OcmlSQ5cybS8HFg6cbvHJXnzeHn7cQxvnPj53jxxP1+R5Lwkt45zuPsKfo6PG2/3liQXjz/7+UeNeya5IMltST6T5LFJNmYI43bLOctxOnDAKn7kWmQMpmbNscC9wFOBvYCXAr89XhfgT4DtgKcDOwLvBKiqQ4Af8sBR6wea+3slcCKwOfDpVey/498D/xbYB3grcAzwG+NY/w3waxPb/gKwFbA9cChwTJKnjdcdDWwGPBnYj+Fo5zcnvvc5wOXANuPtHwF8Y7zvm4/b3Dl+3+YMv/xfn+RV88a7L/A04JeAP0ry9HH9fx3H+svApsBvAT8ZI3Mq8HfAzwMHAR9NstsK5uOZwKUTy0uB/cfL+4334QUTy1+rqvuT7AV8AngdsCXw18DJSR6znH38MbCEYa5eMs7HfAcCLwOeBOwOHFZVdzIE/drlnOW4GNhjBfdJi5TB1DSdNB493JrkpCTbMPyCflNV3VlVNwIfYvilTFV9v6pOrap7qupHwAcZfsk+Et+oqpOq6n6GMKxw/00fqKrbq+oi4NvAKVV1eVXdxnA0s9e87f9wvD9LgS8BB45HtAcBb6+qZVV1BfBnwCET33dtVR1dVfdW1V3LG0hVnV5VF1bV/VV1AXA8D52vd1XVXVV1PnA+D0Tit4F3VNWlNTi/qn4MvAK4oqo+Oe77XOBzwK+uYD42B5ZNLC8F9h2PIl8AfAB43njdfjxwNHo48NdV9c2quq+qjgPuYfiHyHwHAu+rqluq6mrgL5azzV9U1bVVdTPwBWDPFYx3zjKGf7BIPzPLz31o3feqqvqnuYUkzwY2BK5LMrd6PeCq8fptgA8Dzwc2Ga+75RGO4aqJyzutbP9NN0xcvms5y78wsXzLeJQz50qGo+etxnFcOe+67Vcw7uVK8hzgKIYj242AxwCfnbfZ9ROXfwI8Yby8I3DZcm52J+A5c6d9RxsAf7uCYdzC8LMCoKouS3InQ7CeD7wHeO14ZL0fD8RuJ+DQJEdO3NZGDPMz33Y8eD6WNzfz7+eqXmC2CXDbKrbRIuMRpmbJVQxHEVtV1ebj16ZV9Yzx+vcBBTyzqjZlOPWWie+vebd3J/D4uYXxyG3redtMfs+q9r+mbTGe4pzzROBa4Cbg/zFEY/K
6yRfOzL+v85dhOG16MrBjVW3G8DxnlrPd8lwFPGUF65dOzM/m46nM16/gdi4Adpm3binwK8BG44uBljKckt4COG9iP++dt5/HV9Xxy9nHdcAOE8s7du7gaHnzBsMp//NX43a0CBhMzYyqug44BfizJJsmWW98IcjcacRNgDuA25JsD/y3eTdxA8PzWHO+Czx2fPHLhgx/3rC858C6+18b3pVkoyTPZzjd+dmqug84AXhvkk2S7MTwnOLK/oTlBmCHuRcVjTYBbq6qu8ej919fjXF9DHhPkp0z2D3JlsAXgV2SHJJkw/HrWRPPfc73v3noaeClwO8A/zIunz4unzHedxj+tOOIJM8Z97/x+HPchIc6AXh7ki3G/y9+ZzXu5w3Alknmn37dj+EUuvQzBlOz5jUMp96+w3A670Rg2/G6dwF7M5wq+xLwD/O+90+Ad4zPib5lfN7wDQy//K9hOOKc/+rJ1dn/mnb9uI9rGV5wdERVXTJedyTDeC8HzmA4WvzESm7rNOAi4PokN43r3gC8O8ky4I8YwtL1wXH7U4DbgY8Dj6uqZQwvhDpoHPf1wPtZ8T9EvgDsmgf/je1ShpjPBfMMhjMBc8tU1dnAfwE+wjBH3wcOW8E+3s3wc/0B8E8MP7N7OndynO/jgcvH/2+2S/JYhueyj+vchhaPVK3ojISktSXDu9R8qqp2WMWmj3pJDgd2q6o3LdD+Xg8cVFUP68zA+LzpjlX11jU7Mj3a+aIfSWtVVR2zNm8/ybYMp+K/AewMvJnhyPRhqaqj19DQtI4xmJIe7TZi+DvNJwG3An8PfHSaA9K6yVOykiQ1+KIfSZIaZu6U7FZbbVVLliyZ9jAkSYvEOeecc1NVzf8b7YeYuWAuWbKEs88+e9rDkCQtEkmuXPVWnpKVJKnFYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqmLn3kr3wmttY8rYvTXsYkqQZdcVRB0xlvx5hSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNRhMSZIaDKYkSQ0GU5KkBoMpSVKDwZQkqcFgSpLUYDAlSWowmJIkNax2MJOclOScJBclOXxc99ok301yZpK/SfKRcf3WST6X5Kzx63lr+g5IkrQQNngY3/NbVXVzkscBZyX5EvCHwN7AMuA04Pxx2w8DH6qqM5I8EfgK8PQ1MG5JkhbUwwnm7yb5T+PlHYFDgKVVdTNAks8Cu4zXvxjYLcnc926a5AlVdcfkDY5HqocDrL/p1g9
jSJIkrV2rFcwk+zNE8LlV9ZMkpwOXsOKjxvWAfarq7pXdblUdAxwD8Jhtd67VGZMkSQthdZ/D3Ay4ZYzlrsA+wMbAfkm2SLIB8J8ntj8FOHJuIcmej3C8kiRNxeoG88vABkkuBo4C/i9wDfA+4Ezg68AVwG3j9r8L/GKSC5J8BzhiTQxakqSFtlqnZKvqHuDl89cnObuqjhmPMD8PnDRufxPw6jUwTkmSpmpN/R3mO5OcB3wb+AFjMCVJWlc8nFfJPkRVvWVN3I4kSbPKd/qRJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpIYNpj2A+Z65/WacfdQB0x6GJEkP4hGmJEkNBlOSpAaDKUlSg8GUJKnBYEqS1GAwJUlqMJiSJDUYTEmSGgymJEkNBlOSpAaDKUlSQ6pq2mN4kCTLgEunPY4ZsBVw07QHMQOcB+dgjvMwcB7W/BzsVFVbr2qjmXvzdeDSqvrFaQ9i2pKc7Tw4D+AczHEeBs7D9ObAU7KSJDUYTEmSGmYxmMdMewAzwnkYOA/OwRznYeA8TGkOZu5FP5IkzaJZPMKUJGnmGExJkhpmKphJXpbk0iTfT/K2aY9noST5RJIbk3x7Yt3PJTk1yffG/24xzTGubUl2TPLVJN9JclGS3xvXL7Z5eGySM5OcP87Du8b1T0ryzfGx8ZkkG017rGtbkvWTnJvki+PyYpyDK5JcmOS8JGeP6xbVYwIgyeZJTkxySZKLkzx3GvMwM8FMsj7wP4GXA7sBv5Zkt+mOasEcC7xs3rq3Af9cVTsD/zwur8vuBd5cVbsB+wBvHH/+i20e7gFeVFV7AHsCL0uyD/B+4ENV9VTgFuC10xvigvk94OKJ5cU4BwAvrKo9J/7ucLE9JgA+DHy5qnYF9mD4/2LB52Fmggk8G/h+VV1eVT8F/h545ZTHtCCq6l+Am+etfiVw3Hj5OOBVCzmmhVZV11XVt8bLyxgeENuz+OahquqOcXHD8auAFwEnjuvX+XlIsgNwAPCxcTkssjlYiUX1mEiyGfAC4OMAVfXTqrqVKczDLAVze+CqieWrx3WL1TZVdd14+Xpgm2kOZiElWQLsBXyTRTgP46nI84AbgVOBy4Bbq+recZPF8Nj4c+CtwP3j8pYsvjmA4R9LpyQ5J8nh47rF9ph4EvAj4JPjKfqPJdmYKczDLAVTK1DD3/4sir//SfIE4HPAm6rq9snrFss8VNV9VbUnsAPDmZddpzuihZXkFcCNVXXOtMcyA/atqr0Znqp
6Y5IXTF65SB4TGwB7A39ZVXsBdzLv9OtCzcMsBfMaYMeJ5R3GdYvVDUm2BRj/e+OUx7PWJdmQIZafrqp/GFcvunmYM552+irwXGDzJHPv/byuPzaeB/zHJFcwPDXzIobnsBbTHABQVdeM/70R+DzDP6AW22PiauDqqvrmuHwiQ0AXfB5mKZhnATuPr4TbCDgIOHnKY5qmk4FDx8uHAv84xbGsdeNzVB8HLq6qD05ctdjmYeskm4+XHwe8hOH53K8CvzJutk7PQ1W9vap2qKolDL8HTquqg1lEcwCQZOMkm8xdBl4KfJtF9pioquuBq5I8bVz1S8B3mMI8zNQ7/ST5ZYbnLtYHPlFV753uiBZGkuOB/Rk+suYG4I+Bk4ATgCcCVwIHVtX8FwatM5LsC3wNuJAHnrf6A4bnMRfTPOzO8AKG9Rn+QXtCVb07yZMZjrZ+DjgX+I2qumd6I10YSfYH3lJVr1hsczDe38+PixsAf1dV702yJYvoMQGQZE+GF4BtBFwO/Cbj44MFnIeZCqYkSbNqlk7JSpI0swymJEkNBlOSpAaDKUlSg8GUJKnBYEprUZI7Vr3VGt3fkiS/vpD7lBYLgymtI8Z3wVkCGExpLTCY0gJIsn+SpUn+McnlSY5KcvD42ZcXJnnKuN2xSf4qydlJvju+r+rc52R+ctz23CQvHNcfluTkJKcxfMTRUcDzx89P/P3xiPNrSb41fv27ifGcPvEZg58e322JJM9K8q8ZPpPzzCSbjG8I/6dJzkpyQZLXTWUipSnaYNWbSFpD9gCezvBRbpcDH6uqZ2f4sOwjgTeN2y1heM/QpwBfTfJU4I0M7zH9zCS7MnyCxS7j9nsDu1fVzZPvjAOQ5PHAS6rq7iQ7A8cDc5+ruBfwDOBa4OvA85KcCXwGeHVVnZVkU+Auhs+evK2qnpXkMcDXk5xSVT9Y89MkzSaDKS2cs+Y+jijJZcAp4/oLgRdObHdCVd0PfC/J5QyfVrIvcDRAVV2S5EpgLpinruQtwTYEPjK+tdh9E98DcGZVXT2O5zyGUN8GXFdVZ437un28/qXA7knm3st1M2BnwGBq0TCY0sKZfN/T+yeW7+fBj8X571e5qvevvHMl1/0+w/sT78HwFMzdKxjPfaz890GAI6vqK6sYi7TO8jlMafb8apL1xuc1nwxcyvDG9AcDjKdinziun28ZsMnE8mYMR4z3A4cwvKn7ylwKbJvkWeO+NhlfTPQV4PXjR7CRZJfxEzSkRcMjTGn2/BA4E9gUOGJ8/vGjwF8muRC4Fzisqu4ZX6cz6QLgviTnA8cCHwU+l+Q1wJdZ+dEoVfXTJK8Gjh4/Xuwu4MUMnxSxBPjW+OKgHwGvWgP3VXrU8NNKpBmS5Fjgi1V14rTHIunBPCUrSVKDR5iSJDV4hClJUoPBlCSpwWBKktRgMCVJajCYkiQ1GExJkhoMpiRJDQZTkqQGgylJUsP/BySEjToO/wa1AAAAAElFTkSuQmCC", + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "train_run.artifact(\"feature_importance_weight_png\").show()" + ] + }, + { + "cell_type": "markdown", + "id": "227c4358-4c34-4d1c-acb4-e37ca110b8bf", + "metadata": {}, + "source": [ + "### You can also examine the results using the UI" + ] + }, + { + "cell_type": "markdown", + "id": "dde00fd1-a1f0-4c56-80c2-c5d36a9062a1", + "metadata": {}, + "source": [ + "Look at collected artifacts: " + 
] + }, + { + "attachments": { + "95b9b198-55c9-4a67-b0bf-103c9ae0272e.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "8cda6c13-7fee-4284-aacf-81a506a426da", + "metadata": {}, + "source": [ + "![image.png](attachment:95b9b198-55c9-4a67-b0bf-103c9ae0272e.png)" + ] + }, + { + "cell_type": "markdown", + "id": "e1525230-e10c-4f48-b951-bc73642bb3e4", + "metadata": {}, + "source": [ + "And at results:" + ] + }, + { + "attachments": { + "66422f79-9b46-4e07-9796-c1b350c26c9c.png": { + "image/png": "" + } + }, + "cell_type": "markdown", + "id": "217279f8-6af1-4209-b0ec-3d3d829ceed9", + "metadata": {}, + "source": [ + "![image.png](attachment:66422f79-9b46-4e07-9796-c1b350c26c9c.png)" + ] + }, + { + "cell_type": "markdown", + "id": "844edc05-0b6a-4e84-9213-1d3cbf6f833e", + "metadata": {}, + "source": [ + "## Use the function for model serving" + ] + }, + { + "cell_type": "markdown", + "id": "40182a6f-fc46-4a33-a7f5-7ee8ee171966", + "metadata": {}, + "source": [ + "### Create the server and serving function\n", + "\n", + "Create a serving function that uses the model from the previous run and serves it using MLRun.
\n", + "We will create a mock server to test the model in a local environment." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "f5fe910b-e177-4af7-84de-41a571d1774c", + "metadata": {}, + "outputs": [], + "source": [ + "serving_func = project.set_function(\n", + " func=\"function.yaml\",\n", + " name=\"example-xgb-server\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "ddbfd48f-a90e-4fe6-9caa-ddffeacf63d1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Add the model\n", + "serving_func.add_model(\n", + " \"mlflow_xgb_model\",\n", + " class_name=\"MLFlowModelServer\",\n", + " model_path=train_run.outputs[\"model\"],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "2298d111-2f53-4b84-be9e-e4e8a228dcc4", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> 2024-03-27 15:37:31,627 [info] model mlflow_xgb_model was loaded\n", + "> 2024-03-27 15:37:31,628 [info] Loaded ['mlflow_xgb_model']\n" + ] + } + ], + "source": [ + "# Create a mock server\n", + "server = serving_func.to_mock_server()" + ] + }, + { + "cell_type": "markdown", + "id": "f54d7c06-4972-4881-9bc9-fba7db0adbe4", + "metadata": {}, + "source": [ + "### Test the model " + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "4f256490-f225-4bd6-ac8a-5fc12a0f335d", + "metadata": {}, + "outputs": [], + "source": [ + "# An example taken randomly \n", + "result = server.test(\"/v2/models/mlflow_xgb_model/predict\", {\"inputs\":[{\"age\": 20, \"gender\": 0}]})" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "47839f4b-bb2d-4341-99c5-e34fa31270c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '43a61d06f2694fa695bdd6561b487131',\n", + " 'model_name': 'mlflow_xgb_model',\n", + " 
'outputs': [[0.9242361187934875, 0.0418272465467453, 0.033936627209186554]]}" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Look at the result, it shows the probability of the given example to be each of the \n", + "# music types (Pop, Rock, Classical) featured in the dataset\n", + "result" + ] + }, + { + "cell_type": "markdown", + "id": "d4fc6c73-0963-4814-bd5f-2d27b464823e", + "metadata": {}, + "source": [ + "We predicted that a 20 year old female would like pop!" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mlrun-base", + "language": "python", + "name": "conda-env-mlrun-base-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.16" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/mlflow_utils/mlflow_utils.py b/mlflow_utils/mlflow_utils.py new file mode 100644 index 000000000..fb6124bef --- /dev/null +++ b/mlflow_utils/mlflow_utils.py @@ -0,0 +1,45 @@ +import zipfile +from typing import Any, Dict +import mlflow +from mlrun.serving.v2_serving import V2ModelServer +import pandas as pd + + +class MLFlowModelServer(V2ModelServer): + """ + MLFlow tracker Model serving class, inheriting the V2ModelServer class for being initialized automatically by the model + server and be able to run locally as part of a nuclio serverless function, or as part of a real-time pipeline.
+ """ + + def load(self): + """ + loads a model that was logged by the MLFlow tracker model + """ + # Unzip the model dir and then use mlflow's load function + model_file, _ = self.get_model(".zip") + model_path_unzip = model_file.replace(".zip", "") + + with zipfile.ZipFile(model_file, "r") as zip_ref: + zip_ref.extractall(model_path_unzip) + + self.model = mlflow.pyfunc.load_model(model_path_unzip) + + def predict(self, request: Dict[str, Any]) -> list: + """ + Infer the inputs through the model. The inferred data will + be read from the "inputs" key of the request. + + :param request: The request to the model using xgboost's predict. + The input to the model will be read from the "inputs" key. + + :return: The model's prediction on the given input. + """ + + # Get the inputs and set to accepted type: + inputs = pd.DataFrame(request["inputs"]) + + # Predict using the model's predict function: + predictions = self.model.predict(inputs) + + # Return as list: + return predictions.tolist() diff --git a/mlflow_utils/requirements.txt b/mlflow_utils/requirements.txt new file mode 100644 index 000000000..2ecc4ff91 --- /dev/null +++ b/mlflow_utils/requirements.txt @@ -0,0 +1,3 @@ +mlflow==2.12.2 +lightgbm +xgboost \ No newline at end of file diff --git a/mlflow_utils/test_mlflow_utils.py b/mlflow_utils/test_mlflow_utils.py new file mode 100644 index 000000000..70d6ce03f --- /dev/null +++ b/mlflow_utils/test_mlflow_utils.py @@ -0,0 +1,179 @@ +# Copyright 2018 Iguazio +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import tempfile + +import lightgbm as lgb +import mlflow +import mlflow.environment_variables +import mlflow.xgboost +import pytest +import xgboost as xgb +from sklearn import datasets +from sklearn.metrics import accuracy_score, log_loss +from sklearn.model_selection import train_test_split + +import os +# os.environ["MLRUN_IGNORE_ENV_FILE"] = "True" #TODO remove before push + +import mlrun +import mlrun.launcher.local +# Important: +# unlike mlconf which resets back to default after each test run, the mlflow configurations +# and env vars don't, so at the end of each test we need to redo anything we set in that test. +# what we cover in these tests: logging "regular" runs with, experiment name, run id and context +# name (last two using mlconf), failing run mid-way, and a run with no handler. +# we also test here importing of runs, artifacts and models from a previous run. + +# simple mlflow example of lgb logging +def lgb_run(): + # prepare train and test data + iris = datasets.load_iris() + X = iris.data + y = iris.target + X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 + ) + + # enable auto logging + mlflow.lightgbm.autolog() + + train_set = lgb.Dataset(X_train, label=y_train) + + with mlflow.start_run(): + # train model + params = { + "objective": "multiclass", + "num_class": 3, + "learning_rate": 0.1, + "metric": "multi_logloss", + "colsample_bytree": 1.0, + "subsample": 1.0, + "seed": 42, + } + # model and training data are being logged automatically + model = lgb.train( + params, + train_set, + num_boost_round=10, + valid_sets=[train_set], + valid_names=["train"], + ) + + # evaluate model + y_proba = model.predict(X_test) + y_pred = y_proba.argmax(axis=1) + loss = log_loss(y_test, y_proba) + acc = accuracy_score(y_test, y_pred) + + # log metrics + mlflow.log_metrics({"log_loss": loss, "accuracy": acc}) + 
+ +# simple mlflow example of xgb logging +def xgb_run(): + # prepare train and test data + iris = datasets.load_iris() + x = iris.data + y = iris.target + x_train, x_test, y_train, y_test = train_test_split( + x, y, test_size=0.2, random_state=42 + ) + + # enable auto logging + mlflow.xgboost.autolog() + + dtrain = xgb.DMatrix(x_train, label=y_train) + dtest = xgb.DMatrix(x_test, label=y_test) + + with mlflow.start_run(): + # train model + params = { + "objective": "multi:softprob", + "num_class": 3, + "learning_rate": 0.3, + "eval_metric": "mlogloss", + "colsample_bytree": 1.0, + "subsample": 1.0, + "seed": 42, + } + # model and training data are being logged automatically + model = xgb.train(params, dtrain, evals=[(dtrain, "train")]) + # evaluate model + y_proba = model.predict(dtest) + y_pred = y_proba.argmax(axis=1) + loss = log_loss(y_test, y_proba) + acc = accuracy_score(y_test, y_pred) + # log metrics + mlflow.log_metrics({"log_loss": loss, "accuracy": acc}) + + +@pytest.mark.parametrize("handler", ["xgb_run", "lgb_run"]) +def test_track_run_with_experiment_name(handler): + """ + This test is for tracking a run logged by mlflow into mlrun while it's running using the experiment name. 
+ first activate the tracking option in mlconf, then we name the mlflow experiment, + then we run some code that is being logged by mlflow using mlrun, + and finally compare the mlrun we tracked with the original mlflow run using the validate func + """ + # Enable general tracking + mlrun.mlconf.external_platform_tracking.enabled = True + # Set the mlflow experiment name + mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.set(f"{handler}_test_track") + with tempfile.TemporaryDirectory() as test_directory: + mlflow.set_tracking_uri(test_directory) # Tell mlflow where to save logged data + + # Create a project for this tester: + project = mlrun.get_or_create_project(name="default", context=test_directory) + + # Create a MLRun function using the tester source file (all the functions must be located in it): + func = project.set_function( + func=__file__, + name=f"{handler}-test", + kind="job", + image="mlrun/mlrun", + requirements=["mlflow"], + ) + # mlflow creates a dir to log the run, this makes it in the tmpdir we create + trainer_run = func.run( + local=True, + handler=handler, + artifact_path=test_directory, + ) + + serving_func = project.set_function( + func=os.path.abspath("function.yaml"), + name=f"{handler}-server", + ) + model_name = f"{handler}-model" + # Add the model + upper_handler = handler.replace("_", "-") + model_path = test_directory + f"/{upper_handler}-test-{upper_handler}/0/model/" + serving_func.add_model( + model_name, + class_name="MLFlowModelServer", + model_path=model_path, + ) + + # Create a mock server + server = serving_func.to_mock_server() + + # An example taken randomly + result = server.test(f"/v2/models/{model_name}/predict", {"inputs": [[5.1, 3.5, 1.4, 0.2]]}) + print(result) + assert result + # unset mlflow experiment name to default + mlflow.environment_variables.MLFLOW_EXPERIMENT_NAME.unset() + + diff --git a/model_server/function.yaml b/model_server/function.yaml index 1539a3810..cb082c184 100644 --- 
a/model_server/function.yaml +++ b/model_server/function.yaml @@ -44,7 +44,7 @@ spec: - name: MODEL_CLASS value: ClassifierModel handler: model_server:handler - runtime: python:3.6 + runtime: python:3.9 volumes: [] source: '' function_kind: serving diff --git a/pii_recognizer/function.yaml b/pii_recognizer/function.yaml index 54b448d9c..069fa1ffe 100644 --- a/pii_recognizer/function.yaml +++ b/pii_recognizer/function.yaml @@ -2,13 +2,14 @@ kind: job metadata: name: pii-recognizer tag: '' - hash: b09b7b9a4ffd55088d665a0191055411e9198a2f + hash: 818930645d33704e9cada919769ee9d93cbb9434 project: '' labels: author: pgw categories: - machine-learning - data-preparation + - NLP spec: command: '' args: [] diff --git a/pii_recognizer/item.yaml b/pii_recognizer/item.yaml index 2f618febc..41ead33b6 100644 --- a/pii_recognizer/item.yaml +++ b/pii_recognizer/item.yaml @@ -2,6 +2,7 @@ apiVersion: v1 categories: - machine-learning - data-preparation + - NLP description: This function is used to recognize PII in a directory of text files doc: '' example: pii_recognizer.ipynb @@ -30,5 +31,5 @@ spec: - st-annotated-text - https://huggingface.co/beki/en_spacy_pii_distilbert/resolve/main/en_spacy_pii_distilbert-any-py3-none-any.whl url: '' -version: 0.2.0 +version: 0.3.0 test_valid: False diff --git a/pyannote_audio/function.yaml b/pyannote_audio/function.yaml index 2e84fbd92..30870afa2 100644 --- a/pyannote_audio/function.yaml +++ b/pyannote_audio/function.yaml @@ -2,14 +2,14 @@ kind: job metadata: name: pyannote-audio tag: '' - hash: c45be8d7f51f0b2203155b08c307814a2cb0ac78 + hash: aed670a0534ebf30690dd2af7acad35595c7d5b1 project: '' labels: author: guyl categories: - deep-learning - - Huggingface - - Audio + - huggingface + - audio spec: command: '' args: [] diff --git a/pyannote_audio/item.yaml b/pyannote_audio/item.yaml index 7133ceb41..b69add9e6 100644 --- a/pyannote_audio/item.yaml +++ b/pyannote_audio/item.yaml @@ -1,8 +1,8 @@ apiVersion: v1 categories: - deep-learning -- 
Huggingface -- Audio +- huggingface +- audio description: pyannote's speech diarization of audio files doc: '' example: pyannote_audio.ipynb @@ -27,4 +27,4 @@ spec: - torchaudio - tqdm url: '' -version: 1.1.0 +version: 1.2.0 diff --git a/question_answering/function.yaml b/question_answering/function.yaml index a33614153..7491b17e9 100644 --- a/question_answering/function.yaml +++ b/question_answering/function.yaml @@ -2,11 +2,13 @@ kind: job metadata: name: question-answering tag: '' - hash: 90e67d116b256a98da7d5819724e43df01d8b4eb + hash: aed62db95f17576c69b457767e3595c2de1d5465 project: '' labels: author: yonish categories: + - genai + - huggingface - machine-learning spec: command: '' diff --git a/question_answering/item.yaml b/question_answering/item.yaml index 58ab5cc36..56fc5a5ec 100755 --- a/question_answering/item.yaml +++ b/question_answering/item.yaml @@ -1,5 +1,7 @@ apiVersion: v1 categories: +- genai +- huggingface - machine-learning description: GenAI approach of question answering on a given data doc: '' @@ -24,4 +26,4 @@ spec: - torch - tqdm url: '' -version: 0.3.1 +version: 0.4.0 diff --git a/silero_vad/function.yaml b/silero_vad/function.yaml index 0b4ad422b..8ec121a6b 100644 --- a/silero_vad/function.yaml +++ b/silero_vad/function.yaml @@ -2,14 +2,14 @@ kind: job metadata: name: silero-vad tag: '' - hash: 61b7a70c167b7819481fdabf9350fc6fa344d2f5 + hash: 59336f808643a74f3a2c5d506977387010427208 project: '' labels: author: guyl categories: - deep-learning - - PyTorch - - Audio + - pytorch + - audio spec: command: '' args: [] diff --git a/silero_vad/item.yaml b/silero_vad/item.yaml index 17c8eb62c..9ce9a5d2e 100644 --- a/silero_vad/item.yaml +++ b/silero_vad/item.yaml @@ -1,8 +1,8 @@ apiVersion: v1 categories: - deep-learning -- PyTorch -- Audio +- pytorch +- audio description: Silero VAD (Voice Activity Detection) functions. 
doc: '' example: silero_vad.ipynb @@ -27,4 +27,4 @@ spec: - tqdm - onnxruntime url: '' -version: 1.2.0 +version: 1.3.0 diff --git a/structured_data_generator/function.yaml b/structured_data_generator/function.yaml index 6f2039e4b..1093e178b 100644 --- a/structured_data_generator/function.yaml +++ b/structured_data_generator/function.yaml @@ -2,7 +2,7 @@ kind: job metadata: name: structured-data-generator tag: '' - hash: ac969f46aae91804024ea736856267c26578864b + hash: 44bb39f4bc55b38fc7ead1df24cb02bcf7f05bc9 project: '' labels: author: zeevr @@ -10,7 +10,7 @@ metadata: - machine-learning - data-preparation - data-generation - - GenAI + - genai spec: command: '' args: [] diff --git a/structured_data_generator/item.yaml b/structured_data_generator/item.yaml index 27e0e3fab..be2a2a948 100755 --- a/structured_data_generator/item.yaml +++ b/structured_data_generator/item.yaml @@ -3,7 +3,7 @@ categories: - machine-learning - data-preparation - data-generation -- GenAI +- genai description: GenAI approach of generating structured data according to a given schema doc: '' example: structured_data_generator.ipynb @@ -26,4 +26,4 @@ spec: - langchain - tqdm url: '' -version: 1.4.0 +version: 1.5.0 diff --git a/text_to_audio_generator/function.yaml b/text_to_audio_generator/function.yaml index df142d2ef..88ef9cb89 100644 --- a/text_to_audio_generator/function.yaml +++ b/text_to_audio_generator/function.yaml @@ -2,13 +2,14 @@ kind: job metadata: name: text-to-audio-generator tag: '' - hash: 534e34d316098dcb345860a786ea013102150e67 + hash: 89fcaf3fab53e7b7fbba448a5e65c253d7fa66ed project: '' labels: author: yonatans categories: - data-preparation - machine-learning + - pytorch spec: command: '' args: [] diff --git a/text_to_audio_generator/item.yaml b/text_to_audio_generator/item.yaml index 4784a80d2..efa8afc90 100644 --- a/text_to_audio_generator/item.yaml +++ b/text_to_audio_generator/item.yaml @@ -2,6 +2,7 @@ apiVersion: v1 categories: - data-preparation - machine-learning +- 
pytorch description: Generate audio file from text using different speakers doc: '' example: text_to_audio_generator.ipynb @@ -24,5 +25,5 @@ spec: - bark - torchaudio url: '' -version: 1.1.0 +version: 1.2.0 test_valid: True diff --git a/tf2_serving/function.yaml b/tf2_serving/function.yaml index a8fa7ce66..c755263ae 100644 --- a/tf2_serving/function.yaml +++ b/tf2_serving/function.yaml @@ -46,7 +46,7 @@ spec: - name: MODEL_CLASS value: TF2Model handler: tf2_serving:handler - runtime: python:3.6 + runtime: python:3.9 volumes: [] source: '' function_kind: serving \ No newline at end of file diff --git a/transcribe/function.yaml b/transcribe/function.yaml index 40dd2f0e6..d72751ad6 100644 --- a/transcribe/function.yaml +++ b/transcribe/function.yaml @@ -2,12 +2,14 @@ kind: job metadata: name: transcribe tag: '' - hash: 5cd620de67a936ee8a87cfc1f0b97e19730d0a69 + hash: 8810ac74045bd15cee15a2e4e89563e8e29908d3 project: '' labels: author: yonatans categories: - data-preparation + - genai + - huggingface - machine-learning spec: command: '' @@ -24,6 +26,7 @@ spec: - tqdm - torchaudio - torch + - accelerate entry_points: do_task: name: do_task diff --git a/transcribe/item.yaml b/transcribe/item.yaml index d53341ff2..7fddcf95e 100644 --- a/transcribe/item.yaml +++ b/transcribe/item.yaml @@ -1,6 +1,8 @@ apiVersion: v1 categories: - data-preparation +- genai +- huggingface - machine-learning description: Transcribe audio files into text files doc: '' @@ -27,4 +29,4 @@ spec: - torch - accelerate url: '' -version: 1.0.0 \ No newline at end of file +version: 1.1.0 \ No newline at end of file diff --git a/translate/function.yaml b/translate/function.yaml index 1a3fd7a88..bb1656103 100644 --- a/translate/function.yaml +++ b/translate/function.yaml @@ -2,13 +2,16 @@ kind: job metadata: name: translate tag: '' - hash: bc26313449cd13554a18106ed9893535fb79dd6e + hash: 7eedf684bcebfbfd964e5503afbb56335c8f4097 project: '' labels: author: guyl categories: - data-preparation + - 
huggingface - machine-learning + - deep-learning + - NLP spec: command: '' args: [] @@ -34,24 +37,27 @@ spec: - name: root_worker_inputs type: Dict[str, Any] default: null - outputs: - - default: '' + outputs: [] lineno: 56 + has_varargs: false + has_kwargs: false decorator: name: decorator doc: '' parameters: - name: handler - outputs: - - default: '' + outputs: [] lineno: 68 + has_varargs: false + has_kwargs: false wrapper: name: wrapper doc: '' parameters: [] - outputs: - - default: '' + outputs: [] lineno: 73 + has_varargs: false + has_kwargs: true translate: name: translate doc: 'Translate text files using a transformer model from Huggingface''s hub @@ -112,8 +118,10 @@ spec: default: false outputs: - doc: 'A tuple of:' - default: '' + type: Tuple[str, pd.DataFrame, dict] lineno: 135 + has_varargs: false + has_kwargs: false description: Translate text files from one language to another default_handler: translate disable_auto_mount: false diff --git a/translate/item.yaml b/translate/item.yaml index f85a55990..e63947349 100644 --- a/translate/item.yaml +++ b/translate/item.yaml @@ -1,6 +1,7 @@ apiVersion: v1 categories: - data-preparation +- huggingface - machine-learning - deep-learning - NLP @@ -28,5 +29,5 @@ spec: - torch - tqdm url: '' -version: 0.0.2 +version: 0.1.0 test_valid: True diff --git a/v2_model_server/function.yaml b/v2_model_server/function.yaml index 53fb00ea1..45d261b6a 100644 --- a/v2_model_server/function.yaml +++ b/v2_model_server/function.yaml @@ -70,14 +70,14 @@ spec: annotations: nuclio.io/generated_by: function generated from /home/michaell/projects/functions/v2_model_server/v2_model_server.py spec: - runtime: python:3.6 + runtime: python:3.9 handler: v2_model_server:handler env: [] volumes: [] build: commands: [] noBaseImagesPull: true - functionSourceCode: 
IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG1scnVuCgpmcm9tIGNsb3VkcGlja2xlIGltcG9ydCBsb2FkCmZyb20gdHlwaW5nIGltcG9ydCBMaXN0CmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCmltcG9ydCBudW1weSBhcyBucAoKaW1wb3J0IHdhcm5pbmdzCgp3YXJuaW5ncy5maWx0ZXJ3YXJuaW5ncygiaWdub3JlIikKCgpjbGFzcyBDbGFzc2lmaWVyTW9kZWwobWxydW4uc2VydmluZy5WMk1vZGVsU2VydmVyKToKICAgIGRlZiBsb2FkKHNlbGYpOgogICAgICAgICIiImxvYWQgYW5kIGluaXRpYWxpemUgdGhlIG1vZGVsIGFuZC9vciBvdGhlciBlbGVtZW50cyIiIgogICAgICAgIG1vZGVsX2ZpbGUsIGV4dHJhX2RhdGEgPSBzZWxmLmdldF9tb2RlbCgiLnBrbCIpCiAgICAgICAgc2VsZi5tb2RlbCA9IGxvYWQob3Blbihtb2RlbF9maWxlLCAicmIiKSkKCiAgICBkZWYgcHJlZGljdChzZWxmLCBib2R5OiBkaWN0KSAtPiBMaXN0OgogICAgICAgICIiIkdlbmVyYXRlIG1vZGVsIHByZWRpY3Rpb25zIGZyb20gc2FtcGxlLiIiIgogICAgICAgIGZlYXRzID0gbnAuYXNhcnJheShib2R5WyJpbnB1dHMiXSkKICAgICAgICByZXN1bHQ6IG5wLm5kYXJyYXkgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMpCiAgICAgICAgcmV0dXJuIHJlc3VsdC50b2xpc3QoKQoKCmZyb20gbWxydW4ucnVudGltZXMgaW1wb3J0IG51Y2xpb19pbml0X2hvb2sKCgpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICJzZXJ2aW5nX3YyIikKCgpkZWYgaGFuZGxlcihjb250ZXh0LCBldmVudCk6CiAgICByZXR1cm4gY29udGV4dC5tbHJ1bl9oYW5kbGVyKGNvbnRleHQsIGV2ZW50KQoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nX3YyJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg== + functionSourceCode: 
IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKIyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG1scnVuCgpmcm9tIGNsb3VkcGlja2xlIGltcG9ydCBsb2FkCmZyb20gdHlwaW5nIGltcG9ydCBMaXN0CmZyb20gc2tsZWFybi5kYXRhc2V0cyBpbXBvcnQgbG9hZF9pcmlzCmltcG9ydCBudW1weSBhcyBucAoKaW1wb3J0IHdhcm5pbmdzCgp3YXJuaW5ncy5maWx0ZXJ3YXJuaW5ncygiaWdub3JlIikKCgpjbGFzcyBDbGFzc2lmaWVyTW9kZWwobWxydW4uc2VydmluZy5WMk1vZGVsU2VydmVyKToKICAgIGRlZiBsb2FkKHNlbGYpOgogICAgICAgICIiImxvYWQgYW5kIGluaXRpYWxpemUgdGhlIG1vZGVsIGFuZC9vciBvdGhlciBlbGVtZW50cyIiIgogICAgICAgIG1vZGVsX2ZpbGUsIGV4dHJhX2RhdGEgPSBzZWxmLmdldF9tb2RlbCgiLnBrbCIpCiAgICAgICAgc2VsZi5tb2RlbCA9IGxvYWQob3Blbihtb2RlbF9maWxlLCAicmIiKSkKCiAgICBkZWYgcHJlZGljdChzZWxmLCBib2R5OiBkaWN0KSAtPiBMaXN0OgogICAgICAgICIiIkdlbmVyYXRlIG1vZGVsIHByZWRpY3Rpb25zIGZyb20gc2FtcGxlLiIiIgogICAgICAgIGZlYXRzID0gbnAuYXNhcnJheShib2R5WyJpbnB1dHMiXSkKICAgICAgICByZXN1bHQ6IG5wLm5kYXJyYXkgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMpCiAgICAgICAgcmV0dXJuIHJlc3VsdC50b2xpc3QoKQpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK source: '' function_kind: 
serving_v2 default_class: ClassifierModel diff --git a/v2_model_server/item.yaml b/v2_model_server/item.yaml index e0d6b0f96..7bde91a64 100644 --- a/v2_model_server/item.yaml +++ b/v2_model_server/item.yaml @@ -25,4 +25,4 @@ spec: kind: serving requirements: [] url: '' -version: 1.1.0 +version: 1.2.0 diff --git a/v2_model_server/v2_model_server.py b/v2_model_server/v2_model_server.py index dbaa72ef2..572f1680d 100644 --- a/v2_model_server/v2_model_server.py +++ b/v2_model_server/v2_model_server.py @@ -37,14 +37,3 @@ def predict(self, body: dict) -> List: feats = np.asarray(body["inputs"]) result: np.ndarray = self.model.predict(feats) return result.tolist() - - -from mlrun.runtimes import nuclio_init_hook - - -def init_context(context): - nuclio_init_hook(context, globals(), "serving_v2") - - -def handler(context, event): - return context.mlrun_handler(context, event)