Align to master branch (#826)

* [Category] Fix and add categories to functions (#808)
* [Category] Fix and add categories to functions
* bump version in structured
* test is not valid in huggingface_serving
* Fix duplicated footer
* Fix duplicated footer
* revert python version change as it will be done in another PR
* comments
* comments
* Bump python:3.6 to python:3.9 (#810)
* [Describe] Align describe to new pandas version (#812)
* [Describe] Align describe to new pandas version
* minor test fix
* update mlrun version
* add dask to requirements
* remove dask
* update numpy version
* debug
* debug
* debug
* remove dask tests
* remove debug code
* [get_offline_features] Updated to mlrun 1.6.3 (#813)
* [Feature-selection] Replace matplotlib with plotly (#815)
* Iguazio-cicd user token updated
  Iguazio-cicd user token updated in repo secrets: https://github.com/mlrun/functions/settings/secrets/actions MARKETPLACE_ACCESS_TOKEN_V3 new token gh...Zmf was set around April
* forcing iguazio-cicd auth
  forcing iguazio-cicd to deal with Author identity unknown
* checkout@v3 to v4 and echo
* [Mlflow_utils] - mlflow model server (#811)
* mlflow server
* small fix to test
* small fixes to ms and nb
* small fixes to mlrun version
* update requirements lightgbm
* added req
* Added xgboost to req
---------
Co-authored-by: Avi Asulin <[email protected]>
* [Mlflow] Remove mlflow tag (#825)
* remove mlflow tag
* remove mlflow tag
---------
Co-authored-by: Avi Asulin <[email protected]>
* align feature_selection yaml
---------
Co-authored-by: Avi Asulin <[email protected]>
Co-authored-by: Yonatan Shelach <[email protected]>
Co-authored-by: rokatyy <[email protected]>
Co-authored-by: Katerina Molchanova <[email protected]>
Co-authored-by: nashpaz123 <[email protected]>
Co-authored-by: ZeevRispler <[email protected]>
mlrun · Sep 25, 2024 · 639bb27 · 639bb27
1 parent 8f3f226
commit 639bb27
Show file tree

Hide file tree

Showing 42 changed files with 1,860 additions and 309 deletions.
diff --git a/.github/workflows/test-all.yaml b/.github/workflows/test-all.yaml
@@ -15,7 +15,7 @@ jobs:
         run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}"
         id: myref
 
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - id: set-matrix
         # This is very hacky, but it goes like that:
         # 1) Associate base_ref with origin/base_ref since actions/checkout doesn't do it, if we don't do that we won't be able to check the actual diff
@@ -63,7 +63,7 @@ jobs:
     steps:
       # Source
       - name: Checkout current repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           path: functions
       # Install python 3.9
@@ -106,11 +106,11 @@ jobs:
         run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}"
         id: branch
       - name: Checkout current repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           path: functions
       - name: Checkout Marketplace
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
         with:
           repository: mlrun/marketplace
           path: marketplace
@@ -136,6 +136,7 @@ jobs:
         env:
           GITHUB_TOKEN: ${{ secrets.MARKETPLACE_ACCESS_TOKEN_V3 }}
           USERNAME: iguazio-cicd
+          USEREMAIL: [email protected]
           REPO_PATH: marketplace
           BASE_REPO: mlrun
           BASE_BRANCH: master
@@ -153,24 +154,30 @@ jobs:
               exit 1;
           };
           git config --local user.name $USERNAME
+          git config --local user.email $USEREMAIL
           git branch --set-upstream-to origin/master
           git remote -v
-          echo "Checking out [$BRANCH_NAME]..."
+          echo "1. Checking out [$BRANCH_NAME]..."
           git checkout -b $BRANCH_NAME
-          echo "Checking out [$BASE_BRANCH]..."
+          echo "2. Checking out [$BASE_BRANCH]..."
           git checkout $BASE_BRANCH
           git pull
-          echo "Checking out [$BRANCH_NAME]..."
+          echo "3. Checking out [$BRANCH_NAME]..."
           git checkout $BRANCH_NAME
+          echo "3a. merging"
           git merge $BASE_BRANCH
+          echo "3b. status"
           git status
           git status --ignored
           find . -type f | xargs ls -artl
+          echo "3b. add"
           git add --all 
           git status
           git status --ignored
-          echo "Commiting changes..."
+          echo "4. Commiting changes..."
+          echo "4a. git rev-parse"
           git rev-parse --show-toplevel
+          echo "4b. git commit"
           git commit -a -m "Automatically generated by github-worflow[bot] for commit: $COMMIT_SHA"
           git status
           git status --ignored

diff --git a/churn_server/churn_server.py b/churn_server/churn_server.py
@@ -43,13 +43,3 @@ def predict(self, body):
         except Exception as e:
             raise Exception("Failed to predict %s" % e)
 
-
-from mlrun.runtimes import nuclio_init_hook
-
-
-def init_context(context):
-    nuclio_init_hook(context, globals(), "serving_v2")
-
-
-def handler(context, event):
-    return context.mlrun_handler(context, event)
diff --git a/churn_server/function.yaml b/churn_server/function.yaml
@@ -29,14 +29,14 @@ spec:
       annotations:
         nuclio.io/generated_by: function generated from /User/functions/churn_server/churn_server.py
     spec:
-      runtime: python:3.6
+      runtime: python:3.9
       handler: churn_server:handler
       env: []
       volumes: []
       build:
         commands: []
         noBaseImagesPull: true
-        functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKCgppbXBvcnQgbWxydW4KCgpjbGFzcyBDaHVybk1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkIG11bHRpcGxlIG1vZGVscyBpbiBuZXN0ZWQgZm9sZGVycywgY2h1cm4gbW9kZWwgb25seQogICAgICAgICIiIgogICAgICAgIGNsZl9tb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoIi5wa2wiKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNsZl9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgaWYgImNveCIgaW4gZXh0cmFfZGF0YS5rZXlzKCk6CiAgICAgICAgICAgIGNveF9tb2RlbF9maWxlID0gZXh0cmFfZGF0YVsiY294Il0KICAgICAgICAgICAgc2VsZi5jb3hfbW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNveF9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgICAgIGlmICJjb3gva20iIGluIGV4dHJhX2RhdGEua2V5cygpOgogICAgICAgICAgICAgICAga21fbW9kZWxfZmlsZSA9IGV4dHJhX2RhdGFbImNveC9rbSJdCiAgICAgICAgICAgICAgICBzZWxmLmttX21vZGVsID0gbG9hZChvcGVuKHN0cihrbV9tb2RlbF9maWxlKSwgInJiIikpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keSk6CiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsiaW5wdXRzIl0sIGR0eXBlPW5wLmZsb2F0MzIpLnJlc2hhcGUoLTEsIDIzKQogICAgICAgICAgICByZXN1bHQgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMsIHZhbGlkYXRlX2ZlYXR1cmVzPUZhbHNlKQogICAgICAgICAgICByZXR1cm4gcmVzdWx0LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oIkZhaWxlZCB0byBwcmVkaWN0ICVzIiAlIGUpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawoKCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgInNlcnZpbmdfdjIiKQoKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK
+        functionSourceCode: IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKIyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKCgppbXBvcnQgbWxydW4KCgpjbGFzcyBDaHVybk1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkIG11bHRpcGxlIG1vZGVscyBpbiBuZXN0ZWQgZm9sZGVycywgY2h1cm4gbW9kZWwgb25seQogICAgICAgICIiIgogICAgICAgIGNsZl9tb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoIi5wa2wiKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNsZl9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgaWYgImNveCIgaW4gZXh0cmFfZGF0YS5rZXlzKCk6CiAgICAgICAgICAgIGNveF9tb2RlbF9maWxlID0gZXh0cmFfZGF0YVsiY294Il0KICAgICAgICAgICAgc2VsZi5jb3hfbW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNveF9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgICAgIGlmICJjb3gva20iIGluIGV4dHJhX2RhdGEua2V5cygpOgogICAgICAgICAgICAgICAga21fbW9kZWxfZmlsZSA9IGV4dHJhX2RhdGFbImNveC9rbSJdCiAgICAgICAgICAgICAgICBzZWxmLmttX21vZGVsID0gbG9hZChvcGVuKHN0cihrbV9tb2RlbF9maWxlKSwgInJiIikpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keSk6CiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsiaW5wdXRzIl0sIGR0eXBlPW5wLmZsb2F0MzIpLnJlc2hhcGUoLTEsIDIzKQogICAgICAgICAgICByZXN1bHQgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMsIHZhbGlkYXRlX2ZlYXR1cmVzPUZhbHNlKQogICA
gICAgICAgICByZXR1cm4gcmVzdWx0LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oIkZhaWxlZCB0byBwcmVkaWN0ICVzIiAlIGUpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nX3YyJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg==
   source: ''
   function_kind: serving_v2
   default_class: ChurnModel

diff --git a/churn_server/item.yaml b/churn_server/item.yaml
@@ -29,4 +29,4 @@ spec:
   - xgboost==1.3.1
   - lifelines==0.22.8
 url: ''
-version: 1.1.0
+version: 1.2.0
diff --git a/describe/describe.py b/describe/describe.py
@@ -36,7 +36,7 @@
 )
 from mlrun.datastore import DataItem
 from mlrun.execution import MLClientCtx
-from mlrun.feature_store import FeatureSet, FeatureVector
+from mlrun.feature_store import FeatureSet
 from plotly.subplots import make_subplots
 
 pd.set_option("display.float_format", lambda x: "%.2f" % x)
@@ -234,24 +234,24 @@ def _create_features_histogram_artifacts(
     if label_column is not None and problem_type == "classification":
         all_labels = df[label_column].unique()
     visible = True
-    for (columnName, _) in df.iteritems():
-        if columnName == label_column:
+    for column_name in df.columns:
+        if column_name == label_column:
             continue
 
         if label_column is not None and problem_type == "classification":
             for label in all_labels:
                 sub_fig = go.Histogram(
                     histfunc="count",
-                    x=df.loc[df[label_column] == label][columnName],
+                    x=df.loc[df[label_column] == label][column_name],
                     name=str(label),
                     visible=visible,
                 )
-                figs[f"{columnName}@?@{label}"] = sub_fig
+                figs[f"{column_name}@?@{label}"] = sub_fig
         else:
-            sub_fig = go.Histogram(histfunc="count", x=df[columnName], visible=visible)
-            figs[f"{columnName}@?@{1}"] = sub_fig
+            sub_fig = go.Histogram(histfunc="count", x=df[column_name], visible=visible)
+            figs[f"{column_name}@?@{1}"] = sub_fig
         if visible:
-            first_feature_name = columnName
+            first_feature_name = column_name
         visible = False
 
     fig = go.Figure()
@@ -338,7 +338,7 @@ def _create_features_2d_scatter_artifacts(
     Create and log a scatter-2d artifact for each couple of features
     """
     features = [
-        columnName for (columnName, _) in df.iteritems() if columnName != label_column
+        column_name for column_name in df.columns if column_name != label_column
     ]
     max_feature_len = float(max(len(elem) for elem in features))
     if label_column is not None:
@@ -450,11 +450,12 @@ def _create_violin_artifact(
 
     plot_num = 0
 
-    for (columnName, columnData) in df.iteritems():
+    for column_name in df.columns:
+        column_data = df[column_name]
         violin = go.Violin(
-            x=[columnName] * columnData.shape[0],
-            y=columnData,
-            name=columnName,
+            x=[column_name] * column_data.shape[0],
+            y=column_data,
+            name=column_name,
         )
 
         fig.add_trace(
@@ -491,15 +492,15 @@ def _create_imbalance_artifact(
     """
     if label_column:
         if problem_type == "classification":
+            values_column = "count"
             labels_count = df[label_column].value_counts().sort_index()
             df_labels_count = pd.DataFrame(labels_count)
-            df_labels_count.rename(columns={label_column: "Total"}, inplace=True)
             df_labels_count[label_column] = labels_count.index
-            df_labels_count["weights"] = df_labels_count["Total"] / sum(
-                df_labels_count["Total"]
+            df_labels_count.rename(columns={"": values_column}, inplace=True)
+            df_labels_count[values_column] = df_labels_count[values_column] / sum(
+                df_labels_count[values_column]
             )
-
-            fig = px.pie(df_labels_count, names=label_column, values="Total")
+            fig = px.pie(df_labels_count, names=label_column, values=values_column)
         else:
             fig = px.histogram(
                 histfunc="count",
@@ -532,7 +533,7 @@ def _create_corr_artifact(
     """
     if label_column is not None:
         df = df.drop([label_column], axis=1)
-    tblcorr = df.corr()
+    tblcorr = df.corr(numeric_only=True)
     extra_data["correlation-matrix-csv"] = context.log_artifact(
         TableArtifact("correlation-matrix-csv", df=tblcorr, visible=True),
         local_path=f"{plots_dest}/correlation-matrix.csv",

diff --git a/describe/function.yaml b/describe/function.yaml
diff --git a/describe/item.yaml b/describe/item.yaml
@@ -11,7 +11,7 @@ labels:
   author: Davids
 maintainers: []
 marketplaceType: ''
-mlrunVersion: 1.4.1
+mlrunVersion: 1.6.0
 name: describe
 platformVersion: 3.5.3
 spec:
@@ -21,4 +21,4 @@ spec:
   kind: job
   requirements: []
 url: ''
-version: 1.2.0
+version: 1.3.0
diff --git a/describe/requirements.txt b/describe/requirements.txt
@@ -1,6 +1,5 @@
 scikit-learn~=1.0.2
 plotly~=5.16.1
 pytest~=7.0.1
-pandas~=1.3.5
 matplotlib~=3.5.1
 seaborn~=0.11.2
diff --git a/describe/test_describe.py b/describe/test_describe.py
@@ -271,79 +271,3 @@ def _create_data(n_samples, n_features, n_classes, n_informative, reg=False):
     df["timestamp"] = [pd.Timestamp("2022").now()] * n_samples
     df.to_parquet("artifacts/random_dataset.parquet")
     return df
-
-
-def _create_dask_func(uri):
-    dask_cluster_name = "dask-cluster"
-    dask_cluster = new_function(dask_cluster_name, kind="dask", image="mlrun/ml-models")
-    dask_cluster.spec.remote = False
-    dask_uri = uri
-    dask_cluster.export(dask_uri)
-
-
-def test_import_function_describe_dask():
-    dask_uri = "dask_func.yaml"
-    _create_dask_func(dask_uri)
-    describe_func = import_function("function.yaml")
-    is_test_passed = True
-    _create_data(n_samples=100, n_features=5, n_classes=3, n_informative=3)
-    describe_func.spec.command = "describe_dask.py"
-
-    try:
-        describe_run = describe_func.run(
-            name="task-describe",
-            handler="analyze",
-            inputs={"table": DATA_PATH},
-            params={
-                "label_column": "label",
-                "dask_function": dask_uri,
-                "dask_flag": True,
-            },
-            artifact_path=os.path.abspath("./artifacts"),
-            local=True,
-        )
-
-    except Exception as exception:
-        print(f"- The test failed - raised the following error:\n- {exception}")
-        is_test_passed = False
-    _validate_paths(
-        {
-            "imbalance.html",
-            "imbalance-weights-vec.csv",
-        }
-    )
-    assert is_test_passed
-
-
-def test_code_to_function_describe_dask():
-    dask_uri = "dask_func.yaml"
-    _create_dask_func(dask_uri)
-    describe_func = code_to_function(filename="describe.py", kind="local")
-    is_test_passed = True
-    _create_data(n_samples=100, n_features=5, n_classes=3, n_informative=3)
-    describe_func.spec.command = "describe_dask.py"
-
-    try:
-        describe_run = describe_func.run(
-            name="task-describe",
-            handler="analyze",
-            inputs={"table": DATA_PATH},
-            params={
-                "label_column": "label",
-                "dask_function": dask_uri,
-                "dask_flag": True,
-            },
-            artifact_path=os.path.abspath("./artifacts"),
-            local=True,
-        )
-
-    except Exception as exception:
-        print(f"- The test failed - raised the following error:\n- {exception}")
-        is_test_passed = False
-    _validate_paths(
-        {
-            "imbalance.html",
-            "imbalance-weights-vec.csv",
-        }
-    )
-    assert is_test_passed