Skip to content

Commit

Permalink
Align to master branch (#826)
Browse files Browse the repository at this point in the history
* [Category] Fix and add categories to functions (#808)

* [Category] Fix and add categories to functions

* bump version in structured

* test is not valid in huggingface_serving

* Fix duplicated footer

* Fix duplicated footer

* revert python version change as it will be done in another PR

* comments

* comments

* Bump python:3.6 to python:3.9 (#810)

* [Describe] Align describe to new pandas version (#812)

* [Describe] Align describe to new pandas version

* minor test fix

* update mlrun version

* add dask to requirements

* remove dask

* update numpy version

* debug

* debug

* debug

* remove dask tests

* remove debug code

* [get_offline_features] Updated to mlrun 1.6.3 (#813)

* [Feature-selection] Replace matplotlib with plotly (#815)

* Iguazio-cicd user token updated

The iguazio-cicd user token was updated in the repository secrets
(https://github.com/mlrun/functions/settings/secrets/actions)
under the name MARKETPLACE_ACCESS_TOKEN_V3.
The new token (gh...Zmf) was set around April.

* forcing iguazio-cicd auth

Force the iguazio-cicd git identity (user name and email) to resolve the "Author identity unknown" error.

* checkout@v3 to v4 and echo

* [Mlflow_utils] - mlflow model server (#811)

* mlflow server

* small fix to test

* small fixes to ms and nb

* small fixes to mlrun version

* update requirements lightgbm

* added req

* Added xgboost to req

---------

Co-authored-by: Avi Asulin <[email protected]>

* [Mlflow] Remove mlflow tag  (#825)

* remove mlflow tag

* remove mlflow tag

---------

Co-authored-by: Avi Asulin <[email protected]>

* align feature_selection yaml

---------

Co-authored-by: Avi Asulin <[email protected]>
Co-authored-by: Yonatan Shelach <[email protected]>
Co-authored-by: rokatyy <[email protected]>
Co-authored-by: Katerina Molchanova <[email protected]>
Co-authored-by: nashpaz123 <[email protected]>
Co-authored-by: ZeevRispler <[email protected]>
  • Loading branch information
7 people authored Sep 25, 2024
1 parent 8f3f226 commit 639bb27
Show file tree
Hide file tree
Showing 42 changed files with 1,860 additions and 309 deletions.
23 changes: 15 additions & 8 deletions .github/workflows/test-all.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}"
id: myref

- uses: actions/checkout@v3
- uses: actions/checkout@v4
- id: set-matrix
# This is very hacky, but it goes like that:
# 1) Associate base_ref with origin/base_ref since actions/checkout doesn't do it, if we don't do that we won't be able to check the actual diff
Expand Down Expand Up @@ -63,7 +63,7 @@ jobs:
steps:
# Source
- name: Checkout current repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
path: functions
# Install python 3.9
Expand Down Expand Up @@ -106,11 +106,11 @@ jobs:
run: echo "::set-output name=branch::${GITHUB_REF#refs/heads/}"
id: branch
- name: Checkout current repo
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
path: functions
- name: Checkout Marketplace
uses: actions/checkout@v3
uses: actions/checkout@v4
with:
repository: mlrun/marketplace
path: marketplace
Expand All @@ -136,6 +136,7 @@ jobs:
env:
GITHUB_TOKEN: ${{ secrets.MARKETPLACE_ACCESS_TOKEN_V3 }}
USERNAME: iguazio-cicd
USEREMAIL: [email protected]
REPO_PATH: marketplace
BASE_REPO: mlrun
BASE_BRANCH: master
Expand All @@ -153,24 +154,30 @@ jobs:
exit 1;
};
git config --local user.name $USERNAME
git config --local user.email $USEREMAIL
git branch --set-upstream-to origin/master
git remote -v
echo "Checking out [$BRANCH_NAME]..."
echo "1. Checking out [$BRANCH_NAME]..."
git checkout -b $BRANCH_NAME
echo "Checking out [$BASE_BRANCH]..."
echo "2. Checking out [$BASE_BRANCH]..."
git checkout $BASE_BRANCH
git pull
echo "Checking out [$BRANCH_NAME]..."
echo "3. Checking out [$BRANCH_NAME]..."
git checkout $BRANCH_NAME
echo "3a. merging"
git merge $BASE_BRANCH
echo "3b. status"
git status
git status --ignored
find . -type f | xargs ls -artl
echo "3b. add"
git add --all
git status
git status --ignored
echo "Commiting changes..."
echo "4. Commiting changes..."
echo "4a. git rev-parse"
git rev-parse --show-toplevel
echo "4b. git commit"
git commit -a -m "Automatically generated by github-worflow[bot] for commit: $COMMIT_SHA"
git status
git status --ignored
Expand Down
10 changes: 0 additions & 10 deletions churn_server/churn_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,13 +43,3 @@ def predict(self, body):
except Exception as e:
raise Exception("Failed to predict %s" % e)


from mlrun.runtimes import nuclio_init_hook


def init_context(context):
nuclio_init_hook(context, globals(), "serving_v2")


def handler(context, event):
return context.mlrun_handler(context, event)
4 changes: 2 additions & 2 deletions churn_server/function.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,14 +29,14 @@ spec:
annotations:
nuclio.io/generated_by: function generated from /User/functions/churn_server/churn_server.py
spec:
runtime: python:3.6
runtime: python:3.9
handler: churn_server:handler
env: []
volumes: []
build:
commands: []
noBaseImagesPull: true
functionSourceCode: IyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKCgppbXBvcnQgbWxydW4KCgpjbGFzcyBDaHVybk1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkIG11bHRpcGxlIG1vZGVscyBpbiBuZXN0ZWQgZm9sZGVycywgY2h1cm4gbW9kZWwgb25seQogICAgICAgICIiIgogICAgICAgIGNsZl9tb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoIi5wa2wiKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNsZl9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgaWYgImNveCIgaW4gZXh0cmFfZGF0YS5rZXlzKCk6CiAgICAgICAgICAgIGNveF9tb2RlbF9maWxlID0gZXh0cmFfZGF0YVsiY294Il0KICAgICAgICAgICAgc2VsZi5jb3hfbW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNveF9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgICAgIGlmICJjb3gva20iIGluIGV4dHJhX2RhdGEua2V5cygpOgogICAgICAgICAgICAgICAga21fbW9kZWxfZmlsZSA9IGV4dHJhX2RhdGFbImNveC9rbSJdCiAgICAgICAgICAgICAgICBzZWxmLmttX21vZGVsID0gbG9hZChvcGVuKHN0cihrbV9tb2RlbF9maWxlKSwgInJiIikpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keSk6CiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsiaW5wdXRzIl0sIGR0eXBlPW5wLmZsb2F0MzIpLnJlc2hhcGUoLTEsIDIzKQogICAgICAgICAgICByZXN1bHQgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMsIHZhbGlkYXRlX2ZlYXR1cmVzPUZhbHNlKQogICAgICAgICAgICByZXR1cm4gcmVzdWx0LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oIkZhaWxlZCB0byBwcmVkaWN0ICVzIiAlIGUpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawoKCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgInNlcnZpbmdfdjIiKQoKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCgpmcm9tIG1scnVuLnJ1bnRpbWVzIGltcG9ydCBudWNsaW9faW5pdF9ob29rCmRlZiBpbml0X2NvbnRleHQoY29udGV4dCk6CiAgICBudWNsaW9faW5pdF9ob29rKGNvbnRleHQsIGdsb2JhbHMoKSwgJ3NlcnZpbmdfdjInKQoKZGVmIGhhbmRsZXIoY29udGV4dCwgZXZlbnQpOgogICAgcmV0dXJuIGNvbnRleHQubWxydW5faGFuZGxlcihjb250ZXh0LCBldmVudCkK
functionSourceCode: IyBDb3B5cmlnaHQgMjAxOSBJZ3VhemlvCiMKIyBMaWNlbnNlZCB1bmRlciB0aGUgQXBhY2hlIExpY2Vuc2UsIFZlcnNpb24gMi4wICh0aGUgIkxpY2Vuc2UiKTsKIyB5b3UgbWF5IG5vdCB1c2UgdGhpcyBmaWxlIGV4Y2VwdCBpbiBjb21wbGlhbmNlIHdpdGggdGhlIExpY2Vuc2UuCiMgWW91IG1heSBvYnRhaW4gYSBjb3B5IG9mIHRoZSBMaWNlbnNlIGF0CiMKIyAgICAgaHR0cDovL3d3dy5hcGFjaGUub3JnL2xpY2Vuc2VzL0xJQ0VOU0UtMi4wCiMKIyBVbmxlc3MgcmVxdWlyZWQgYnkgYXBwbGljYWJsZSBsYXcgb3IgYWdyZWVkIHRvIGluIHdyaXRpbmcsIHNvZnR3YXJlCiMgZGlzdHJpYnV0ZWQgdW5kZXIgdGhlIExpY2Vuc2UgaXMgZGlzdHJpYnV0ZWQgb24gYW4gIkFTIElTIiBCQVNJUywKIyBXSVRIT1VUIFdBUlJBTlRJRVMgT1IgQ09ORElUSU9OUyBPRiBBTlkgS0lORCwgZWl0aGVyIGV4cHJlc3Mgb3IgaW1wbGllZC4KIyBTZWUgdGhlIExpY2Vuc2UgZm9yIHRoZSBzcGVjaWZpYyBsYW5ndWFnZSBnb3Zlcm5pbmcgcGVybWlzc2lvbnMgYW5kCiMgbGltaXRhdGlvbnMgdW5kZXIgdGhlIExpY2Vuc2UuCiMKIyBHZW5lcmF0ZWQgYnkgbnVjbGlvLmV4cG9ydC5OdWNsaW9FeHBvcnRlcgoKaW1wb3J0IG51bXB5IGFzIG5wCmZyb20gY2xvdWRwaWNrbGUgaW1wb3J0IGxvYWQKCgppbXBvcnQgbWxydW4KCgpjbGFzcyBDaHVybk1vZGVsKG1scnVuLnNlcnZpbmcuVjJNb2RlbFNlcnZlcik6CiAgICBkZWYgbG9hZChzZWxmKToKICAgICAgICAiIiIKICAgICAgICBsb2FkIG11bHRpcGxlIG1vZGVscyBpbiBuZXN0ZWQgZm9sZGVycywgY2h1cm4gbW9kZWwgb25seQogICAgICAgICIiIgogICAgICAgIGNsZl9tb2RlbF9maWxlLCBleHRyYV9kYXRhID0gc2VsZi5nZXRfbW9kZWwoIi5wa2wiKQogICAgICAgIHNlbGYubW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNsZl9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgaWYgImNveCIgaW4gZXh0cmFfZGF0YS5rZXlzKCk6CiAgICAgICAgICAgIGNveF9tb2RlbF9maWxlID0gZXh0cmFfZGF0YVsiY294Il0KICAgICAgICAgICAgc2VsZi5jb3hfbW9kZWwgPSBsb2FkKG9wZW4oc3RyKGNveF9tb2RlbF9maWxlKSwgInJiIikpCiAgICAgICAgICAgIGlmICJjb3gva20iIGluIGV4dHJhX2RhdGEua2V5cygpOgogICAgICAgICAgICAgICAga21fbW9kZWxfZmlsZSA9IGV4dHJhX2RhdGFbImNveC9rbSJdCiAgICAgICAgICAgICAgICBzZWxmLmttX21vZGVsID0gbG9hZChvcGVuKHN0cihrbV9tb2RlbF9maWxlKSwgInJiIikpCgogICAgZGVmIHByZWRpY3Qoc2VsZiwgYm9keSk6CiAgICAgICAgdHJ5OgogICAgICAgICAgICBmZWF0cyA9IG5wLmFzYXJyYXkoYm9keVsiaW5wdXRzIl0sIGR0eXBlPW5wLmZsb2F0MzIpLnJlc2hhcGUoLTEsIDIzKQogICAgICAgICAgICByZXN1bHQgPSBzZWxmLm1vZGVsLnByZWRpY3QoZmVhdHMsIHZhbGlkYXRlX2ZlYXR1cmVzPUZhbHNlKQogICAgICAgICAg
ICByZXR1cm4gcmVzdWx0LnRvbGlzdCgpCiAgICAgICAgZXhjZXB0IEV4Y2VwdGlvbiBhcyBlOgogICAgICAgICAgICByYWlzZSBFeGNlcHRpb24oIkZhaWxlZCB0byBwcmVkaWN0ICVzIiAlIGUpCgoKZnJvbSBtbHJ1bi5ydW50aW1lcyBpbXBvcnQgbnVjbGlvX2luaXRfaG9vawpkZWYgaW5pdF9jb250ZXh0KGNvbnRleHQpOgogICAgbnVjbGlvX2luaXRfaG9vayhjb250ZXh0LCBnbG9iYWxzKCksICdzZXJ2aW5nX3YyJykKCmRlZiBoYW5kbGVyKGNvbnRleHQsIGV2ZW50KToKICAgIHJldHVybiBjb250ZXh0Lm1scnVuX2hhbmRsZXIoY29udGV4dCwgZXZlbnQpCg==
source: ''
function_kind: serving_v2
default_class: ChurnModel
Expand Down
2 changes: 1 addition & 1 deletion churn_server/item.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,4 @@ spec:
- xgboost==1.3.1
- lifelines==0.22.8
url: ''
version: 1.1.0
version: 1.2.0
39 changes: 20 additions & 19 deletions describe/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
)
from mlrun.datastore import DataItem
from mlrun.execution import MLClientCtx
from mlrun.feature_store import FeatureSet, FeatureVector
from mlrun.feature_store import FeatureSet
from plotly.subplots import make_subplots

pd.set_option("display.float_format", lambda x: "%.2f" % x)
Expand Down Expand Up @@ -234,24 +234,24 @@ def _create_features_histogram_artifacts(
if label_column is not None and problem_type == "classification":
all_labels = df[label_column].unique()
visible = True
for (columnName, _) in df.iteritems():
if columnName == label_column:
for column_name in df.columns:
if column_name == label_column:
continue

if label_column is not None and problem_type == "classification":
for label in all_labels:
sub_fig = go.Histogram(
histfunc="count",
x=df.loc[df[label_column] == label][columnName],
x=df.loc[df[label_column] == label][column_name],
name=str(label),
visible=visible,
)
figs[f"{columnName}@?@{label}"] = sub_fig
figs[f"{column_name}@?@{label}"] = sub_fig
else:
sub_fig = go.Histogram(histfunc="count", x=df[columnName], visible=visible)
figs[f"{columnName}@?@{1}"] = sub_fig
sub_fig = go.Histogram(histfunc="count", x=df[column_name], visible=visible)
figs[f"{column_name}@?@{1}"] = sub_fig
if visible:
first_feature_name = columnName
first_feature_name = column_name
visible = False

fig = go.Figure()
Expand Down Expand Up @@ -338,7 +338,7 @@ def _create_features_2d_scatter_artifacts(
Create and log a scatter-2d artifact for each couple of features
"""
features = [
columnName for (columnName, _) in df.iteritems() if columnName != label_column
column_name for column_name in df.columns if column_name != label_column
]
max_feature_len = float(max(len(elem) for elem in features))
if label_column is not None:
Expand Down Expand Up @@ -450,11 +450,12 @@ def _create_violin_artifact(

plot_num = 0

for (columnName, columnData) in df.iteritems():
for column_name in df.columns:
column_data = df[column_name]
violin = go.Violin(
x=[columnName] * columnData.shape[0],
y=columnData,
name=columnName,
x=[column_name] * column_data.shape[0],
y=column_data,
name=column_name,
)

fig.add_trace(
Expand Down Expand Up @@ -491,15 +492,15 @@ def _create_imbalance_artifact(
"""
if label_column:
if problem_type == "classification":
values_column = "count"
labels_count = df[label_column].value_counts().sort_index()
df_labels_count = pd.DataFrame(labels_count)
df_labels_count.rename(columns={label_column: "Total"}, inplace=True)
df_labels_count[label_column] = labels_count.index
df_labels_count["weights"] = df_labels_count["Total"] / sum(
df_labels_count["Total"]
df_labels_count.rename(columns={"": values_column}, inplace=True)
df_labels_count[values_column] = df_labels_count[values_column] / sum(
df_labels_count[values_column]
)

fig = px.pie(df_labels_count, names=label_column, values="Total")
fig = px.pie(df_labels_count, names=label_column, values=values_column)
else:
fig = px.histogram(
histfunc="count",
Expand Down Expand Up @@ -532,7 +533,7 @@ def _create_corr_artifact(
"""
if label_column is not None:
df = df.drop([label_column], axis=1)
tblcorr = df.corr()
tblcorr = df.corr(numeric_only=True)
extra_data["correlation-matrix-csv"] = context.log_artifact(
TableArtifact("correlation-matrix-csv", df=tblcorr, visible=True),
local_path=f"{plots_dest}/correlation-matrix.csv",
Expand Down
96 changes: 43 additions & 53 deletions describe/function.yaml

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions describe/item.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ labels:
author: Davids
maintainers: []
marketplaceType: ''
mlrunVersion: 1.4.1
mlrunVersion: 1.6.0
name: describe
platformVersion: 3.5.3
spec:
Expand All @@ -21,4 +21,4 @@ spec:
kind: job
requirements: []
url: ''
version: 1.2.0
version: 1.3.0
1 change: 0 additions & 1 deletion describe/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
scikit-learn~=1.0.2
plotly~=5.16.1
pytest~=7.0.1
pandas~=1.3.5
matplotlib~=3.5.1
seaborn~=0.11.2
76 changes: 0 additions & 76 deletions describe/test_describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,79 +271,3 @@ def _create_data(n_samples, n_features, n_classes, n_informative, reg=False):
df["timestamp"] = [pd.Timestamp("2022").now()] * n_samples
df.to_parquet("artifacts/random_dataset.parquet")
return df


def _create_dask_func(uri):
dask_cluster_name = "dask-cluster"
dask_cluster = new_function(dask_cluster_name, kind="dask", image="mlrun/ml-models")
dask_cluster.spec.remote = False
dask_uri = uri
dask_cluster.export(dask_uri)


def test_import_function_describe_dask():
dask_uri = "dask_func.yaml"
_create_dask_func(dask_uri)
describe_func = import_function("function.yaml")
is_test_passed = True
_create_data(n_samples=100, n_features=5, n_classes=3, n_informative=3)
describe_func.spec.command = "describe_dask.py"

try:
describe_run = describe_func.run(
name="task-describe",
handler="analyze",
inputs={"table": DATA_PATH},
params={
"label_column": "label",
"dask_function": dask_uri,
"dask_flag": True,
},
artifact_path=os.path.abspath("./artifacts"),
local=True,
)

except Exception as exception:
print(f"- The test failed - raised the following error:\n- {exception}")
is_test_passed = False
_validate_paths(
{
"imbalance.html",
"imbalance-weights-vec.csv",
}
)
assert is_test_passed


def test_code_to_function_describe_dask():
dask_uri = "dask_func.yaml"
_create_dask_func(dask_uri)
describe_func = code_to_function(filename="describe.py", kind="local")
is_test_passed = True
_create_data(n_samples=100, n_features=5, n_classes=3, n_informative=3)
describe_func.spec.command = "describe_dask.py"

try:
describe_run = describe_func.run(
name="task-describe",
handler="analyze",
inputs={"table": DATA_PATH},
params={
"label_column": "label",
"dask_function": dask_uri,
"dask_flag": True,
},
artifact_path=os.path.abspath("./artifacts"),
local=True,
)

except Exception as exception:
print(f"- The test failed - raised the following error:\n- {exception}")
is_test_passed = False
_validate_paths(
{
"imbalance.html",
"imbalance-weights-vec.csv",
}
)
assert is_test_passed
Loading

0 comments on commit 639bb27

Please sign in to comment.