Skip to content

Commit

Permalink
Merge pull request #289 from george0st/change
Browse files Browse the repository at this point in the history
TS204
  • Loading branch information
george0st authored Apr 19, 2024
2 parents b20bd5a + c58360d commit 291f0ec
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 6 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ The quality gate covers these test scenarios (✅ done, ✔ in-progress, ❌ pla
- ✅ TS201: Create feature set(s)
- ✅ TS202: Create feature set(s) & Ingest from DataFrame source (one step)
- ✅ TS203: Create feature set(s) & Ingest from CSV source (one step)
- TS204: Create feature set(s) & Ingest from Parquet source (one step)
- TS204: Create feature set(s) & Ingest from Parquet source (one step)
- ❌ TS205: Create feature set(s) & Ingest from SQL source (one step)
- ❌ TS206: Create feature set(s) & Ingest from Kafka source (one step)
- ❌ TS207: Create feature set(s) & Ingest from HTTP source (one step)
Expand Down
6 changes: 6 additions & 0 deletions docs/applied-limits.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,12 @@ NOTE: Solution, it is necessity to use WSL2 under OS Windows
- in case of e.g. more on-line targets, it is not possible to choose
relevant target for FeatureVector

## RedisNoSqlTarget

1. Issue with support for the **date** type
- see [combination of RedisNoSqlTarget and ParquetSource](https://github.com/mlrun/mlrun/issues/5447)


## SQLTarget

NOTE: It is in preview version, very limited with focus on MySQL only,
Expand Down
4 changes: 2 additions & 2 deletions qgate-sln-mlrun-private.env
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,11 @@ QGATE_DATASET = 01-size-100
#QGATE_FILTER_PROJECTS = agate-redis-parquet
#QGATE_FILTER_PROJECTS = agate-parquet
#QGATE_FILTER_PROJECTS = agate-redis-csv,agate-redis-parquet,agate-mysql-csv,agate-mysql-parquet,agate-kafka
QGATE_FILTER_PROJECTS = agate-mysql-csv
QGATE_FILTER_PROJECTS = agate-redis-csv

# List of test scenarios for testing e.g. TS201, etc. (it is important to keep TS dependencies).
# Default is empty list (all test scenarios will be tested)
QGATE_FILTER_SCENARIOS = TS101, TS102, TS203
QGATE_FILTER_SCENARIOS = TS101, TS102, TS204

# Path to the output directory (as off-line storage, valid for target 'parquet' and 'csv')
# sample value e.g. ./output
Expand Down
6 changes: 3 additions & 3 deletions qgate_sln_mlrun/qualityreport.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import os
from qgate_sln_mlrun.setup import Setup, ProjectDelete
from qgate_sln_mlrun.output import Output
from qgate_sln_mlrun.ts import ts101, ts102, ts201, ts202, ts203, ts301, ts302, ts303, ts401, ts501, ts502, ts701, ts801
from qgate_sln_mlrun.ts import ts101, ts102, ts201, ts202, ts203, ts204, ts301, ts302, ts303, ts401, ts501, ts502, ts701, ts801
from qgate_sln_mlrun.ts import tsbase
import logging
import importlib.resources
Expand All @@ -15,7 +15,7 @@ class QualityReport:
"""

TEST_SCENARIOS = [ts101.TS101,
ts201.TS201, ts202.TS202, ts203.TS203,
ts201.TS201, ts202.TS202, ts203.TS203, ts204.TS204,
ts301.TS301, ts302.TS302, ts303.TS303,
ts401.TS401,
ts501.TS501, ts502.TS502]
Expand All @@ -31,7 +31,7 @@ class QualityReport:
TARGET_NOT_VALID_TEST = {"kafka": ["TS501", "TS502"]}

# Test vs Only On/Off-line
TEST_BOTH = ["TS101","TS102","TS201","TS301", "TS302", "TS303", "TS401"]
TEST_BOTH = ["TS101","TS102","TS201", "TS202", "TS203", "TS204", "TS301", "TS302", "TS303", "TS401"]
TEST_ONLY_OFFLINE = ["TS501","TS701","TS801"]
TEST_ONLY_ONLINE = ["TS502"]

Expand Down
72 changes: 72 additions & 0 deletions qgate_sln_mlrun/ts/ts204.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""
TS204: Create feature set(s) & Ingest from Parquet source (one step)
"""
from qgate_sln_mlrun.ts.tsbase import TSBase
import mlrun
import mlrun.feature_store as fstore
from mlrun.data_types.data_types import ValueType
from mlrun.datastore.sources import ParquetSource
from qgate_sln_mlrun.ts import ts201
import os
import json
import glob


class TS204(TSBase):
    """Test scenario TS204: create feature set(s) and ingest from a Parquet source (one step)."""

    def __init__(self, solution):
        # The scenario registers itself under its own class name.
        super().__init__(solution, type(self).__name__)

    @property
    def desc(self) -> str:
        """Short description of the scenario."""
        return "Create feature set(s) & Ingest from Parquet source (one step)"

    @property
    def long_desc(self):
        """Long description of the scenario (currently the same text as ``desc``)."""
        return "Create feature set(s) & Ingest from Parquet source (one step)"

    def exec(self, project_name):
        """Run the create-and-ingest test case for each feature set of the project.

        For every feature set name obtained from the project spec, the matching
        JSON definition file(s) are looked up under
        ``<cwd>/<model_definition>/01-model/02-feature-set`` and each one is
        handed to ``_create_featureset_ingest`` as an open file object.
        """
        project_spec = self.project_specs.get(project_name)

        for featureset_name in self.get_featuresets(project_spec):
            # Glob pattern for the JSON definition(s) of this feature set.
            definition_pattern = os.path.join(
                os.getcwd(),
                self.setup.model_definition,
                "01-model",
                "02-feature-set",
                f"*-{featureset_name}.json",
            )

            # One test case per matching definition file.
            for definition_path in glob.glob(definition_pattern):
                with open(definition_path, "r") as json_file:
                    self._create_featureset_ingest(
                        f'{project_name}/{featureset_name}',
                        project_name,
                        json_file,
                    )

    @TSBase.handler_testcase
    def _create_featureset_ingest(self, testcase_name, project_name, json_file):
        """Create a feature set from its JSON definition and ingest Parquet data into it.

        Definitions whose header kind is not ``"feature-set"`` are ignored.
        The feature set itself is built via ``TS201.create_featureset_content``;
        afterwards every Parquet file matching ``*-<name>.parquet`` in the
        configured dataset directory is ingested.
        """
        definition = json.load(json_file)
        name, desc, _labels, kind = TSBase.get_json_header(definition)

        if kind != "feature-set":
            return

        # Reuse the feature set creation logic from TS201.
        creator = ts201.TS201(self._solution)
        featureset = creator.create_featureset_content(
            project_name,
            f"{self.name}-{name}",
            desc,
            definition['spec'],
        )

        # TODO: get the relevant data file
        data_pattern = os.path.join(
            os.getcwd(),
            self.setup.model_definition,
            "02-data",
            self.setup.dataset_name,
            f"*-{name}.parquet",
        )

        for data_path in glob.glob(data_pattern):
            # TODO: use InferOptions.Null with python 3.10 or focus on WSL
            # NOTE: option default, change types
            # NOTE: option Null, generate error with datetime in python 3.9
            parquet_source = ParquetSource(name="tst", path=data_path)
            fstore.ingest(
                featureset,
                parquet_source,
                return_df=False,
                infer_options=mlrun.data_types.data_types.InferOptions.default(),
            )

0 comments on commit 291f0ec

Please sign in to comment.