Skip to content

Commit

Permalink
Simplified workflow configuration (#108)
Browse files Browse the repository at this point in the history
* Add SQAaaS dynamic badge for dev branch (#104)

* Add SQAaaS dynamic badge

* Upgrade to sqaaas-assessment-action@v2

* Add draft example

* UPDATE credits field

* ADD docs

* REFACTOR components and pipeline code

* UPDATE docstring

* UPDATE MNIST torch use case

* ADD config file parser draft

* ADD itwinaiCLI and ConfigParser

* ADD docs

* ADD pipeline parser and serializer plus tests

* UPDATE docs

* ADD adapter component and tests (incl parser)

* ADD splitter component, improve pipeline, tests

* UPDATE test

* REMOVE todos

* ADD component tests

* ADD serializer tests

* FIX linter

* ADD basic workflow tutorial

* ADD basic intermediate tutorial

* ADD advanced tutorial

* UPDATE advanced tutorial

* UPDATE use cases

* UPDATE save parameters

* FIX linter

* FIX cyclones use case workflow

---------

Co-authored-by: orviz <[email protected]>
  • Loading branch information
matbun and orviz authored Dec 13, 2023
1 parent 087c7ec commit 8d9f51f
Show file tree
Hide file tree
Showing 62 changed files with 2,784 additions and 895 deletions.
18 changes: 4 additions & 14 deletions .github/workflows/sqaaas.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,16 @@
---
name: SQAaaS

on:
push:
on:
push:
branches: [main, dev]
pull_request:
pull_request:
branches: [main, dev]

jobs:
sqaaas_job:
runs-on: ubuntu-latest
name: Job that triggers SQAaaS platform
steps:
- name: Extract branch name
shell: bash
run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> "$GITHUB_OUTPUT"
id: extract_branch
- name: Print current branch name (debug)
shell: bash
run: echo running on branch ${{ steps.extract_branch.outputs.branch }}
- name: SQAaaS assessment step
uses: eosc-synergy/sqaaas-assessment-action@v1
with:
repo: 'https://github.com/interTwin-eu/itwinai'
branch: ${{ steps.extract_branch.outputs.branch }}
uses: eosc-synergy/sqaaas-assessment-action@v2
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

[![GitHub Super-Linter](https://github.com/interTwin-eu/T6.5-AI-and-ML/actions/workflows/lint.yml/badge.svg)](https://github.com/marketplace/actions/super-linter)
[![GitHub Super-Linter](https://github.com/interTwin-eu/T6.5-AI-and-ML/actions/workflows/check-links.yml/badge.svg)](https://github.com/marketplace/actions/markdown-link-check)
[![SQAaaS source code](https://github.com/EOSC-synergy/itwinai.assess.sqaaas/raw/dev/.badge/status_shields.svg)](https://sqaaas.eosc-synergy.eu/#/full-assessment/report/https://raw.githubusercontent.com/eosc-synergy/itwinai.assess.sqaaas/dev/.report/assessment_output.json)

See the latest version of our [docs](https://intertwin-eu.github.io/T6.5-AI-and-ML/)
for a quick overview of this platform for advanced AI/ML workflows in digital twin applications.
Expand Down Expand Up @@ -104,7 +105,7 @@ To run tests on itwinai package:
# Activate env
micromamba activate ./.venv-pytorch # or ./.venv-tf

pytest -v -m "not slurm" tests/
pytest -v -m "not slurm" tests/
```

However, some tests are intended to be executed only on an HPC system,
Expand Down
9 changes: 9 additions & 0 deletions experimental/cli/example.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
server:
class_path: mycode.ServerOptions
init_args:
host: localhost
port: 80
client:
class_path: mycode.ClientOptions
init_args:
url: http://${server.init_args.host}:${server.init_args.port}/
14 changes: 14 additions & 0 deletions experimental/cli/itwinai-conf.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
pipeline:
class_path: itwinai.pipeline.Pipeline
steps: [server, client]

server:
class_path: mycode.ServerOptions
init_args:
host: localhost
port: 80

client:
class_path: mycode.ClientOptions
init_args:
url: http://${server.init_args.host}:${server.init_args.port}/
29 changes: 29 additions & 0 deletions experimental/cli/itwinaicli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
>>> python itwinaicli.py --config itwinai-conf.yaml --help
>>> python itwinaicli.py --config itwinai-conf.yaml --server.port 333
"""


from itwinai.parser import ConfigParser2
from itwinai.parser import ItwinaiCLI

cli = ItwinaiCLI()
print(cli.pipeline)
print(cli.pipeline.steps)
print(cli.pipeline.steps['server'].port)


parser = ConfigParser2(
config='itwinai-conf.yaml',
override_keys={
'server.init_args.port': 777
}
)
pipeline = parser.parse_pipeline()
print(pipeline)
print(pipeline.steps)
print(pipeline.steps['server'].port)

server = parser.parse_step('server')
print(server)
print(server.port)
35 changes: 35 additions & 0 deletions experimental/cli/mycode.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# from dataclasses import dataclass
from itwinai.components import BaseComponent


class ServerOptions(BaseComponent):
    """Configuration component for a server endpoint.

    Args:
        host: hostname or IP address the server listens on.
        port: TCP port number.
    """
    host: str
    port: int

    def __init__(self, host: str, port: int) -> None:
        self.host = host
        self.port = port

    def execute(self) -> None:
        # FIX: was ``def execute():`` (no ``self``) -- calling
        # ``instance.execute()`` would raise TypeError.
        ...


class ClientOptions(BaseComponent):
    """Configuration component for a client endpoint.

    Args:
        url: URL the client connects to.
    """
    url: str

    def __init__(self, url: str) -> None:
        self.url = url

    def execute(self) -> None:
        # FIX: was ``def execute():`` (no ``self``) -- calling
        # ``instance.execute()`` would raise TypeError.
        ...


class ServerOptions2(BaseComponent):
    """Example component wrapping a client configuration.

    NOTE(review): the original declared ``host: str`` and ``port: int``
    class annotations, but the constructor only stores ``client``; the
    stale annotations have been replaced with the attribute actually set.

    Args:
        client: client configuration wrapped by this component.
    """
    client: ClientOptions

    def __init__(self, client: ClientOptions) -> None:
        self.client = client

    def execute(self) -> None:
        # FIX: was ``def execute():`` (no ``self``) -- calling
        # ``instance.execute()`` would raise TypeError.
        ...
46 changes: 46 additions & 0 deletions experimental/cli/parser-bk.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
"""
Provide functionalities to manage configuration files, including parsing,
execution, and dynamic override of fields.
"""

from typing import Any
from jsonargparse import ArgumentParser, ActionConfigFile, Namespace

from .components import BaseComponent


class ItwinaiCLI:
    """Thin command-line front-end built on jsonargparse.

    Parses a configuration file (plus any CLI overrides), then
    instantiates the pipeline object found under ``pipeline_nested_key``
    and exposes it as ``self.pipeline``.
    """
    _parser: ArgumentParser
    pipeline: BaseComponent

    def __init__(
        self,
        pipeline_nested_key: str = "pipeline",
        args: Any = None,
        parser_mode: str = "omegaconf"
    ) -> None:
        self.pipeline_nested_key = pipeline_nested_key
        self.args = args
        self.parser_mode = parser_mode
        self._init_parser()
        self._parse_args()
        # Turn the parsed (still declarative) config into live objects.
        instantiated = self._parser.instantiate_classes(self._config)
        self.pipeline = instantiated[self.pipeline_nested_key]

    def _init_parser(self):
        """Build the parser: a required config-file option plus the
        subclass arguments of the pipeline component."""
        self._parser = ArgumentParser(parser_mode=self.parser_mode)
        self._parser.add_argument(
            "-c", "--config",
            action=ActionConfigFile,
            required=True,
            help="Path to a configuration file in json or yaml format.",
        )
        self._parser.add_subclass_arguments(
            baseclass=BaseComponent,
            nested_key=self.pipeline_nested_key,
        )

    def _parse_args(self):
        """Parse ``self.args``: dict/Namespace objects are consumed
        directly; anything else (None or a list of strings) goes through
        regular argv parsing."""
        is_preparsed = isinstance(self.args, (dict, Namespace))
        parse = (
            self._parser.parse_object if is_preparsed
            else self._parser.parse_args
        )
        self._config = parse(self.args)
29 changes: 29 additions & 0 deletions experimental/cli/parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""
Example of dynamic override of config files with (sub)class arguments,
and variable interpolation with omegaconf.
Run with:
>>> python parser.py
Or (after clearing the arguments in parse_args(...)):
>>> python parser.py --config example.yaml --server.port 212
See the help page of each class:
>>> python parser.py --server.help mycode.ServerOptions
"""

from jsonargparse import ArgumentParser, ActionConfigFile
from mycode import ServerOptions, ClientOptions

if __name__ == "__main__":
parser = ArgumentParser(parser_mode="omegaconf")
parser.add_subclass_arguments(ServerOptions, "server")
parser.add_subclass_arguments(ClientOptions, "client")
parser.add_argument("--config", action=ActionConfigFile)

# Example of dynamic CLI override
# cfg = parser.parse_args(["--config=example.yaml", "--server.port=212"])
cfg = parser.parse_args()
cfg = parser.instantiate_classes(cfg)
print(cfg.client)
print(cfg.client.url)
print(cfg.server.port)
53 changes: 53 additions & 0 deletions experimental/workflow/train.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# AI workflow metadata/header.
# They are optional and easily extensible in the future.
version: 0.0.1
name: Experiment name
description: This is a textual description
credits:
- author1
- author2

# Provide a unified place where this *template* can be configured.
# Variables which can be overridden at runtime as env vars, e.g.:
# - Execution environment details (e.g., path in container vs. in laptop, MLFlow tracking URI)
# - Tunable parameters (e.g., learning rate)
# - Intrinsically dynamic values (e.g., MLFLow run ID is a random value)
# These variables are interpolated with OmegaConf.
vars:
images_dataset_path: some/path/disk
mlflow_tracking_uri: http://localhost:5000
training_lr: 0.001

# Runner-independent workflow steps.
# Each step is designed to be minimal, but easily extensible
# to accommodate future needs by adding new fields.
# The only required field is 'command'. New fields can be added
# to support future workflow executors.
steps:
preprocessing-step:
command:
class_path: itwinai.torch.Preprocessor
init_args:
save_path: ${vars.images_dataset_path}
after: null
env: null

training-step:
command:
class_path: itwinai.torch.Trainer
init_args:
lr: ${vars.training_lr}
tracking_uri: ${vars.mlflow_tracking_uri}
after: preprocessing-step
env: null

sth_step:
command: python inference.py -p pipeline.yaml
after: [preprocessing-step, training-step]
env: docker+ghcr.io/intertwin-eu/itwinai:training-0.0.1

sth_step2:
command: python train.py -p pipeline.yaml
after: null
env: conda+path/to/my/local/env

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ dependencies = [
"submitit>=1.4.6",
"typing-extensions==4.5.0",
"typing_extensions==4.5.0",
"rich>=13.5.3",
"typer>=0.9.0",
"urllib3>=1.26.18",
"lightning>=2.0.0",
"torchmetrics>=1.2.0",
Expand All @@ -45,7 +47,6 @@ dependencies = [
# TODO: add torch and tensorflow
# torch = []
# tf = []
cli = ["rich>=13.5.3", "typer>=0.9.0"]
dev = [
"pytest>=7.4.2",
"pytest-mock>=3.11.1",
Expand Down
47 changes: 47 additions & 0 deletions src/itwinai/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,59 @@
# NOTE: import libs in the command"s function, not here.
# Otherwise this will slow the whole CLI.

from typing import Optional, List
from typing_extensions import Annotated
from pathlib import Path
import typer


app = typer.Typer()


@app.command()
def exec_pipeline(
    config: Annotated[Path, typer.Option(
        help="Path to the configuration file of the pipeline to execute."
    )],
    pipe_key: Annotated[str, typer.Option(
        help=("Key in the configuration file identifying "
              "the pipeline object to execute.")
    )] = "pipeline",
    overrides_list: Annotated[
        Optional[List[str]], typer.Option(
            "--override", "-o",
            help=(
                "Nested key to dynamically override elements in the "
                "configuration file with the "
                "corresponding new value, joined by '='. It is also possible "
                "to index elements in lists using their list index. "
                "Example: [...] "
                "-o pipeline.init_args.trainer.init_args.lr=0.001 "
                "-o pipeline.my_list.2.batch_size=64 "
            )
        )
    ] = None
):
    """Execute a pipeline from configuration file.
    Allows dynamic override of fields.
    """
    # Add working directory to python path so that the interpreter is able
    # to find the local python files imported from the pipeline file
    import os
    import sys
    sys.path.append(os.getcwd())

    # Parse and execute pipeline
    from itwinai.parser import ConfigParser
    # Build a {nested.key: value} mapping from the "key=value" CLI items.
    # FIX: the original crashed with TypeError when --override was omitted
    # (overrides_list defaults to None), and split on every '=', truncating
    # values that themselves contain '='. Split on the first '=' only.
    overrides = dict(
        item.split('=', 1) for item in (overrides_list or [])
    )
    parser = ConfigParser(config=config, override_keys=overrides)
    pipeline = parser.parse_pipeline(pipeline_nested_key=pipe_key)
    pipeline.execute()


@app.command()
def mlflow_ui(
path: str = typer.Option("ml-logs/", help="Path to logs storage."),
Expand Down
Loading

0 comments on commit 8d9f51f

Please sign in to comment.