diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 345260b7..25810b98 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -9,12 +9,19 @@ on:
       machines_config:
         description: 'Machine related configurations'
         required: True
+        default: 67504e9a4c9ccbdde21a46fe
       benchmark_config:
         description: 'Application related configuration'
         required: True
+        default: 67504e9a4c9ccbdde21a4701
+      plots_config:
+        description: 'Plots related configuration'
+        required: True
+        default: 675053424c9ccbdde21a470a
       girder_folder_id:
         description: 'ID of the folder to upload to'
         required: True
+        default: 67504ecd4c9ccbdde21a4704

 jobs:
@@ -68,25 +75,28 @@ jobs:
           girder-download -gid $machine_cfg_id -o ./tmp/ -fn "machines_config.json"
         env:
           GIRDER_API_KEY: ${{secrets.GIRDER}}
-      - id: hpc-systems
-        name: Set HPC systems matrix
-        run: |
-          source .venv/bin/activate
-          matrix=$(hpc-dispatch -mp ./tmp/machines_config.json -o ./tmp/machines/)
-          echo $matrix
-          echo "matrix={ include : $matrix }" >> $GITHUB_OUTPUT
       - name: Download benchmark configuration
         run: |
           source .venv/bin/activate
           if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
             bench_cfg_id=${{ github.event.inputs.benchmark_config }};
+            plots_cfg_id=${{ github.event.inputs.plots_config }};
           elif [[ "${{ github.event_name}}" == "repository_dispatch" ]]; then
             bench_cfg_id=${{ github.event.client_payload.benchmark_config }};
+            plots_cfg_id=${{ github.event.client_payload.plots_config }};
           fi
           girder-download -gid $bench_cfg_id -o ./tmp/ -fn "benchmark_config.json"
+          girder-download -gid $plots_cfg_id -o ./tmp/ -fn "plots.json"
         env:
           GIRDER_API_KEY: ${{secrets.GIRDER}}
+      - id: hpc-systems
+        name: Set HPC systems matrix
+        run: |
+          source .venv/bin/activate
+          matrix=$(hpc-dispatch -mcp ./tmp/machines_config.json -mod ./tmp/machines/ -bcp ./tmp/benchmark_config.json -pcp ./tmp/plots.json)
+          echo $matrix
+          echo "matrix={ include : $matrix }" >> $GITHUB_OUTPUT
       - name: pull_images
         run: |
           source .venv/bin/activate
@@ -102,6 +112,7 @@ jobs:
           name: config-artifacts
           path: |
             ./tmp/benchmark_config.json
+            ./tmp/plots.json
             ./tmp/machines/

   benchmark:
@@ -113,6 +124,7 @@ jobs:
     timeout-minutes: 7200
     name: ${{matrix.machine}}
     steps:
+      - uses: actions/checkout@v4
       - name: Download wheel
         uses: actions/download-artifact@v4
         with:
@@ -123,20 +135,13 @@ jobs:
         with:
           name: config-artifacts
           path: ./tmp/
-      - name: Create Virtual Environment
-        run: |
-          python3 -m venv .venv
-          source .venv/bin/activate
-          pip3 install -r requirements.txt
       - name: Execute benchmarks
-        run: |
-          source .venv/bin/activate
-          execute-benchmark -ec ./${{matrix.machine_cfg}} --config ./tmp/benchmark_config.json --move-results ./tmp/results/ -v
+        run: ${{matrix.submit_command}}
       - name: Upload reframe report
         uses: actions/upload-artifact@v4
         with:
-          name: benchmark-results
-          path: ./tmp/results/
+          name: benchmark-results-${{matrix.machine}}
+          path: ${{matrix.reports_path}}

   results:
     runs-on: self-ubuntu-22.04
@@ -148,8 +153,9 @@ jobs:
       - name: Download results
        uses: actions/download-artifact@v4
        with:
-          name: benchmark-results
+          pattern: benchmark-results-*
           path: ./tmp/results/
+          merge-multiple: false
       - name: Create Virtual Environment
         run: |
           python3 -m venv .venv
@@ -168,28 +174,4 @@ jobs:
           girder-upload --directory $new_foldername --girder_id $girder_upload_id
           rm -r $new_foldername
         env:
-          GIRDER_API_KEY: ${{ secrets.GIRDER }}
-      - name: Reset reports
-        run: |
-          rm -r ./docs/modules/ROOT/pages/applications/
-          rm -r ./docs/modules/ROOT/pages/machines/
-          rm -r ./docs/modules/ROOT/pages/reports/
-          rm -r ./docs/modules/ROOT/pages/use_cases/
-          rm -r ./reports/
-      - name: Render reports
-        run: |
-          source .venv/bin/activate
-          render-benchmarks
-        env:
-          GIRDER_API_KEY: ${{ secrets.GIRDER }}
-
-      - name: Create Pull Request
-        uses: peter-evans/create-pull-request@v7
-        with:
-          title: "Add benchmark for ${{ needs.factory.outputs.executable_name }} - ${{ needs.factory.outputs.use_case }}"
-          body: |
-            Auto-generated by [create-pull-request][1]
-            [1]: https://github.com/peter-evans/create-pull-request
-          reviewers: JavierCladellas
-        env:
-          GITHUB_TOKEN: ${{ secrets.CR_PAT }}
\ No newline at end of file
+          GIRDER_API_KEY: ${{ secrets.GIRDER }}
\ No newline at end of file
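For orientation, here is a sketch of a single entry of the matrix emitted by the hpc-systems step; the fields mirror HpcSystem.toDict() in hpcSystems.py further down, and the IDs and paths are purely illustrative:

    [{"runner": "self-gaya", "machine": "gaya", "partition": "", "python_version": "3.10",
      "user_name": "prudhomm", "api_version": "", "account": "", "url": "", "submit": "cli",
      "machine_cfg": "./tmp/machines/gaya.json", "reports_path": "<reports_base_dir>",
      "submit_command": "bash ./src/feelpp/benchmarking/reframe/config/machineConfigs/gaya.sh --matrix-config ./tmp/machines/gaya.json --benchmark-config ./tmp/benchmark_config.json --plots-config ./tmp/plots.json"}]

The benchmark job consumes such an entry directly through ${{matrix.submit_command}} and ${{matrix.reports_path}}.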
diff --git a/config/toolbox_heat/thermal_bridges_case_3.json b/config/toolbox_heat/thermal_bridges_case_3.json
index b2e0da5e..40992688 100644
--- a/config/toolbox_heat/thermal_bridges_case_3.json
+++ b/config/toolbox_heat/thermal_bridges_case_3.json
@@ -1,17 +1,18 @@
 {
     "executable": "feelpp_toolbox_heat",
-    "output_directory": "{{machine.output_app_dir}}/toolboxes/heat/thermal_bridges_case_3",
-    "use_case_name": "thermal_bridges_case_3",
-    "timeout":"0-01:00:00",
+    "output_directory": "{{machine.output_app_dir}}/toolboxes/heat/ThermalBridgesENISO10211/Case3",
+    "use_case_name": "ThermalBridgesENISO10211",
+    "timeout":"0-00:10:00",
     "platforms": {
         "apptainer":{
             "image": {
-                "name":"{{machine.containers.apptainer.image_base_dir}}/feelpp.sif"
+                "name":"{{machine.containers.apptainer.image_base_dir}}/feelpp-noble.sif"
             },
             "input_dir":"/input_data/",
             "options": [
                 "--home {{machine.output_app_dir}}",
-                "--bind {{machine.input_dataset_base_dir}}/{{use_case_name}}/:{{platforms.apptainer.input_dir}}"
+                "--bind {{machine.input_dataset_base_dir}}/{{use_case_name}}/:{{platforms.apptainer.input_dir}}",
+                "--env OMP_NUM_THREADS=1"
             ],
             "append_app_option":[]
         },
@@ -21,15 +22,17 @@
         }
     },
     "options": [
-        "--config-files {{platforms.{{machine.platform}}.input_dir}}/case3.cfg",
+        "--config-files /usr/share/feelpp/data/testcases/toolboxes/heat/cases/Building/ThermalBridgesENISO10211/case3.cfg {{platforms.{{machine.platform}}.input_dir}}/{{parameters.solver.value}}.cfg",
         "--directory {{output_directory}}/{{instance}}",
         "--repository.case {{use_case_name}}",
-        "--fail-on-unknown-option 1",
         "--heat.scalability-save=1",
         "--repository.append.np 0",
         "--case.discretization {{parameters.discretization.value}}",
-        "--heat.json.patch='{\"op\": \"replace\",\"path\": \"/Meshes/heat/Import/filename\",\"value\": \"$cfgdir/{{parameters.meshes.value}}/case3_p{{parameters.nb_tasks.tasks.value}}.json\" }'"
+        "--heat.json.patch='{\"op\": \"replace\",\"path\": \"/Meshes/heat/Import/filename\",\"value\": \"{{platforms.{{machine.platform}}.input_dir}}/partitioning/case3/{{parameters.meshes.value}}/case3_p{{parameters.nb_tasks.tasks.value}}.json\" }'"
     ],
+    "env_variables":{
+        "OMP_NUM_THREADS":1
+    },
     "outputs": [
         {
             "filepath": "{{output_directory}}/{{instance}}/{{use_case_name}}/heat.measures/values.csv",
@@ -67,7 +70,7 @@
         {
             "name": "nb_tasks",
             "sequence": [
-                {"tasks":128,"nodes":1,"exclusive_access":true}
+                {"tasks":128,"tasks_per_node":128,"exclusive_access":true}
             ]
         },
         {
@@ -77,6 +80,10 @@
         {
             "name": "discretization",
             "sequence": ["P1"]
+        },
+        {
+            "name": "solver",
+            "sequence": ["gamg"]
         }
     ]
 }
\ No newline at end of file
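To make the templating concrete: with machine.platform = apptainer (so input_dir resolves to /input_data/) and the single solver value gamg, the first entry of "options" expands to roughly

    --config-files /usr/share/feelpp/data/testcases/toolboxes/heat/cases/Building/ThermalBridgesENISO10211/case3.cfg /input_data/gamg.cfg

and the heat.json.patch path resolves to /input_data/partitioning/case3/<mesh>/case3_p128.json, where <mesh> is taken from the meshes parameter.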
= "feelpp.benchmarking.reframe.__main__:main_cli" girder-download = "feelpp.benchmarking.scripts.girder:download_cli" girder-upload = "feelpp.benchmarking.scripts.girder:upload_cli" -hpc-dispatch = "feelpp.benchmarking.scripts.hpcSystems:parseHpcSystems_cli" +hpc-dispatch = "feelpp.benchmarking.scripts.hpcSystems:hpcSystemDispatcher_cli" [tool.pytest.ini_options] minversion = "6.0" diff --git a/requirements.txt b/requirements.txt index 1de900d8..50dbe61c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,5 @@ pandas nbmake traitlets tabulate +typing-extensions>=4.12.2 . diff --git a/src/feelpp/benchmarking/reframe/__main__.py b/src/feelpp/benchmarking/reframe/__main__.py index 0440f5b0..fb691959 100644 --- a/src/feelpp/benchmarking/reframe/__main__.py +++ b/src/feelpp/benchmarking/reframe/__main__.py @@ -33,6 +33,15 @@ def createReportFolder(self,executable,use_case): return str(self.report_folder_path) + def buildExecutionMode(self): + """Write the ReFrame execution flag depending on the parser arguments. + Examples are --dry-run or -r + """ + if self.parser.args.dry_run: + return "--dry-run" + else: + return "-r" + def buildCommand(self,timeout): assert self.report_folder_path is not None, "Report folder path not set" cmd = [ @@ -47,7 +56,7 @@ def buildCommand(self,timeout): f"-J '#SBATCH --time={timeout}'", f'--perflogdir={os.path.join(self.machine_config.reframe_base_dir,"logs")}', f'{"-"+"v"*self.parser.args.verbose if self.parser.args.verbose else ""}', - '-r', + f'{self.buildExecutionMode()}' ] return ' '.join(cmd) @@ -56,7 +65,7 @@ def main_cli(): parser = Parser() parser.printArgs() - machine_reader = ConfigReader(parser.args.machine_config,MachineConfig) + machine_reader = ConfigReader(parser.args.machine_config,MachineConfig,dry_run=parser.args.dry_run) machine_reader.updateConfig() #Sets the cachedir and tmpdir directories for containers @@ -82,7 +91,7 @@ def main_cli(): configs = [config_filepath] if parser.args.plots_config: configs += [parser.args.plots_config] - app_reader = ConfigReader(configs,ConfigFile) + app_reader = ConfigReader(configs,ConfigFile,dry_run=parser.args.dry_run) executable_name = os.path.basename(app_reader.config.executable).split(".")[0] report_folder_path = cmd_builder.createReportFolder(executable_name,app_reader.config.use_case_name) app_reader.updateConfig(machine_reader.processor.flattenDict(machine_reader.config,"machine")) diff --git a/src/feelpp/benchmarking/reframe/config/configMachines.py b/src/feelpp/benchmarking/reframe/config/configMachines.py index 33df2623..06b93279 100644 --- a/src/feelpp/benchmarking/reframe/config/configMachines.py +++ b/src/feelpp/benchmarking/reframe/config/configMachines.py @@ -10,10 +10,13 @@ class Container(BaseModel): @field_validator("cachedir","tmpdir","image_base_dir",mode="before") @classmethod - def checkDirectories(cls,v): + def checkDirectories(cls,v, info): """Checks that the directories exists""" if v and not os.path.exists(v): - raise FileNotFoundError(f"Cannot find {v}") + if info.context.get("dry_run", False): + print(f"Dry Run: Skipping directory check for {v}") + else: + raise FileNotFoundError(f"Cannot find {v}") return v @@ -36,7 +39,6 @@ class MachineConfig(BaseModel): #TODO: maybe skipJsonSchema or something like that. 
diff --git a/src/feelpp/benchmarking/reframe/config/configMachines.py b/src/feelpp/benchmarking/reframe/config/configMachines.py
index 33df2623..06b93279 100644
--- a/src/feelpp/benchmarking/reframe/config/configMachines.py
+++ b/src/feelpp/benchmarking/reframe/config/configMachines.py
@@ -10,10 +10,13 @@ class Container(BaseModel):

     @field_validator("cachedir","tmpdir","image_base_dir",mode="before")
     @classmethod
-    def checkDirectories(cls,v):
+    def checkDirectories(cls,v, info):
         """Checks that the directories exist"""
         if v and not os.path.exists(v):
-            raise FileNotFoundError(f"Cannot find {v}")
+            if info.context.get("dry_run", False):
+                print(f"Dry Run: Skipping directory check for {v}")
+            else:
+                raise FileNotFoundError(f"Cannot find {v}")
         return v


@@ -36,7 +39,6 @@ class MachineConfig(BaseModel):
     #TODO: maybe skipJsonSchema or something like that.
     environment_map: Optional[Dict[str,List[str]]] = {}

-
     @model_validator(mode="after")
     def parseTargets(self):
         if not self.targets:
diff --git a/src/feelpp/benchmarking/reframe/config/configReader.py b/src/feelpp/benchmarking/reframe/config/configReader.py
index b7b159ad..0d17e41f 100644
--- a/src/feelpp/benchmarking/reframe/config/configReader.py
+++ b/src/feelpp/benchmarking/reframe/config/configReader.py
@@ -70,12 +70,15 @@ def decode(self, s: str):

 class ConfigReader:
     """ Class to load config files"""
-    def __init__(self, config_paths, schema):
+    def __init__(self, config_paths, schema, dry_run=False):
         """
         Args:
             config_paths (str | list[str]) : Path to the config JSON file. If a list is provided, files will be merged.
+            dry_run (bool): If True, validators skip filesystem checks (forwarded to pydantic via the validation context).
         """
         self.schema = schema
+        self.context = {
+            "dry_run":dry_run
+        }
         self.config = self.load(
             config_paths if type(config_paths) == list else [config_paths],
             schema
@@ -97,7 +100,7 @@ def load(self,config_paths, schema):
             with open(config, "r") as cfg:
                 self.config.update(json.load(cfg, cls=JSONWithCommentsDecoder))

-        self.config = schema(**self.config)
+        self.config = schema.model_validate(self.config, context=self.context)

         return self.config

@@ -109,7 +112,7 @@ def updateConfig(self, flattened_replace = None):
         """
         if not flattened_replace:
             flattened_replace = self.processor.flattenDict(self.config.model_dump())
-        self.config = self.schema(**self.processor.recursiveReplace(self.config.model_dump(),flattened_replace))
+        self.config = self.schema.model_validate(self.processor.recursiveReplace(self.config.model_dump(),flattened_replace), context=self.context)

     def __repr__(self):
         return json.dumps(self.config.dict(), indent=4)
\ No newline at end of file
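The dry-run flag reaches the validators through pydantic v2's validation context. A minimal, self-contained sketch of that mechanism — the model and field names here are illustrative, not the project's:

    from pydantic import BaseModel, field_validator
    import os

    class Cfg(BaseModel):
        path: str

        @field_validator("path")
        @classmethod
        def check_path(cls, v, info):
            # info.context holds whatever was passed to model_validate(..., context=...)
            if not os.path.exists(v):
                if (info.context or {}).get("dry_run", False):
                    print(f"Dry Run: skipping check for {v}")
                else:
                    raise FileNotFoundError(v)
            return v

    Cfg.model_validate({"path": "/no/such/dir"}, context={"dry_run": True})  # prints instead of raising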
diff --git a/src/feelpp/benchmarking/reframe/config/configSchemas.py b/src/feelpp/benchmarking/reframe/config/configSchemas.py
index 2e2beb0c..bc7a8dbd 100644
--- a/src/feelpp/benchmarking/reframe/config/configSchemas.py
+++ b/src/feelpp/benchmarking/reframe/config/configSchemas.py
@@ -44,17 +44,24 @@ class Image(BaseModel):
     protocol:Optional[Literal["oras","docker","library","local"]] = None
     name:str

     @model_validator(mode="after")
-    def extractProtocol(self):
+    def extractProtocol(self, info):
         """ Extracts the image protocol (oras, docker, etc..) or if a local image is provided.
         If local, checks if the image exists
         """
         if "://" in self.name:
             self.protocol = self.name.split("://")[0]
         else:
             self.protocol = "local"
+            # The image existence check moved here from setup.py so that
+            # dry runs can skip it through the validation context.
+            if not os.path.exists(self.name):
+                if info.context and info.context.get("dry_run", False):
+                    print(f"Dry Run: Skipping image check for {self.name}")
+                else:
+                    raise FileNotFoundError(f"Cannot find image {self.name}")

         return self

 class Platform(BaseModel):
@@ -74,6 +86,7 @@ class ConfigFile(BaseModel):
     output_directory:Optional[str] = ""
     use_case_name: str
     options: List[str]
+    env_variables:Optional[Dict] = {}
     outputs: List[AppOutput]
     scalability: Scalability
     sanity: Sanity
diff --git a/src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.py b/src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.py
new file mode 100644
index 00000000..ea308cf2
--- /dev/null
+++ b/src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.py
@@ -0,0 +1,45 @@
+site_configuration = {
+    'systems':[
+        {
+            'name': 'discoverer',
+            'descr': 'Discoverer',
+            'hostnames': [r'login\d+.discoverer.sofiatech.bg','cn*'],
+            'modules_system': 'tmod4',
+            'partitions': [
+                {
+                    'name': 'cn',
+                    'scheduler': 'slurm',
+                    'launcher': 'srun',
+                    'max_jobs': 8,
+                    'access': ['--partition=cn --account=ehpc-dev-2024d05-047 --qos=ehpc-dev-2024d05-047'],
+                    'environs': ['default'],
+                    'processor': {
+                        'num_cpus': 128
+                    },
+                    'devices': [
+                        {
+                            'type': 'cpu',
+                            'num_devices': 1320 #VALIDATE
+                        }
+                    ],
+                    'container_platforms':[
+                        {
+                            'type': 'Singularity'
+                        }
+                    ],
+                    'sched_options': { 'use_nodes_option': True },
+                }
+            ],
+            'env_vars':[
+                ["OMP_NUM_THREADS",1]
+            ]
+        }
+    ],
+    'environments': [
+        {
+            'name':'default',
+            'modules': [],
+            'target_systems':['discoverer:cn']
+        }
+    ]
+}
\ No newline at end of file
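Assuming ReFrame is pointed at this file through its standard -C/--config-file flag, a manual run on the machine could look like the following (invocation illustrative; execute-benchmark normally assembles this command):

    reframe -C src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.py --system discoverer:cn -r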
diff --git a/src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.sh b/src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.sh
new file mode 100755
index 00000000..3cc47e8f
--- /dev/null
+++ b/src/feelpp/benchmarking/reframe/config/machineConfigs/discoverer.sh
@@ -0,0 +1,42 @@
+#!/bin/bash -l
+#SBATCH --nodes=1                # number of nodes
+#SBATCH --ntasks=1               # number of tasks
+#SBATCH --qos=default            # SLURM qos
+#SBATCH --ntasks-per-node=8      # number of tasks per node
+#SBATCH --cpus-per-task=1        # number of cores per task
+#SBATCH --time=02:00:00          # time (HH:MM:SS)
+#SBATCH --partition=cn           # partition
+#SBATCH --account=ehpc-dev-2024d05-047 --qos=ehpc-dev-2024d05-047   # project account
+
+
+source /etc/profile.d/modules.sh
+export MODULEPATH=/opt/software/modulefiles
+
+matrix_config=""
+benchmark_config=""
+plots_config=""
+
+while true; do
+    case "$1" in
+        --matrix-config ) matrix_config="$2"; shift 2 ;;
+        --benchmark-config ) benchmark_config="$2"; shift 2 ;;
+        --plots-config ) plots_config="$2"; shift 2 ;;
+        -- ) shift; break ;;
+        * ) break ;;
+    esac
+done
+
+
+
+/opt/software/python/3.9.7/bin/python3 -m venv .venv
+source .venv/bin/activate
+
+
+.venv/bin/python3.9 -m pip install --upgrade pip
+.venv/bin/python3.9 -m pip install -I -r requirements.txt
+
+execute-benchmark \
+    -mc $matrix_config \
+    -bc $benchmark_config \
+    -pc $plots_config \
+    -v
\ No newline at end of file
diff --git a/src/feelpp/benchmarking/reframe/config/machineConfigs/gaya.sh b/src/feelpp/benchmarking/reframe/config/machineConfigs/gaya.sh
new file mode 100755
index 00000000..ed885fce
--- /dev/null
+++ b/src/feelpp/benchmarking/reframe/config/machineConfigs/gaya.sh
@@ -0,0 +1,34 @@
+#!/bin/bash -l
+#SBATCH --nodes=1                # number of nodes
+#SBATCH --ntasks=1               # number of tasks
+##SBATCH --qos=default           # SLURM qos
+#SBATCH --ntasks-per-node=8      # number of tasks per node
+#SBATCH --cpus-per-task=1        # number of cores per task
+#SBATCH --time=02:00:00          # time (HH:MM:SS)
+#SBATCH --partition=public
+
+matrix_config=""
+benchmark_config=""
+plots_config=""
+
+while true; do
+    case "$1" in
+        --matrix-config ) matrix_config="$2"; shift 2 ;;
+        --benchmark-config ) benchmark_config="$2"; shift 2 ;;
+        --plots-config ) plots_config="$2"; shift 2 ;;
+        -- ) shift; break ;;
+        * ) break ;;
+    esac
+done
+
+
+python3 -m venv .venv
+source .venv/bin/activate
+python3 -m pip install --upgrade pip
+python3 -m pip install -r requirements.txt
+
+execute-benchmark \
+    -mc $matrix_config \
+    -bc $benchmark_config \
+    -pc $plots_config \
+    -v
\ No newline at end of file
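Both wrapper scripts take the same long options, so the command built by the Cli submitter (see hpcSystems.py below) boils down to something like this, with illustrative paths:

    bash ./src/feelpp/benchmarking/reframe/config/machineConfigs/gaya.sh \
        --matrix-config ./tmp/machines/gaya.json \
        --benchmark-config ./tmp/benchmark_config.json \
        --plots-config ./tmp/plots.json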
[f"{self.reader.config.machine}:{part}" for part in self.reader.config.partitions] rfm_test.valid_prog_environs = self.reader.config.prog_environments - print("Valid Systems after init ", rfm_test.valid_systems) - print("Valid envs after init ", rfm_test.valid_prog_environs) def setPlatform(self, rfm_test,app_config): """ Sets the container_platform attributes @@ -95,8 +93,6 @@ def setPlatform(self, rfm_test,app_config): """ platform = app_config.platforms[self.reader.config.platform] if self.reader.config.platform != "builtin": - if not os.path.exists(platform.image.name): - raise FileExistsError(f"Cannot find image {platform.image.name}") rfm_test.container_platform.image = platform.image.name rfm_test.container_platform.options = platform.options + self.reader.config.containers[self.reader.config.platform].options rfm_test.container_platform.workdir = None @@ -119,7 +115,7 @@ def __init__(self,config_filepath,machine_config): """ super().__init__() self.config_filepath = config_filepath - self.reader = ConfigReader(config_filepath,ConfigFile) + self.reader = ConfigReader(config_filepath,ConfigFile, dry_run = "--dry-run" in sys.argv) self.updateConfig(self.reader.processor.flattenDict(machine_config,"machine")) self.updateConfig() @@ -137,7 +133,11 @@ def setupBeforeRun(self,rfm_test,machine_config): self.setExecutable(rfm_test,machine_config) def setupAfterInit(self, rfm_test): - pass + self.setEnvVariables() + + def setEnvVariables(self): + for env_var_name,env_var_value in self.reader.config.env_variables.items(): + os.environ[env_var_name] = env_var_value def cleanupDirectories(self): if os.path.exists(self.reader.config.scalability.directory): @@ -256,4 +256,4 @@ def setupBeforeRun(self): self.job.launcher.options += self.current_partition.get_resource('launcher_options') self.machine_setup.setupBeforeRun(self) - self.app_setup.setupBeforeRun(self,self.machine_setup.reader.config) + self.app_setup.setupBeforeRun(self,self.machine_setup.reader.config) \ No newline at end of file diff --git a/src/feelpp/benchmarking/scripts/hpcSystems.py b/src/feelpp/benchmarking/scripts/hpcSystems.py index 2df34c99..2bfa859e 100644 --- a/src/feelpp/benchmarking/scripts/hpcSystems.py +++ b/src/feelpp/benchmarking/scripts/hpcSystems.py @@ -1,74 +1,96 @@ from argparse import ArgumentParser import os,json from feelpp.benchmarking.reframe.config.configReader import JSONWithCommentsDecoder +from pathlib import Path -def parseHpcSystems_cli(): - - runners = { - "meluxina": { - "runner": "self-meluxina", - "machine": "meluxina", - "partition": "truePartition", - "python_version": "3.6", - "api_version": "v0.0.38", - "user_name": "u101096", - "account": "p200229", - "url": "http://slurmrestd.meluxina.lxp.lu:6820", - "submit": "rest" - }, - "gaya":{ - "runner": "self-gaya", - "machine": "gaya", - "partition": "truePartition", - "python_version": "3.10", - "api_version": "", - "user_name": "prudhomm", - "account": "", - "url": "", - "submit": "cli" - }, - "lumi":{ - "runner": "self-lumi", - "machine": "lumi", - "partition": "truePartition", - "python_version": "3.6", - "api_version": "", - "user_name": "prudhomm", - "account": "", - "url": "", - "submit": "sbatch" - }, - "discoverer":{ - "runner": "self-discoverer", - "machine": "discoverer", - "partition": "truePartition", - "python_version": "3.6", - "api_version": "", - "user_name": "vchabannes", - "account": "", - "url": "", - "submit": "cli" - }, - "karolina":{ - "runner": "self-karolina", - "machine": "karolina", - "partition": "truePartition", - 
"python_version": "3.6", - "api_version": "", - "user_name": "vchabannes", - "account": "", - "url": "", - "submit": "cli" - } - } +class HpcSystem: + def __init__( + self, + runner, + machine, + partition = "", + python_version = "", + user_name = "", + api_version = "", + account = "", + url = "", + submit = "" + ): + self.runner = runner + self.machine = machine + self.partition = partition + self.python_version = python_version + self.user_name = user_name + self.api_version = api_version + self.account = account + self.url = url + self.submit = submit + def toDict(self): + return self.__dict__ + + def writeConfig(self,output_dir,machine_data): + self.machine_cfg = os.path.join(output_dir,f"{self.machine}.json") + self.reports_path = machine_data["reports_base_dir"] + with open(self.machine_cfg,"w") as f: + json.dump(machine_data,f) + + def createSumbitCommand(self, benchmark_config_path, plots_config_path): + assert hasattr(self,"machine_cfg") and self.machine_cfg, "machine config path has not been set" + + self.submit_command = SubmissionCommandFactory.create(self.submit,self.machine,[ + f"--matrix-config {self.machine_cfg} ", + f"--benchmark-config {benchmark_config_path} ", + f"--plots-config {plots_config_path} " + ]) + + +class HpcSystemFactory: + @staticmethod + def dispatch(machine_name): + if machine_name == "gaya": + return HpcSystem( runner = "self-gaya", machine = "gaya", python_version = "3.10", user_name = "prudhomm", submit = "cli" ) + elif machine_name == "discoverer": + return HpcSystem( runner = "self-discoverer", machine = "discoverer", partition = "truePartition", python_version = "3.6", user_name = "vchabannes", submit = "cli" ) + else: + raise ValueError(f"HPC resource {machine_name} not found...") + + + +class JobSubmission: + def __init__(self,machine): + self.executable = None + self.script = f"./src/feelpp/benchmarking/reframe/config/machineConfigs/{machine}.sh" + + def buildCommand(self, options): + pass + +class Cli(JobSubmission): + def __init__(self,machine): + super().__init__(machine) + self.executable = "bash" + + def buildCommand(self, options): + return " ".join([self.executable, self.script] + options) + +class SubmissionCommandFactory: + @staticmethod + def create(submit,machine,options): + if submit == "cli": + return Cli(machine).buildCommand(options) + else: + raise ValueError(f"{submit} is not supported") + +def hpcSystemDispatcher_cli(): parser = ArgumentParser() - parser.add_argument("--machine_config_path", "-mp", required=True, type=str, help="path to the machines config json") - parser.add_argument("--output_dir", "-o", required=True, type=str, help="path to folder where individual machine configs should be stored") + parser.add_argument("--machine_config_path", "-mcp", required=True, type=str, help="path to the machines config json") + parser.add_argument("--benchmark_config_path", "-bcp", required=True, type=str, help="path to the benchmark config json") + parser.add_argument("--plots_config_path", "-pcp", required=True, type=str, help="path to the plots config json") + parser.add_argument("--machine_output_dir", "-mod", required=True, type=str, help="path to folder where individual machine configs should be stored") args = parser.parse_args() - if not os.path.exists(args.output_dir): - os.makedirs(args.output_dir) + if not os.path.exists(args.machine_output_dir): + os.makedirs(args.machine_output_dir) with open(args.machine_config_path,"r") as f: machines = json.load(f,cls=JSONWithCommentsDecoder) @@ -76,16 +98,13 @@ def 
@@ -76,16 +98,13 @@ def parseHpcSystems_cli():

     matrix = []
     for machine_data in machines:
-        if machine_data["machine"] not in runners:
-            raise ValueError(f"{machine_data['machine']} not found in runner mapping")
-
-        machine_config_path = os.path.join(args.output_dir,f"{machine_data['machine']}.json")
-        with open(machine_config_path,"w") as f:
-            json.dump(machine_data,f)
-
-        runner_info = runners[machine_data["machine"]]
-        runner_info["machine_cfg"] = machine_config_path
-        matrix.append(runner_info)
+        hpc_system = HpcSystemFactory.dispatch(machine_data["machine"])
+        hpc_system.writeConfig(args.machine_output_dir,machine_data)
+        hpc_system.createSubmitCommand(
+            args.benchmark_config_path,
+            args.plots_config_path
+        )
+        matrix.append(hpc_system.toDict())

     print(matrix)
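End to end, the dispatcher is driven exactly as in the workflow's hpc-systems step; a local invocation with illustrative paths:

    hpc-dispatch -mcp ./tmp/machines_config.json -mod ./tmp/machines/ \
                 -bcp ./tmp/benchmark_config.json -pcp ./tmp/plots.json

Each entry of the printed matrix is one HpcSystem.toDict() result, whose submit_command field is the single command the benchmark job executes on the matching runner.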