Merge pull request #18 from anandhu-eng/cm_readme_inference_update
Cm readme inference update
arjunsuresh authored Sep 3, 2024
2 parents 37674fa + 8815065 commit 5c73d16
Showing 7 changed files with 20 additions and 38 deletions.
9 changes: 1 addition & 8 deletions docs/benchmarks/language/bert.md
@@ -8,34 +8,27 @@ hide:
=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

BERT-99
{{ mlperf_inference_implementation_readme (4, "bert-99", "reference") }}

BERT-99.9
{{ mlperf_inference_implementation_readme (4, "bert-99.9", "reference") }}

=== "Nvidia"
## Nvidia MLPerf Implementation

BERT-99
{{ mlperf_inference_implementation_readme (4, "bert-99", "nvidia") }}

BERT-99.9
{{ mlperf_inference_implementation_readme (4, "bert-99.9", "nvidia") }}

=== "Intel"
## Intel MLPerf Implementation
BERT-99

{{ mlperf_inference_implementation_readme (4, "bert-99", "intel") }}

BERT-99.9
{{ mlperf_inference_implementation_readme (4, "bert-99.9", "intel") }}

=== "Qualcomm"
## Qualcomm AI100 MLPerf Implementation

BERT-99
{{ mlperf_inference_implementation_readme (4, "bert-99", "qualcomm") }}

BERT-99.9
{{ mlperf_inference_implementation_readme (4, "bert-99.9", "qualcomm") }}
8 changes: 1 addition & 7 deletions docs/benchmarks/language/gpt-j.md
@@ -8,37 +8,31 @@ hide:

=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

GPT-J-99


{{ mlperf_inference_implementation_readme (4, "gptj-99", "reference") }}

GPTJ-99.9

{{ mlperf_inference_implementation_readme (4, "gptj-99.9", "reference") }}

=== "Nvidia"
## Nvidia MLPerf Implementation

GPTJ-99

{{ mlperf_inference_implementation_readme (4, "gptj-99", "nvidia") }}

GPTJ-99.9

{{ mlperf_inference_implementation_readme (4, "gptj-99.9", "nvidia") }}

=== "Intel"
## Intel MLPerf Implementation
GPTJ-99

{{ mlperf_inference_implementation_readme (4, "gptj-99", "intel") }}


=== "Qualcomm"
## Qualcomm AI100 MLPerf Implementation

GPTJ-99

{{ mlperf_inference_implementation_readme (4, "gptj-99", "qualcomm") }}

6 changes: 0 additions & 6 deletions docs/benchmarks/language/llama2-70b.md
@@ -9,26 +9,20 @@ hide:
=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

LLAMA2-70b-99
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "reference") }}

LLAMA2-70b-99.9
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "reference") }}

=== "Nvidia"
## Nvidia MLPerf Implementation

LLAMA2-70b-99
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "nvidia") }}

LLAMA2-70b-99.9
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "nvidia") }}

=== "Neural Magic"
## Neural Magic MLPerf Implementation

LLAMA2-70b-99
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99", "neuralmagic") }}

LLAMA2-70b-99.9
{{ mlperf_inference_implementation_readme (4, "llama2-70b-99.9", "neuralmagic") }}
1 change: 0 additions & 1 deletion docs/benchmarks/language/mixtral-8x7b.md
@@ -6,5 +6,4 @@ hide:
=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

MIXTRAL-8x7b
{{ mlperf_inference_implementation_readme (4, "mixtral-8x7b", "reference") }}
6 changes: 0 additions & 6 deletions docs/benchmarks/medical_imaging/3d-unet.md
@@ -9,30 +9,24 @@ hide:
=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

3d-unet-99

{{ mlperf_inference_implementation_readme (4, "3d-unet-99", "reference") }}

3d-unet-99.9

{{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "reference") }}

=== "Nvidia"
## Nvidia MLPerf Implementation
3d-unet-99

{{ mlperf_inference_implementation_readme (4, "3d-unet-99", "nvidia") }}

3d-unet-99.9

{{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "nvidia") }}

=== "Intel"
## Intel MLPerf Implementation
3d-unet-99

{{ mlperf_inference_implementation_readme (4, "3d-unet-99", "intel") }}

3d-unet-99.9

{{ mlperf_inference_implementation_readme (4, "3d-unet-99.9", "intel") }}
8 changes: 1 addition & 7 deletions docs/benchmarks/recommendation/dlrm-v2.md
@@ -10,26 +10,20 @@ hide:
=== "MLCommons-Python"
## MLPerf Reference Implementation in Python

DLRM-v2-99
{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99", "reference") }}

DLRM-v2-99.9
{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99.9", "reference") }}

=== "Nvidia"
## Nvidia MLPerf Implementation

DLRM-v2-99

{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99", "nvidia") }}

DLRM-v2-99.9
{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99.9", "nvidia") }}

=== "Intel"
## Intel MLPerf Implementation

DLRM-v2-99
{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99", "intel") }}

DLRM-v2-99.9
{{ mlperf_inference_implementation_readme (4, "dlrm-v2-99.9", "intel") }}
20 changes: 17 additions & 3 deletions main.py
File mode changed 100644 → 100755 (now executable)
@@ -12,12 +12,17 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
content=""
scenarios = []
execution_envs = ["Docker","Native"]
code_version="r4.1"
code_version="r4.1-dev"

if model == "rnnt":
code_version="r4.0"

if implementation == "reference":
# Tip
if "99.9" not in model:
content += f"\n{pre_space}!!! tip\n\n"
content += f"{pre_space} - MLCommons reference implementations are only meant to provide a rules compliant reference implementation for the submitters and in most cases are not best performing. If you want to benchmark any system, it is advisable to use the vendor MLPerf implementation for that system like Nvidia, Intel etc.\n\n"

devices = [ "CPU", "CUDA", "ROCm" ]
if model.lower() == "resnet50":
frameworks = [ "Onnxruntime", "Tensorflow", "Deepsparse" ]
@@ -39,6 +44,11 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
frameworks = [ "pytorch" ]

elif implementation == "intel":
# Tip
if "99.9" not in model:
content += f"\n{pre_space}!!! tip\n\n"
content += f"{pre_space} - Intel MLPerf inference implementation is available only for datacenter category and has been tested only on a limited number of systems. Most of the benchmarks using Intel implementation require at least Intel Sapphire Rapids or higher CPU generation.\n\n"

if model not in [ "bert-99", "bert-99.9", "gptj-99", "gptj-99.9", "resnet50", "retinanet", "3d-unet-99", "3d-unet-99.9", "dlrm-v2-99", "dlrm-v2-99.9", "sdxl" ]:
return pre_space+" WIP"
if model in [ "bert-99", "bert-99.9", "retinanet", "3d-unet-99", "3d-unet-99.9" ]:
@@ -72,6 +82,8 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
else:
categories = [ "Edge", "Datacenter" ]

# model name
content += f"{pre_space}{model.upper()}\n\n"
for category in categories:
if category == "Edge" and not scenarios:
scenarios = [ "Offline", "SingleStream" ]
@@ -115,6 +127,8 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
continue # Nvidia implementation only supports execution through docker
content += f"{cur_space2}=== \"{execution_env}\"\n"
content += f"{cur_space3}###### {execution_env} Environment\n\n"
# ref to cm installation
content += f"{cur_space3}Please refer to the [installation page](../../install/index.md) to install CM for running the automated benchmark commands.\n\n"
test_query_count=get_test_query_count(model, implementation, device)

if "99.9" not in model: #not showing docker command as it is already done for the 99% variant
@@ -140,7 +154,7 @@ def mlperf_inference_implementation_readme(spaces, model, implementation):
content += f"{cur_space3}The above command should get you to an interactive shell inside the docker container and do a quick test run for the Offline scenario. Once inside the docker container please do the below commands to do the accuracy + performance runs for each scenario.\n\n"
content += f"{cur_space3}<details>\n"
content += f"{cur_space3}<summary> Please click here to see more options for the docker launch </summary>\n\n"
content += f"{cur_space3}* `--docker_cm_repo <Custom CM repo URL>`: to use a custom fork of cm4mlops repository inside the docker image\n\n"
content += f"{cur_space3}* `--docker_cm_repo=<Custom CM repo URL>`: to use a custom fork of cm4mlops repository inside the docker image\n\n"
content += f"{cur_space3}* `--docker_cache=no`: to not use docker cache during the image build\n"

if device.lower() not in [ "cuda" ]:
@@ -337,7 +351,7 @@ def get_run_cmd_extra(f_pre_space, model, implementation, device, scenario, scen
return extra_content

@env.macro
def mlperf_inference_run_command(spaces, model, implementation, framework, category, scenario, device="cpu", execution_mode="test", test_query_count="20", docker=False, scenarios = [], code_version="r4.1"):
def mlperf_inference_run_command(spaces, model, implementation, framework, category, scenario, device="cpu", execution_mode="test", test_query_count="20", docker=False, scenarios = [], code_version="r4.1-dev"):
pre_space = ""
for i in range(1,spaces):
pre_space = pre_space + " "
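
Two of the additions above inject page content that previously had to be written by hand: a tip admonition for the 99% accuracy variants and a pointer to the CM installation page ahead of the generated commands. A condensed, hypothetical sketch of those two steps (`build_intro` is illustrative, not a function in main.py, and the tip text is paraphrased):

```python
# Condensed sketch of the new content generation (simplified; in main.py the
# install pointer is emitted inside the per-execution-environment loop).
def build_intro(spaces: int, model: str, implementation: str) -> str:
    pre_space = " " * spaces
    content = ""

    # Tip admonition, emitted only for the 99% accuracy variant
    if implementation == "reference" and "99.9" not in model:
        content += f"\n{pre_space}!!! tip\n\n"
        content += (f"{pre_space}    - Reference implementations are rules-compliant but "
                    f"usually not the best performing; prefer a vendor implementation "
                    f"(Nvidia, Intel, ...) when benchmarking a specific system.\n\n")

    # Pointer to the CM installation page, shown before the run commands
    content += (f"{pre_space}Please refer to the [installation page](../../install/index.md) "
                f"to install CM for running the automated benchmark commands.\n\n")
    return content

print(build_intro(4, "bert-99", "reference"))
```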
