Skip to content

Commit

Permalink
docs: add extracted argument reference
Browse files Browse the repository at this point in the history
  • Loading branch information
xieby1 committed Dec 19, 2024
1 parent 77492c7 commit 7dc1b6f
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 27 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ book
*_py.dot
*_py.svg
__pycache__
*extract.md
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,6 @@ doc: $(shell find . -name "*.md") ${PYSVGs}
# css can only recognize intrinsic size in px
# https://developer.mozilla.org/en-US/docs/Glossary/Intrinsic_Size
sed -i 's/\([0-9]\+\)pt/\1px/g' $@

# Generate the argument reference from default.nix.
# `$^` expands to both prerequisites in order (the extractor script, then
# default.nix) and `$@` to the target, so the recipe runs:
#   ./docs/extract_comments.py default.nix docs/reference/default_extract.md
docs/reference/default_extract.md: ./docs/extract_comments.py default.nix
$^ $@
258 changes: 231 additions & 27 deletions default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -4,57 +4,261 @@
}) {}
, lib ? pkgs.lib

#######################################################################################
# Common Configuration
#######################################################################################
/**
<style>
arg {
font-family: mono;
font-size: 1.2em;
font-weight: bold;
}
arg::before {
content: "• "
}
</style>
*/
/** ## Common Configuration */

/**
<arg>cc</arg>: Compiler Collection used for compiling RISC-V binaries.
* **Type**: string
* **Default value**: `"gcc14"`
* **Available values**: Prefix of any nixpkgs-supported <u>xxx</u>Stdenv.
To list available <u>xxx</u>Stdenv:
```bash
nix-instantiate --eval -E 'let pkgs=import <nixpkgs> {}; in builtins.filter (x: pkgs.lib.hasSuffix "Stdenv" x)(builtins.attrNames pkgs)'
```
* **TODO**: Currently only supports GCC's stdenv.
LLVM's fortran compiler (flang) is needed to support Clang's stdenv.
Preliminary experiments with riscv64-jemalloc show that Clang provides better auto-vectorization than GCC.
*/
, cc ? "gcc14"

#######################################################################################
# Benchmarks Configuration
#######################################################################################
# Benchmarks Common Configuration ###############################
/** ## Benchmarks Configuration */

/** ### Benchmarks Common Configuration */

/**
<arg>enableVector</arg>: Controls compiler's auto-vectorization during benchmark builds.
* **Type**: bool
* **Default value**: `false`
*/
, enableVector ? false

# SPEC CPU 2006 Configuration ###################################
/** ### SPEC CPU 2006 Configuration */

/**
<arg>spec2006-extra-tag</arg>: Extra tag for SPEC CPU 2006 output names.
* **Type**: string
* **Default value**: `""`
* **Example**:
Setting `spec2006-extra-tag = "miao"`,
the checkpoint name changes from `spec2006_ref_..._1core_cpt` to `spec2006_ref_..._1core_miao_cpt`.
*/
, spec2006-extra-tag ? ""

/**
<arg>spec2006-src</arg>: Path to SPEC CPU 2006 source code.
* <span style="background-color:yellow;">**Note**</span>:
As SPEC CPU 2006 is a proprietary benchmark, it cannot be incorporated in Deterload's source code.
You need to obtain its source code through legal means.
* **Type**: path
* **Supported path types**:
* Path to a folder:
The folder must be the root directory of the SPEC CPU 2006 source code.
Example:
```nix
spec2006-src = /path/miao/spec2006;
```
Required folder structure:
```
/path/miao/spec2006
├── benchspec/
├── bin/
├── tools/
├── shrc
...
```
* Path to a tar file:
The tar file must contain a folder named exactly `spec2006`,
with the same folder structure as above.
Supported tar file extensions:
* gzip (.tar.gz, .tgz or .tar.Z)
* bzip2 (.tar.bz2, .tbz2 or .tbz)
* xz (.tar.xz, .tar.lzma or .txz)
Example:
```nix
spec2006-src = /path/of/spec2006.tar.gz;
```
* For more information about supported path types,
please see [Nixpkgs Manual: The unpack phase](https://nixos.org/manual/nixpkgs/stable/#ssec-unpack-phase).
*/
, spec2006-src ? throw "Please specify <spec2006-src> the path of spec2006, for example: /path/of/spec2006.tar.gz"

/**
<arg>spec2006-size</arg>: Input size for SPEC CPU 2006.
* **Type**: string
* **Default value**: `"ref"`
* **Available values**: `"ref"`, `"train"`, `"test"`
*/
, spec2006-size ? "ref"

/**
<arg>spec2006-optimize</arg>: Compiler optimization flags for SPEC CPU 2006.
* **Type**: string
* **Default value**: `"-O3 -flto"`
*/
, spec2006-optimize ? "-O3 -flto"

/**
<arg>spec2006-march</arg>: Compiler's `-march` option for SPEC CPU 2006.
* **Type**: string
* **Default value**: `"rv64gc${lib.optionalString enableVector "v"}_zba_zbb_zbc_zbs"`
* **Description**: The default value depends on `enableVector`:
* If `enableVector` is `true`, the default value is `"rv64gcv_zba_zbb_zbc_zbs"`,
* If `enableVector` is `false`, the default value is `"rv64gc_zba_zbb_zbc_zbs"`.
*/
, spec2006-march ? "rv64gc${lib.optionalString enableVector "v"}_zba_zbb_zbc_zbs"
# spec2006-testcase-filter is a function of type `string -> bool`
# It takes a testcase name from spec2006 as input and returns:
# * true: include this testcase
# * false: exclude this testcase
# For example:
# * Include all testcases: `testcase: true;`
# * Only include 403_gcc: `testcase: testcase=="403_gcc";`
# * Exclude "464_h264ref" and "465_tonto": `testcase: !(builtins.elem testcase ["464_h264ref" "465_tonto"]);`

/**
<arg>spec2006-testcase-filter</arg>: Function to filter SPEC CPU 2006 testcases.
* **Type**: string -> bool
* **Default value**: `testcase: true`
* **Description**: `spec2006-testcase-filter` takes a testcase name as input and returns:
* `true`: include this testcase
* `false`: exclude this testcase
* **Example 1**: Include all testcases:
```nix
spec2006-testcase-filter = testcase: true;
```
* **Example 2**: Only include `403_gcc`:
```nix
spec2006-testcase-filter = testcase: testcase == "403_gcc";
```
* **Example 3**: Exclude `464_h264ref` and `465_tonto`:
```nix
spec2006-testcase-filter = testcase: !(builtins.elem testcase [
"464_h264ref"
"465_tonto"
]);
```
*/
, spec2006-testcase-filter ? testcase: true

# OpenBLAS Configuration ########################################
/** ### OpenBLAS Configuration */

/**
<arg>openblas-extra-tag</arg>: Extra tag for OpenBLAS output names.
* **Type**: string
* **Default value**: `""`
* **Example**:
Setting `openblas-extra-tag = "miao"`,
the checkpoint name changes from `openblas_ref_..._1core_cpt` to `openblas_ref_..._1core_miao_cpt`.
*/
, openblas-extra-tag ? ""

/**
<arg>openblas-target</arg>: CPU TARGET for OpenBLAS.
* **Type**: string
* **Default value**: `if enableVector then "RISCV64_ZVL128B" else "RISCV64_GENERIC"`
* **Available values**: `"RISCV64_GENERIC"`, `"RISCV64_ZVL128B"`, `"RISCV64_ZVL256B"`
* **Description**: The default value depends on `enableVector`:
* If `enableVector` is `true`, the default value is `"RISCV64_ZVL128B"`,
* If `enableVector` is `false`, the default value is `"RISCV64_GENERIC"`.
*/
, openblas-target ? if enableVector then "RISCV64_ZVL128B" else "RISCV64_GENERIC"

#######################################################################################
# Builders Configuration
#######################################################################################
/** ## Builders Configuration */

/**
<arg>cpt-maxK</arg>: maxK value for all benchmarks in checkpoint generation.
* **Type**: number-in-string
* **Default value**: `"30"`
* **Description**:
maxK is a parameter in SimPoint algorithm used during the checkpoint's clustering stage.
`cpt-maxK` will set maxK for all benchmarks' clustering stage in checkpoints generation.
To override the maxK for specific benchmarks, refer to the `cpt-maxK-bmk` argument.
*/
, cpt-maxK ? "30"
# cpt-maxK for each benchmark
# How to get the benchmark name:
# Use command: `nix-instantiate --eval -A <benchmark>.benchmark.pname/name`
# Try `pname` first, if not available then use `name`. Examples:
# * Using pname: `nix-instantiate --eval -A openblas.benchmark.pname`
# * Using name: `nix-instantiate --eval -A spec2006.483_xalancbmk.benchmark.name`

/**
<arg>cpt-maxK-bmk</arg>: maxK values for specified benchmarks in checkpoint generation.
* **Type**: attr (`{ benchmark-name = number-in-string; ... }`)
* **Default value**: `{ "483.xalancbmk" = "100"; }`
* **Description**:
`cpt-maxK-bmk` sets the maxK for specified benchmarks.
Unspecified benchmarks will use the value from `cpt-maxK`.
This attribute consists of key-value pairs where:
* Key: benchmark name.
* Value: number in a string (same format as `cpt-maxK`).
* **FAQ 1**: Why set maxK of 483.xalancbmk to 100?
* Setting maxK to 30 for 483.xalancbmk resulted in unstable scores.
* **FAQ 2**: How to retrieve the benchmark name?
* Use the following commands:
```bash
# Try `pname` first, if not available, use `name`.
nix-instantiate --eval -A <benchmark>.benchmark.pname
nix-instantiate --eval -A <benchmark>.benchmark.name
```
Examples:
```bash
# To retrieve the name of openblas benchmark, first try
nix-instantiate --eval -A openblas.benchmark.pname
# Output: "openblas"
```
```bash
# To retrieve the name of 483_xalancbmk benchmark, first try
nix-instantiate --eval -A spec2006.483_xalancbmk.benchmark.pname
# Error: attribute 'pname' in selection path 'spec2006.483_xalancbmk.benchmark.pname' not found Did you mean name?
# Second try
nix-instantiate --eval -A spec2006.483_xalancbmk.benchmark.name
# Output: "483.xalancbmk"
```
*/
, cpt-maxK-bmk ? {
# TODO: rename xxx.yyyyyyy to xxx_yyyyyy ?
"483.xalancbmk" = "100";
}

/**
<arg>cpt-intervals</arg>: Number of BBV interval instructions in checkpoint generation.
* **Type**: number-in-string
* **Default value**: `"20000000"`
*/
, cpt-intervals ? "20000000"

/**
<arg>cpt-simulator</arg>: Simulator used in checkpoint generation.
* **Type**: string
* **Default value**: `"qemu"`
* **Available values**: `"qemu"`, `"nemu"`
* **Note**:
Though nemu is faster than qemu, the current version of nemu is not deterministic.
Therefore, qemu is chosen as the default simulator.
For more information, refer to [OpenXiangShan/Deterload Issue #8: nemu is not deterministic](https://github.com/OpenXiangShan/Deterload/issues/8).
*/
, cpt-simulator ? "qemu"

/**
<arg>cpt-format</arg>: Compression format of output checkpoints.
* **Type**: string
* **Default value**: `"zstd"`
* **Available values**: `"zstd"`, `"gz"`
* **Note**: nemu supports both formats; however, qemu only supports zstd format.
*/
, cpt-format ? "zstd"
}:
assert pkgs.pkgsCross.riscv64 ? "${cc}Stdenv";
assert lib.assertOneOf "spec2006-size" spec2006-size ["ref" "test"];
assert lib.assertOneOf "spec2006-size" spec2006-size ["ref" "train" "test"];
assert lib.assertOneOf "openblas-target" openblas-target ["RISCV64_GENERIC" "RISCV64_ZVL128B" "RISCV64_ZVL256B"];
assert lib.assertOneOf "cpt-simulator" cpt-simulator ["qemu" "nemu"];
assert lib.assertOneOf "cpt-format" cpt-format ["gz" "zstd"];
Expand Down
37 changes: 37 additions & 0 deletions docs/extract_comments.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/usr/bin/env python3
"""Extract ``/** ... */`` doc comments from a source file into Markdown.

Usage: extract_comments.py <source_file> [<output_file>]

Every ``/** ... */`` comment found in ``<source_file>`` is dedented and the
results are written, separated by blank lines, to ``<output_file>``. When
``<output_file>`` is omitted, the source path with its extension replaced by
``.md`` is used.
"""
import os
import re
import sys

# Group 1: everything on the opening line up to and including `/**`.
# Group 2: the comment body, up to (not including) the first closing `*/`.
_DOC_COMMENT_RE = re.compile(r"([^\n]*/\*\*)(.*?)\*/", re.DOTALL)


def _indent_width(line: str) -> int:
    """Return the number of leading space characters of *line*."""
    return len(line) - len(line.lstrip(' '))


def _dedent_comment(prefix: str, body: str) -> str:
    """Return the dedented text of one doc comment.

    *prefix* is the text on the opening line up to and including ``/**``;
    *body* is the text between ``/**`` and ``*/``. Returns an empty string
    when the comment contains no lines at all (e.g. ``/** */``).
    """
    # Replace the leading `.../**` with spaces so that text following `/**`
    # on the opening line keeps its column relative to the other lines.
    lines = (' ' * len(prefix) + body).split('\n')

    # Remove one leading and one trailing blank line (the `/**` and `*/`
    # lines themselves when the comment spans several lines).
    if lines and not lines[0].strip(' '):
        lines = lines[1:]
    if lines and not lines[-1].strip(' '):
        lines = lines[:-1]

    # Blank lines carry no indentation information; counting them would
    # force the common indent to 0 and disable dedenting for any comment
    # that contains an empty line.
    min_indent = min(
        (_indent_width(line) for line in lines if line.strip(' ')),
        default=0,
    )

    # Strip the common indent, then trailing spaces.
    return '\n'.join(line[min_indent:].rstrip(' ') for line in lines)


def extract_comments(text: str) -> str:
    """Return all doc comments of *text*, dedented and joined by blank lines.

    Comments without any content are skipped.
    """
    blocks = []
    for prefix, body in _DOC_COMMENT_RE.findall(text):
        block = _dedent_comment(prefix, body)
        if block.strip():
            blocks.append(block)
    return '\n\n'.join(blocks)


def main(argv: list[str]) -> int:
    """Run the extractor for the command line *argv*; return the exit code."""
    if len(argv) < 2:
        print(f"Usage: {argv[0]} <source_file> [<output_file>]", file=sys.stderr)
        return 1

    source_file = argv[1]
    if len(argv) > 2:
        output_file = argv[2]
    else:
        output_file = os.path.splitext(source_file)[0] + ".md"

    # The documented sources contain non-ASCII characters, so do not rely on
    # the locale's default encoding.
    with open(source_file, 'r', encoding='utf-8') as f:
        file_content = f.read()

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(extract_comments(file_content))
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
4 changes: 4 additions & 0 deletions docs/reference/config.md
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# 🧾可配参数(Configurable Arguments)

`--argstr key value` is a short version of `--arg key '"value"'`

{{ #include ./default_extract.md }}

0 comments on commit 7dc1b6f

Please sign in to comment.