Skip to content

Commit

Permalink
Merge pull request #771 from zakkak/2024-07-17-july-cpu-2024-sync
Browse files Browse the repository at this point in the history
[24.0] Sync with upstream for July 2024 CPU
  • Loading branch information
zakkak authored Jul 19, 2024
2 parents c35c2d5 + 0b56897 commit ccf6db8
Show file tree
Hide file tree
Showing 135 changed files with 3,050 additions and 1,237 deletions.
48 changes: 29 additions & 19 deletions ci/ci_common/bench-common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,17 @@
else true
},

# Maximum number of threads to use for benchmarking in general.
# The goal is to limit parallelism on very large servers, which may not be representative of real-world scenarios.
bench_max_threads:: {
restrict_threads:: 36
},

# Mixin that removes the thread cap: it resets restrict_threads to null and
# sets should_use_hwloc to false for any job that includes it.
bench_no_thread_cap:: {
restrict_threads:: null,
should_use_hwloc:: false
},

bench_hw:: {
_bench_machine:: {
targets+: ["bench"],
Expand All @@ -40,15 +51,16 @@
numa_nodes:: [],
is_numa:: std.length(self.numa_nodes) > 0,
num_threads:: error "num_threads must bet set!",
hyperthreading:: true,
threads_per_node:: if self.is_numa then self.num_threads / std.length(self.numa_nodes) else self.num_threads,
},

x52:: common.linux_amd64 + self._bench_machine + {
machine_name:: "x52",
capabilities+: ["tmpfs25g"],
e3:: common.linux_amd64 + self._bench_machine + {
machine_name:: "e3",
capabilities: ["e3", "tmpfs25g", "linux", "amd64"],
numa_nodes:: [0, 1],
default_numa_node:: 0,
num_threads:: 72
default_numa_node:: 1,
num_threads:: 256
},
e4_8_64:: common.linux_amd64 + self._bench_machine + {
machine_name:: "e4_8_64",
Expand All @@ -67,31 +79,29 @@
xgene3:: common.linux_aarch64 + self._bench_machine + {
machine_name:: "xgene3",
capabilities+: [],
num_threads:: 32
num_threads:: 32,
hyperthreading:: false
},
a12c:: common.linux_aarch64 + self._bench_machine + {
machine_name:: "a12c",
capabilities+: ["tmpfs25g"],
numa_nodes:: [0, 1],
default_numa_node:: 0,
num_threads:: 160
num_threads:: 160,
hyperthreading:: false
}
},

hwlocIfNuma(numa, cmd, node=0)::
if numa then
hwloc_cmd(cmd, num_threads, node, hyperthreading, max_threads_per_node)::
if num_threads == null then
["hwloc-bind", "--cpubind", "node:"+node, "--membind", "node:"+node, "--"] + cmd
else
cmd,

parallelHwloc(cmd_node0, cmd_node1)::
// Returns a list of commands that will run cmd_node0 on NUMA node 0
// concurrently with cmd_node1 on NUMA node 1 and then wait for both to complete.
[
$.hwlocIfNuma(true, cmd_node0, node=0) + ["&"],
$.hwlocIfNuma(true, cmd_node1, node=1) + ["&"],
["wait"]
],
local threads = if num_threads != null then num_threads else max_threads_per_node;
assert if hyperthreading then threads % 2 == 0 else true: "It is required to bind to an even number of threads on hyperthreaded machines. Got requested "+threads+" threads";
assert threads <= max_threads_per_node: "Benchmarking must run on a single NUMA node for stability reasons. Got requested "+threads+" threads but the machine has only "+max_threads_per_node+" threads per node"; local cores = if hyperthreading then "0-"+((threads/2)-1)+".pu:0-1" else "0-"+(threads-1)+".pu:0";
local cpu_bind = if hyperthreading then "node:"+node+".core:"+cores else "node:"+node+".core:"+cores+".pu:0";
["hwloc-bind", "--cpubind", cpu_bind, "--membind", "node:"+node, "--"] + cmd
,

// building block used to generate fork builds
many_forks_benchmarking:: common.build_base + {
Expand Down
16 changes: 8 additions & 8 deletions common.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"Jsonnet files should not include this file directly but use ci/common.jsonnet instead."
],

"mx_version": "7.4.1",
"mx_version": "7.4.1.1",

"COMMENT.jdks": "When adding or removing JDKs keep in sync with JDKs in ci/common.jsonnet",
"jdks": {
Expand Down Expand Up @@ -44,13 +44,13 @@
"labsjdk-ee-21Debug": {"name": "labsjdk", "version": "ee-21.0.1+11-jvmci-23.1-b26-debug", "platformspecific": true },
"labsjdk-ee-21-llvm": {"name": "labsjdk", "version": "ee-21.0.1+11-jvmci-23.1-b26-sulong", "platformspecific": true },

"oraclejdk-latest": {"name": "jpg-jdk", "version": "22", "build_id": "jdk-22.0.1+8", "platformspecific": true, "extrabundles": ["static-libs"]},
"labsjdk-ce-latest": {"name": "labsjdk", "version": "ce-22.0.1+8-jvmci-b01", "platformspecific": true },
"labsjdk-ce-latestDebug": {"name": "labsjdk", "version": "ce-22.0.1+8-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ce-latest-llvm": {"name": "labsjdk", "version": "ce-22.0.1+8-jvmci-b01-sulong", "platformspecific": true },
"labsjdk-ee-latest": {"name": "labsjdk", "version": "ee-22.0.1+8-jvmci-b01", "platformspecific": true },
"labsjdk-ee-latestDebug": {"name": "labsjdk", "version": "ee-22.0.1+8-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ee-latest-llvm": {"name": "labsjdk", "version": "ee-22.0.1+8-jvmci-b01-sulong", "platformspecific": true }
"oraclejdk-latest": {"name": "jpg-jdk", "version": "22", "build_id": "jdk-22.0.2+9", "platformspecific": true, "extrabundles": ["static-libs"]},
"labsjdk-ce-latest": {"name": "labsjdk", "version": "ce-22.0.2+9-jvmci-b01", "platformspecific": true },
"labsjdk-ce-latestDebug": {"name": "labsjdk", "version": "ce-22.0.2+9-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ce-latest-llvm": {"name": "labsjdk", "version": "ce-22.0.2+9-jvmci-b01-sulong", "platformspecific": true },
"labsjdk-ee-latest": {"name": "labsjdk", "version": "ee-22.0.2+9-jvmci-b01", "platformspecific": true },
"labsjdk-ee-latestDebug": {"name": "labsjdk", "version": "ee-22.0.2+9-jvmci-b01-debug", "platformspecific": true },
"labsjdk-ee-latest-llvm": {"name": "labsjdk", "version": "ee-22.0.2+9-jvmci-b01-sulong", "platformspecific": true }
},

"eclipse": {
Expand Down
46 changes: 20 additions & 26 deletions compiler/ci/ci_common/benchmark-builders.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -11,41 +11,35 @@

local main_builds = std.flattenArrays([
[
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.dacapo + PR_bench_libgraal,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.dacapo_size_variants,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.scala_dacapo + PR_bench_libgraal,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.scala_dacapo_size_variants,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.renaissance + PR_bench_libgraal,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.specjvm2008 + PR_bench_libgraal,
c.weekly + hw.x52 + jdk + cc.libgraal + bench.specjbb2015,
c.daily + c.opt_post_merge + hw.x52 + jdk + cc.libgraal + bench.awfy + PR_bench_libgraal,
c.daily + hw.x52 + jdk + cc.libgraal + bench.microservice_benchmarks,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_graal_whitebox,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_graal_dist,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_misc_graal_dist,
c.daily + hw.x52 + jdk + cc.libgraal + bench.micros_shootout_graal_dist,
c.daily + hw.e4_8_64 + jdk + cc.libgraal + bench.awfy + {job_prefix:: "bench-e4vm-compiler"},
c.daily + hw.e4_8_64 + jdk + cc.libgraal + bench.dacapo + {job_prefix:: "bench-e4vm-compiler"},
c.daily + hw.e4_8_64 + jdk + cc.libgraal + bench.scala_dacapo + {job_prefix:: "bench-e4vm-compiler"},
c.daily + hw.e4_8_64 + jdk + cc.libgraal + bench.renaissance + {job_prefix:: "bench-e4vm-compiler"},
c.daily + hw.e4_8_64 + jdk + cc.libgraal + bench.specjvm2008 + {job_prefix:: "bench-e4vm-compiler"},
c.daily + hw.e4_8_64 + jdk + cc.libgraal + bench.microservice_benchmarks + {job_prefix:: "bench-e4vm-compiler"},
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.dacapo + PR_bench_libgraal,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.scala_dacapo + PR_bench_libgraal,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.renaissance + PR_bench_libgraal,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.specjvm2008 + PR_bench_libgraal,
c.on_demand + hw.e3 + jdk + cc.libgraal + bench.dacapo_size_variants,
c.on_demand + hw.e3 + jdk + cc.libgraal + bench.scala_dacapo_size_variants,
c.monthly + hw.e3 + jdk + cc.libgraal + bench.specjbb2015,
c.daily + c.opt_post_merge + hw.e3 + jdk + cc.libgraal + bench.awfy + PR_bench_libgraal,
c.daily + hw.e3 + jdk + cc.libgraal + bench.microservice_benchmarks,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.micros_graal_whitebox,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.micros_graal_dist,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.micros_misc_graal_dist,
c.weekly + hw.e3 + jdk + cc.libgraal + bench.micros_shootout_graal_dist,
]
for jdk in cc.product_jdks
]),

local profiling_builds = std.flattenArrays([
[
c.weekly + hw.x52 + jdk + cc.libgraal + suite + cc.enable_profiling + { job_prefix:: "bench-compiler-profiling" },
c.weekly + hw.x52 + jdk + cc.libgraal + suite + cc.footprint_tracking + { job_prefix:: "bench-compiler-footprint" }
c.monthly + hw.e3 + jdk + cc.libgraal + suite + cc.enable_profiling + { job_prefix:: "bench-compiler-profiling" },
c.monthly + hw.e3 + jdk + cc.libgraal + suite + cc.footprint_tracking + { job_prefix:: "bench-compiler-footprint" }
]
for jdk in cc.product_jdks
for suite in bench.groups.main_suites
]),

local weekly_amd64_forks_builds = std.flattenArrays([
bc.generate_fork_builds(c.weekly + hw.x52 + jdk + cc.libgraal + suite, subdir='compiler') +
bc.generate_fork_builds(c.monthly + hw.x52 + jdk + cc.jargraal + suite, subdir='compiler')
bc.generate_fork_builds(c.weekly + hw.e3 + jdk + cc.libgraal + suite, subdir='compiler') +
bc.generate_fork_builds(c.monthly + hw.e3 + jdk + cc.jargraal + suite, subdir='compiler')
for jdk in cc.product_jdks
for suite in bench.groups.weekly_forks_suites
]),
Expand All @@ -70,7 +64,7 @@
],

local zgc_builds = [
c.weekly + hw.x52 + jdk + cc.libgraal + cc.zgc_mode + suite,
c.weekly + hw.e3 + jdk + cc.libgraal + cc.zgc_mode + suite,
for jdk in cc.product_jdks
for suite in bench.groups.main_suites + [bench.specjbb2015]
],
Expand All @@ -83,13 +77,13 @@
],

local no_tiered_builds = [
c.weekly + hw.x52 + jdk + cc.libgraal + cc.no_tiered_comp + suite,
c.monthly + hw.e3 + jdk + cc.libgraal + cc.no_tiered_comp + suite,
for jdk in cc.product_jdks
for suite in bench.groups.main_suites
],

local no_profile_info_builds = [
c.weekly + hw.x52 + jdk + cc.libgraal + cc.no_profile_info + suite,
c.monthly + hw.e3 + jdk + cc.libgraal + cc.no_profile_info + suite,
for jdk in cc.product_jdks
for suite in bench.groups.main_suites
],
Expand Down
49 changes: 15 additions & 34 deletions compiler/ci/ci_common/benchmark-suites.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

// suite definitions
// *****************
awfy: cc.compiler_benchmark + c.heap.small + {
awfy: cc.compiler_benchmark + c.heap.small + bc.bench_max_threads + {
suite:: "awfy",
run+: [
self.benchmark_cmd + ["awfy:*", "--"] + self.extra_vm_args
Expand All @@ -36,7 +36,7 @@
max_jdk_version:: null
},

dacapo: cc.compiler_benchmark + c.heap.default + {
dacapo: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "dacapo",
run+: [
self.benchmark_cmd + ["dacapo:*", "--"] + self.extra_vm_args
Expand All @@ -48,7 +48,7 @@
max_jdk_version:: null
},

dacapo_size_variants: cc.compiler_benchmark + c.heap.default + {
dacapo_size_variants: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "dacapo-size-variants",
run+: [
self.benchmark_cmd + ["dacapo-small:*", "--"] + self.extra_vm_args,
Expand All @@ -64,7 +64,7 @@
max_jdk_version:: null
},

scala_dacapo: cc.compiler_benchmark + c.heap.default + {
scala_dacapo: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "scala-dacapo",
run+: [
self.benchmark_cmd + ["scala-dacapo:*", "--"] + self.extra_vm_args
Expand All @@ -76,7 +76,7 @@
max_jdk_version:: null
},

scala_dacapo_size_variants: cc.compiler_benchmark + c.heap.default + {
scala_dacapo_size_variants: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "scala-dacapo-size-variants",
run+: [
self.benchmark_cmd + ["scala-dacapo-tiny:*", "--"] + self.extra_vm_args,
Expand All @@ -97,7 +97,7 @@
max_jdk_version:: null
},

renaissance_template(suite_version=null, suite_name="renaissance", max_jdk_version=null):: cc.compiler_benchmark + c.heap.default + {
renaissance_template(suite_version=null, suite_name="renaissance", max_jdk_version=null):: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: suite_name,
local suite_version_args = if suite_version != null then ["--bench-suite-version=" + suite_version] else [],
run+: [
Expand All @@ -110,12 +110,9 @@
max_jdk_version:: max_jdk_version
},

renaissance: self.renaissance_template() + {
# [JDK-8303076] [GR-44499] requires extra stack size for C1
extra_vm_args+:: if self.platform == "c1" then ["-Xss1090K"] else []
},
renaissance: self.renaissance_template(),

specjbb2015: cc.compiler_benchmark + c.heap.large_with_large_young_gen + {
specjbb2015: cc.compiler_benchmark + c.heap.large_with_large_young_gen + bc.bench_max_threads + {
suite:: "specjbb2015",
downloads+: {
"SPECJBB2015": { name: "specjbb2015", version: "1.03" }
Expand All @@ -130,7 +127,7 @@
max_jdk_version:: null
},

specjvm2008: cc.compiler_benchmark + c.heap.default + {
specjvm2008: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "specjvm2008",
downloads+: {
"SPECJVM2008": { name: "specjvm2008", version: "1.01" }
Expand All @@ -146,7 +143,7 @@
},

// Microservice benchmarks
microservice_benchmarks: cc.compiler_benchmark + {
microservice_benchmarks: cc.compiler_benchmark + bc.bench_no_thread_cap + { # no thread cap here since hwloc is handled at the mx level for microservices
suite:: "microservices",
packages+: {
"pip:psutil": "==5.8.0"
Expand All @@ -159,42 +156,26 @@
local hwlocBind_16C_32T = ["--hwloc-bind=--cpubind node:0.core:0-15.pu:0-1 --membind node:0"],
run+: [
# shopcart-wrk
self.benchmark_cmd + ["shopcart-wrk:mixed-tiny"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms32m", "-Xmx112m", "-XX:ActiveProcessorCount=1", "-XX:MaxDirectMemorySize=256m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-small"] + hwlocBind_2C_2T + ["--"] + self.extra_vm_args + ["-Xms64m", "-Xmx224m", "-XX:ActiveProcessorCount=2", "-XX:MaxDirectMemorySize=512m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-medium"] + hwlocBind_4C_4T + ["--"] + self.extra_vm_args + ["-Xms128m", "-Xmx512m", "-XX:ActiveProcessorCount=4", "-XX:MaxDirectMemorySize=1024m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-large"] + hwlocBind_16C_16T + ["--"] + self.extra_vm_args + ["-Xms512m", "-Xmx3072m", "-XX:ActiveProcessorCount=16", "-XX:MaxDirectMemorySize=4096m"],
bench_upload,
self.benchmark_cmd + ["shopcart-wrk:mixed-huge"] + hwlocBind_16C_32T + ["--"] + self.extra_vm_args + ["-Xms1024m", "-Xmx8192m", "-XX:ActiveProcessorCount=32", "-XX:MaxDirectMemorySize=8192m"],
bench_upload,

# petclinic-wrk
self.benchmark_cmd + ["petclinic-wrk:mixed-tiny"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms32m", "-Xmx100m", "-XX:ActiveProcessorCount=1"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-small"] + hwlocBind_2C_2T + ["--"] + self.extra_vm_args + ["-Xms40m", "-Xmx144m", "-XX:ActiveProcessorCount=2"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-medium"] + hwlocBind_4C_4T + ["--"] + self.extra_vm_args + ["-Xms80m", "-Xmx256m", "-XX:ActiveProcessorCount=4"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-large"] + hwlocBind_16C_16T + ["--"] + self.extra_vm_args + ["-Xms128m", "-Xmx512m", "-XX:ActiveProcessorCount=16"],
bench_upload,
self.benchmark_cmd + ["petclinic-wrk:mixed-huge"] + hwlocBind_16C_32T + ["--"] + self.extra_vm_args + ["-Xms640m", "-Xmx3072m", "-XX:ActiveProcessorCount=32"],
bench_upload,

# helloworld-wrk
self.benchmark_cmd + ["micronaut-helloworld-wrk:helloworld"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms8m", "-Xmx64m", "-XX:ActiveProcessorCount=1", "-XX:MaxDirectMemorySize=256m"],
bench_upload,
self.benchmark_cmd + ["spring-helloworld-wrk:helloworld"] + hwlocBind_1C_1T + ["--"] + self.extra_vm_args + ["-Xms8m", "-Xmx64m", "-XX:ActiveProcessorCount=1", "-XX:MaxDirectMemorySize=256m"],
bench_upload
],
timelimit: "7:00:00",
timelimit: "4:00:00",
min_jdk_version:: 11,
max_jdk_version:: null
},

// JMH microbenchmarks
micros_graal_whitebox: cc.compiler_benchmark + c.heap.default + {
micros_graal_whitebox: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-graal-whitebox",
run+: [
self.benchmark_cmd + ["jmh-whitebox:*", "--"] + self.extra_vm_args + ["--", "jdk.graal.compiler"]
Expand All @@ -204,7 +185,7 @@
max_jdk_version:: null
},

micros_graal_dist: cc.compiler_benchmark + c.heap.default + {
micros_graal_dist: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-graal-dist",
run+: [
self.benchmark_cmd + ["jmh-dist:GRAAL_COMPILER_MICRO_BENCHMARKS", "--"] + self.extra_vm_args
Expand All @@ -214,7 +195,7 @@
max_jdk_version:: null
},

micros_misc_graal_dist: cc.compiler_benchmark + c.heap.default + {
micros_misc_graal_dist: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-misc-graal-dist",
run+: [
self.benchmark_cmd + ["jmh-dist:GRAAL_BENCH_MISC", "--"] + self.extra_vm_args
Expand All @@ -224,7 +205,7 @@
max_jdk_version:: null
},

micros_shootout_graal_dist: cc.compiler_benchmark + c.heap.default {
micros_shootout_graal_dist: cc.compiler_benchmark + c.heap.default + bc.bench_max_threads + {
suite:: "micros-shootout-graal-dist",
run+: [
self.benchmark_cmd + ["jmh-dist:GRAAL_BENCH_SHOOTOUT", "--"] + self.extra_vm_args
Expand Down
3 changes: 2 additions & 1 deletion compiler/ci/ci_common/compiler-common.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@
"${BENCH_RESULTS_FILE_PATH}",
"--machine-name=${MACHINE_NAME}"] +
(if std.objectHasAll(self.environment, 'MX_TRACKER') then ["--tracker=" + self.environment['MX_TRACKER']] else ["--tracker=rss"]),
benchmark_cmd:: bench_common.hwlocIfNuma(self.should_use_hwloc, self.plain_benchmark_cmd, node=self.default_numa_node),
restrict_threads:: null, # can be overridden to restrict the benchmark to the given number of threads. If null, will use one full NUMA node
benchmark_cmd:: if self.should_use_hwloc then bench_common.hwloc_cmd(self.plain_benchmark_cmd, self.restrict_threads, self.default_numa_node, self.hyperthreading, self.threads_per_node) else self.plain_benchmark_cmd,
min_heap_size:: if std.objectHasAll(self.environment, 'XMS') then ["-Xms${XMS}"] else [],
max_heap_size:: if std.objectHasAll(self.environment, 'XMX') then ["-Xmx${XMX}"] else [],
extra_vm_args::
Expand Down
Loading

0 comments on commit ccf6db8

Please sign in to comment.