-
-
Notifications
You must be signed in to change notification settings - Fork 12
/
jmh-run.sh
296 lines (246 loc) · 9.79 KB
/
jmh-run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
#!/bin/bash
# jmh-run.sh
# Copyright 2016 - 2021 headissue GmbH, Jens Wilke
# This script runs a benchmark suite with JMH.
# Stop at the first failing command.
set -e
# Uncomment to trace every executed command while debugging.
# set -x
# Number of logical CPUs, counted from the "processor" entries in /proc/cpuinfo.
# It is determined unconditionally so that the log line below is meaningful even
# when the caller supplies BENCHMARK_THREADS. A missing file or zero matches must
# not trip "set -e", hence the "|| true"; the count then falls back to 0.
CPU_COUNT=$(grep -c "^processor" /proc/cpuinfo 2>/dev/null || true)
CPU_COUNT=${CPU_COUNT:-0}
# Thread counts to benchmark with; may be preset via the environment.
if [ -z "$BENCHMARK_THREADS" ]; then
  BENCHMARK_THREADS="2 4 8"
  # Machines with 32 or more CPUs get a wider thread range.
  if [ "$CPU_COUNT" -gt 31 ]; then
    BENCHMARK_THREADS="4 8 16 32"
  fi
fi
echo "Machine CPU count: $CPU_COUNT, selecting thread configuration: $BENCHMARK_THREADS"
# Cache implementations to benchmark, unless BENCHMARK_IMPLS is preset in the environment.
: "${BENCHMARK_IMPLS:=caffeine ehcache3 cache2k}"
# Background reading on biased locking and benchmarking:
# http://mechanical-sympathy.blogspot.de/2011/11/biased-locking-osr-and-benchmarking-fun.html
# http://www.oracle.com/technetwork/tutorials/tutorials-1876574.html
# Former default:
# test -n "$BENCHMARK_JVM_ARGS" || BENCHMARK_JVM_ARGS="-server -Xmx2G -XX:+UseG1GC";
# The biased locking delay is 4000 by default; enable it from the start to minimize effects on
# the first benchmark iteration
# (check with: java -XX:+UnlockDiagnosticVMOptions -XX:+PrintFlagsFinal 2>/dev/null | grep BiasedLockingStartupDelay)
# -Xmx10G: we don't limit the heap, so Java is taking plenty from the OS
# NOTE(review): biased locking was deprecated by JEP 374 (JDK 15); confirm that the JVM under
# test still accepts this flag.
: "${BENCHMARK_JVM_ARGS:=-server -XX:BiasedLockingStartupDelay=0}"
# -verbose:gc
# extra G1 args
# BENCHMARK_JVM_ARGS="$BENCHMARK_JVM_ARGS -XX:+UseG1GC -XX:-G1UseAdaptiveConcRefinement -XX:G1ConcRefinementGreenZone=2G -XX:G1ConcRefinementThreads=0";
# JMH options used in the presets below:
#   -wi  number of warmup iterations
#   -w   warmup time
#   -i   number of iterations
#   -r   time
#   -f   how many times to fork a single benchmark
# Every preset can be overridden by exporting the variable of the same name.

# Smoke test: only checks whether everything is running through.
: "${BENCHMARK_QUICK:=-f 1 -wi 1 -w 1s -i 1 -r 1s -foe true}"

# Fast, but with at least three iterations to detect outliers.
: "${BENCHMARK_NORMAL:=-f 1 -wi 2 -w 5s -i 3 -r 5s}"
# test -n "$BENCHMARK_NORMAL" || BENCHMARK_NORMAL="-f 1 -wi 2 -w 2s -i 3 -r 2s";

# -f 2 / -i 2 does not give enough confidence, there is sometimes one outlier.
# Two full warmups, otherwise there is big jitter with G1.
# -gc true: careful, this seems to influence the measured performance significantly.
: "${BENCHMARK_DILIGENT:=-f 3 -wi 2 -w 10s -i 2 -r 10s}"

# Longer test run for expiry tests.
: "${BENCHMARK_DILIGENT_LONG:=-f 2 -wi 1 -w 180s -i 2 -r 180s}"
# test -n "$BENCHMARK_DILIGENT_LONG" || BENCHMARK_DILIGENT_LONG="-f 2 -wi 2 -w 15s -i 3 -r 15s";

# Setup for the blog article:
# 5x30 warmups needed for cache2k 10M performance with CMS
# test -n "$BENCHMARK_DILIGENT" || BENCHMARK_DILIGENT="-f 2 -wi 5 -w 30s -i 3 -r 30s";
# Other experiments:
# test -n "$BENCHMARK_DILIGENT" || BENCHMARK_DILIGENT="-gc true -f 3 -wi 5 -w 30s -i 5 -r 30s";
# test -n "$BENCHMARK_DILIGENT" || BENCHMARK_DILIGENT="-f 3 -wi 5 -w 30s -i 5 -r 30s";
# test -n "$BENCHMARK_DILIGENT" || BENCHMARK_DILIGENT="-gc true -f 2 -wi 0 -w 40s -i 8 -r 20s";

# How to use the -prof perf and -prof perfasm profilers with Ubuntu: provide perf support
# and a disassembler
#   apt-get install perf linux-tools-generic libhsdis0-fcml
#
# When using the hwe kernel, e.g.:
#
#   apt-get install linux-tools-generic-hwe-20.04
#
# Old information:
# Tinker benchmark options to do profiling and add assembler code output (linux only).
# Needs an additional disassembly library to display assembler code
# see: http://psy-lob-saw.blogspot.de/2013/01/java-print-assembly.html
# and, see: https://wiki.openjdk.java.net/display/HotSpot/PrintAssembly
# download from: https://kenai.com/projects/base-hsdis/downloads
# install with e.g.: mv ~/Downloads/linux-hsdis-amd64.so jdk1.8.0_45/jre/lib/amd64/hsdis-amd64.so.
# For profiling only do one fork, but more measurement iterations.
# Profilers are described here: http://java-performance.info/introduction-jmh-profilers
# hsdis is available as Ubuntu package: sudo apt-get install libhsdis0-fcml
: "${BENCHMARK_PERFASM:=-f 1 -wi 1 -w 10s -i 1 -r 20s -prof perfasm:hotThreshold=0.05}"

# Longer test run for expiry tests.
: "${BENCHMARK_PERFASM_LONG:=-f 1 -wi 1 -w 180s -i 1 -r 180s -prof perfasm:hotThreshold=0.05}"
# Profilers that are passed to every benchmark run: the "comp" and "gc" profilers plus
# the profiler classes from the org.cache2k.benchmark.jmh package.
# hs_gc: detailed counters from the GC implementation
STANDARD_PROFILER="-prof comp -prof gc"
# STANDARD_PROFILER+=" -prof org.cache2k.benchmark.jmh.ForcedGcMemoryProfiler"
STANDARD_PROFILER+=" -prof org.cache2k.benchmark.jmh.LinuxVmProfiler"
STANDARD_PROFILER+=" -prof org.cache2k.benchmark.jmh.MiscResultRecorderProfiler"
STANDARD_PROFILER+=" -prof org.cache2k.benchmark.jmh.GcProfiler"
STANDARD_PROFILER+=" -prof org.cache2k.benchmark.jmh.HeapProfiler"
# Additional profilers and benchmark parameters; filled in by processCommandLine.
EXTRA_PROFILER=""
EXTRA_PARAMETERS=""
# Appended to EXTRA_PROFILER when the --perfnorm option is given.
PERF_NORM_OPTIONS="-prof perfnorm:useDefaultStat=true"
# Iteration options in effect when neither --quick, --diligent nor --perfasm is given.
OPTIONS="$BENCHMARK_NORMAL"
# BENCHMARK_NORMAL_LONG has no built-in default; fall back to the normal options
# instead of silently leaving OPTIONS_LONG empty.
OPTIONS_LONG="${BENCHMARK_NORMAL_LONG:-$BENCHMARK_NORMAL}"
# The benchmarks are started with the java binary below JAVA_HOME; give up early
# when JAVA_HOME is not set.
if [[ -z "$JAVA_HOME" ]]; then
  echo "JAVA_HOME needs to be set" >&2
  exit 1
fi
java="$JAVA_HOME/bin/java"
# These flags are set by processCommandLine (--dry / --quick); clear them so that
# nothing is inherited from the environment.
unset dry quick
# Prints the command line help to stdout.
# Globals read: BENCHMARK_IMPLS (shown as the default of --impls)
usage() {
  printf '%s\n' \
    "Usage: $0 [ options ] complete" \
    "--quick Run a smoke test only, not the full benchmark" \
    "--diligent Run benchmark with longer iteration time" \
    "--dry Log the command lines to execute, but do not run test" \
    "--perfasm" \
    "--perfnorm" \
    "--impls <impls> Test only the given cache implementations, default: $BENCHMARK_IMPLS" \
    "" \
    "Command:" \
    "" \
    "complete runs the benchmark suite"
}
# Parses the command line: every leading option adjusts the global configuration,
# the first non-option argument is invoked as a command (e.g. "complete") and the
# script exits afterwards.
# Globals written: quick, dry, java, EXTRA_PARAMETERS, EXTRA_PROFILER,
#   BENCHMARK_THREADS, BENCHMARK_IMPLS, OPTIONS, OPTIONS_LONG
# Arguments: the command line of the script ("$@")
# Exits: 0 after the command was run; 1 after printing the usage for an unknown
#   option, a missing option value or a missing command.
processCommandLine() {
  while true; do
    case "$1" in
      --quick)
        quick=true
        EXTRA_PARAMETERS="-p entryCount=100000 -p percent=110"
        BENCHMARK_THREADS="4"
        OPTIONS="$BENCHMARK_QUICK"
        OPTIONS_LONG="$BENCHMARK_QUICK"
        ;;
      --diligent)
        OPTIONS="$BENCHMARK_DILIGENT"
        OPTIONS_LONG="$BENCHMARK_DILIGENT_LONG"
        ;;
      --perfasm)
        OPTIONS="$BENCHMARK_PERFASM"
        OPTIONS_LONG="$BENCHMARK_PERFASM_LONG"
        ;;
      --perfnorm)
        EXTRA_PROFILER="$EXTRA_PROFILER $PERF_NORM_OPTIONS"
        ;;
      --impls)
        # A trailing "--impls" without a value used to run into a failing "shift",
        # which aborts without any message under "set -e".
        if test "$#" -lt 2; then
          echo "option --impls requires an argument" 1>&2
          usage
          exit 1
        fi
        BENCHMARK_IMPLS="$2"
        shift
        ;;
      --dry)
        dry=true
        # The dryEcho function stands in for the java binary and only prints the command line.
        java="dryEcho"
        ;;
      --echo|--debug)
        set -x
        ;;
      -*)
        echo "unknown option: $1"
        usage
        exit 1
        ;;
      *)
        # Running out of arguments means that no command was given.
        if test -z "$1"; then
          usage
          exit 1
        fi
        "$1"
        exit 0
        ;;
    esac
    shift 1
  done
}
# Copies only those lines from stdin to stdout that start with "# Run ";
# everything else is dropped.
filterProgress() {
  awk '$0 ~ /^# Run / { print $0 }'
}
# Start of the run in seconds since the epoch; stays 0 until startTimer is called.
START_TIME=0
# Remembers the current time in START_TIME.
startTimer() {
  START_TIME="$(date +%s)"
}
# Prints the current date and the number of seconds elapsed since START_TIME.
# Globals read: START_TIME
stopTimer() {
  local finished=$(date +%s)
  printf '%s\n' "Finished at: $(date)"
  printf '%s\n' "Total runtime $(( finished - START_TIME ))s"
}
# Stand-in for the java binary in dry mode: prints "java" followed by all arguments
# on one line instead of executing anything. Each argument is printed with %q, so
# arguments containing whitespace or other special characters are shell-quoted and
# the line can be pasted back into a shell.
# Arguments: the arguments of the java command line
# Outputs: the command line on stdout
dryEcho() {
  # The loop variable is local so that it does not clobber a global variable.
  local arg
  printf 'java'
  for arg in "$@"; do
    printf ' %q' "$arg"
  done
  printf '\n'
}
# Use the jar from the build tree if present, otherwise benchmarks.jar in the
# current directory.
JAR="jmh-suite/target/benchmarks.jar"
test -f "$JAR" || JAR="benchmarks.jar"
# Directory that receives the result files. The expansions are quoted so that a
# home directory containing whitespace is not split into several arguments here.
TARGET="$HOME/jmh-result"
mkdir -p "$TARGET"
startTimer
# Runs a command restricted to the first N CPU cores.
# taskset is used for limiting the cores, which works properly with newer JDKs;
# previous benchmarks used OS CPU hotplugging, which requires root.
# Globals read: dry (when non-empty the command is run without taskset)
# Arguments: $1 - number of cores, remaining arguments - the command to run
limitCores() {
  local cores="$1"
  shift
  if [ -z "$dry" ]; then
    taskset -c "0-$(( cores - 1 ))" "$@"
  else
    "$@"
  fi
}
# Lookup table from the short implementation name (first word of a line) to the JMH
# parameters that select the cache factory (rest of the line). Each name must appear
# only once: the lookup in benchmark() returns the parameters of every matching
# line, so the formerly duplicated chm/slhm/plhm rows handed the same parameter to
# JMH twice.
implementations=$(cat <<'EOF'
cache2k -p cacheFactory=org.cache2k.benchmark.cache.Cache2kFactory
cache2kj -p cacheFactory=org.cache2k.benchmark.JCacheFactory -p cacheProvider=org.cache2k.jcache.provider.JCacheProvider
cache2kw -p cacheFactory=org.cache2k.benchmark.Cache2kWiredFactory
caffeine -p cacheFactory=org.cache2k.benchmark.cache.CaffeineCacheFactory
ehcache3 -p cacheFactory=org.cache2k.benchmark.cache.EhCache3Factory
chm -p cacheFactory=org.cache2k.benchmark.ConcurrentHashMapFactory
slhm -p cacheFactory=org.cache2k.benchmark.SynchronizedLinkedHashMapFactory
plhm -p cacheFactory=org.cache2k.benchmark.PartitionedLinkedHashMapFactory
guava -p cacheFactory=org.cache2k.benchmark.thirdparty.GuavaCacheFactory
EOF
)
# Runs one JMH benchmark for one cache implementation and thread count.
# Globals read: implementations, TARGET, JAR, java, dry, BENCHMARK_JVM_ARGS, OPTIONS,
#   STANDARD_PROFILER, EXTRA_PROFILER, EXTRA_PARAMETERS
# Arguments:
#   $1 - short name of the implementation, looked up in $implementations
#   $2 - benchmark name, passed to JMH prefixed with "\."
#   $3 - thread count, used for both the core limit and the JMH -t option
#   $4 - optional variant name, becomes part of the result file name
#   $5 - optional extra JMH parameters for the variant
# Outputs: a "## <runid>" heading and the progress lines on stdout; the complete
#   output goes to $TARGET/result-<runid>.out, the JMH results to
#   $TARGET/result-<runid>.json.
benchmark() {
  local impl="$1"
  local benchmark="$2"
  local threads="$3"
  local variant="$4"
  local param="$5"
  # Working variables are local so that they do not leak into the global scope.
  local factory runid fn
  # The name is handed to awk as a variable and compared literally instead of being
  # pasted into a regular expression; only the first matching line is used.
  factory=$(printf '%s\n' "$implementations" |
    awk -v name="$impl" '$1 == name { print substr($0, length($1) + 2); exit }')
  if test -n "$variant"; then
    runid="$impl-$benchmark-$threads-$variant"
  else
    runid="$impl-$benchmark-$threads"
  fi
  fn="$TARGET/result-$runid"
  echo
  echo "## $runid"
  # Flush file system buffers before the run starts.
  sync
  # The option strings ($OPTIONS, the profilers, $EXTRA_PARAMETERS, $param, $factory)
  # are intentionally unquoted: each holds several words that must become separate
  # arguments.
  limitCores "$threads" "$java" -jar "$JAR" "\\.$benchmark" -jvmArgs "$BENCHMARK_JVM_ARGS" \
    $OPTIONS $STANDARD_PROFILER $EXTRA_PROFILER \
    $EXTRA_PARAMETERS -t "$threads" -p "shortName=$impl" -p "variant=$variant" $param $factory \
    -rf json -rff "$fn.json" \
    2>&1 | tee "$fn.out" | filterProgress
  if test -n "$dry"; then
    # In dry mode the output file holds the command line that would have been run.
    cat "$fn.out"
  else
    echo "=> $fn.out"
  fi
}
# Runs the complete benchmark suite for every implementation in BENCHMARK_IMPLS and
# prints the total runtime at the end.
# Globals read: BENCHMARK_IMPLS, BENCHMARK_THREADS
complete() {
  # All working variables are local; previously they leaked into the global scope,
  # including a variable that shares its name with the benchmark function.
  local impl benchmark thread
  # benchmarks we keep an eye on, but do not run through all thread variations
  local reducedBenchmarks="ZipfianSequenceBulkLoadingBenchmark IterationBenchmark"
  # current benchmarks with detailed output
  local benchmarks="ZipfianSequenceLoadingBenchmark PopulateParallelClearBenchmark PopulateParallelOnceBenchmark PopulateParallelTwiceBenchmark"
  # reducedBenchmarks=""
  # benchmarks="IterationBenchmark"
  for impl in $BENCHMARK_IMPLS; do
    for benchmark in $benchmarks; do
      # Log the thread configuration used for this benchmark.
      echo $BENCHMARK_THREADS
      for thread in $BENCHMARK_THREADS; do
        benchmark "$impl" "$benchmark" "$thread"
      done
    done
  done
  # run this set of benchmarks with a single, fixed thread count
  for impl in $BENCHMARK_IMPLS; do
    for benchmark in $reducedBenchmarks; do
      for thread in 8; do
        benchmark "$impl" "$benchmark" "$thread"
      done
    done
  done
  # "tti" variant of the Zipfian loading benchmark with additional parameters
  benchmark=ZipfianSequenceLoadingBenchmark
  for impl in $BENCHMARK_IMPLS; do
    for thread in $BENCHMARK_THREADS; do
      benchmark "$impl" "$benchmark" "$thread" tti "-p tti=true -p percent=110"
    done
  done
  stopTimer
}
# Entry point: apply the command line options and run the requested command.
# processCommandLine exits the script itself, so nothing after this line is reached.
processCommandLine "$@";