From 59d8595a7a3ecc3288fe345152f1efa27df4b969 Mon Sep 17 00:00:00 2001
From: Geir Storli <geirst@vespa.ai>
Date: Wed, 30 Oct 2024 16:31:25 +0100
Subject: [PATCH] Add script to run performance tests locally using podman.

After each run the performance test results are stored in a JSON file,
later used as input to create_report.py.
---
 .../ecommerce_hybrid_search/create_report.py  | 32 ++++++---
 .../ecommerce_hybrid_search/requirements.txt  |  2 +
 .../ecommerce_hybrid_search/run-perf-test.sh  | 65 +++++++++++++++++++
 3 files changed, 90 insertions(+), 9 deletions(-)
 create mode 100644 tests/performance/ecommerce_hybrid_search/requirements.txt
 create mode 100755 tests/performance/ecommerce_hybrid_search/run-perf-test.sh

diff --git a/tests/performance/ecommerce_hybrid_search/create_report.py b/tests/performance/ecommerce_hybrid_search/create_report.py
index 78c54c55f..34213cf23 100644
--- a/tests/performance/ecommerce_hybrid_search/create_report.py
+++ b/tests/performance/ecommerce_hybrid_search/create_report.py
@@ -17,8 +17,13 @@ def get_cpu(metrics):
     regex = re.compile(r'\["cpuutil", "[^"]+"\]')
     for key, value in metrics.items():
         if regex.match(key):
+            # If the performance tests are run on a machine where CPU-util sampling
+            # is not available (e.g. in a virtual machine), assume that 1 CPU core was used
+            # to avoid division by zero when calculating 'per CPU core' metrics.
+            if float(value) == 0.0:
+                return 1.0
             return float(value) * machine_cpus
-    return 0.0
+    return 1.0
 
 
 def load_feed_results(file_name, system):
@@ -281,8 +286,8 @@ def generate_query_summary_figure(title, file_name, df, text_label_font_size=7):
     fig.write_image(file_name, format='png', scale=1.5)
 
 
-def generate_query_hockey_stick_figure(title, file_name, df):
-    print(f'\nGenerate query hockey stick figure: {file_name}:')
+def generate_query_qps_figure(title, file_name, df):
+    print(f'\nGenerate query qps figure: {file_name}:')
     print(df)
     fig = make_subplots(rows=3, cols=1, vertical_spacing=0.08)
     add_scatter_plot_to_figure(fig, 1, 1, df, 'qps', 'l_avg')
@@ -358,9 +363,9 @@ def generate_query_figures(vespa_file, es_files, output):
             filtered_df = df.query(f"phase == 'after_flush' and filter == {filter_query} and type == '{type}'")
             type_text = type + (' filtered' if filter_query else '')
             file_suffix = ('filter_' if filter_query else '') + type
-            generate_query_hockey_stick_figure(f'QPS for {type_text} queries after initial feeding',
-                                               f'{output}/query_hockey_stick_{file_suffix}.png',
-                                               filtered_df)
+            generate_query_qps_figure(f'QPS for {type_text} queries after initial feeding',
+                                      f'{output}/query_qps_{file_suffix}.png',
+                                      filtered_df)
 
     generate_overall_qps_figure(output, df)
 
@@ -430,10 +435,19 @@ def generate_overall_summary_figure(vespa_file, es_files, output):
 def main():
     parser = argparse.ArgumentParser(description="Tool that summarizes feed and query results "
                                                  "between Vespa and ES runs of the performance test")
+    # Prerequisites:
+    # pip install -r requirements.txt
+    #
     # How to use:
-    # The results of a performance test run are logged as JSON in the test log output under:
-    # '#### Performance results ####'
-    # Create a file with these results, one JSON object (per line) per data sample.
+    # 1) If running the test locally using run-perf-test.sh, the results are placed in perf_results/<version>/ (e.g. perf_results/8.427.7/)
+    # 2) If extracting the results from a performance test run on factory:
+    #   The results are logged as JSON in the test log output under:
+    #   '#### Performance results ####'
+    #   Create a file with these results, one JSON object (per line) per data sample.
+    #
+    # To generate all figures:
+    # python3 create_report.py --machine_cpus 128 --test_cpus 62 --output report_output perf_results/8.427.7/vespa.json perf_results/8.427.7/elasticsearch.json perf_results/8.427.7/elasticsearch-force-merged.json figure
+    #
     parser.add_argument('vespa_file', type=str, help='Path to Vespa result file')
     parser.add_argument('es_files', nargs='+', help='Path to ES result file(s)')
     parser.add_argument('report_type',
diff --git a/tests/performance/ecommerce_hybrid_search/requirements.txt b/tests/performance/ecommerce_hybrid_search/requirements.txt
new file mode 100644
index 000000000..71d2d28e6
--- /dev/null
+++ b/tests/performance/ecommerce_hybrid_search/requirements.txt
@@ -0,0 +1,2 @@
+pandas
+plotly
diff --git a/tests/performance/ecommerce_hybrid_search/run-perf-test.sh b/tests/performance/ecommerce_hybrid_search/run-perf-test.sh
new file mode 100755
index 000000000..6b8485885
--- /dev/null
+++ b/tests/performance/ecommerce_hybrid_search/run-perf-test.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+# Use this script to run one of the following performance tests locally:
+#   vespa - ecommerce_hybrid_search.rb
+#   elasticsearch - ecommerce_hybrid_search_es.rb
+#   elasticsearch-force-merged - ecommerce_hybrid_search_es_merge_1.rb
+#
+# The performance results of a run are placed in a JSON file in perf_results/$VERSION.
+# Use the create_report.py script to generate a report based on the results of the above three runs.
+#
+print_usage() {  # Write the one-line usage message (accepted test names) to stdout.
+    printf '%s\n' "Usage $0 {vespa|elasticsearch|elasticsearch-force-merged}"
+}
+
+VERSION=8.427.7  # Image tag passed to podman; also names the perf_results/ subdirectory.
+CONTAINER_NAME=system-tests
+DOCKER_IMAGE=docker.io/vespaengine/vespa-systemtest-preview
+TEST_DIR=/system-test/tests/performance/ecommerce_hybrid_search  # Path as seen inside the container.
+export RUBYLIB="/system-test/lib:/system-test/tests"
+delete_tmp_dir=true  # Any value other than 'true' keeps tmp/ after the run.
+
+run_perf_test() {  # $1: label used in the log line, $2: test file name under $TEST_DIR. Expansions are quoted so paths with spaces work.
+    local test_path="$TEST_DIR/$2"
+    echo "Running performance test for $1 ($test_path)"
+    podman pull "$DOCKER_IMAGE:$VERSION"
+    podman run --privileged --rm --name "$CONTAINER_NAME" -ti -v "$PWD/../../../:/system-test" -w /system-test -e "RUBYLIB=$RUBYLIB" --entrypoint /usr/bin/env "$DOCKER_IMAGE:$VERSION" bash -l -c "ruby /system-test/lib/node_server.rb & sleep 3; ruby $test_path --outputdir $TEST_DIR/tmp"
+}
+
+copy_perf_results() {  # $1: base name of the result file, $2: directory name under tmp/ holding the results. Returns the status of cp.
+    local results_path="perf_results/$VERSION/$1.json"
+    echo "Copying performance results to $results_path"
+    mkdir -p "perf_results/$VERSION"
+    cp "tmp/$2/hybrid_search/results/all_perf.json" "$results_path"
+}
+
+if [ "$#" -ne 1 ]; then
+    print_usage
+    exit 1
+fi
+
+case "$1" in
+  vespa)
+    run_perf_test "$1" "ecommerce_hybrid_search.rb"
+    copy_perf_results "$1" "EcommerceHybridSearchTest" || exit 1  # Fail loudly and keep tmp/ for inspection when no results were produced.
+    ;;
+  elasticsearch)
+    run_perf_test "$1" "ecommerce_hybrid_search_es.rb"
+    copy_perf_results "$1" "EcommerceHybridSearchESTest" || exit 1
+    ;;
+  elasticsearch-force-merged)
+    run_perf_test "$1" "ecommerce_hybrid_search_es_merge_1.rb"
+    copy_perf_results "$1" "EcommerceHybridSearchESForceMerge1Test" || exit 1
+    ;;
+  *)
+    echo "Invalid option: $1"
+    print_usage
+    exit 1
+    ;;
+esac
+
+if [ "$delete_tmp_dir" = true ]; then
+    echo "Deleting tmp directory storing output from performance test run"
+    rm -rf tmp
+fi
+