From c7aae9f4386cb89bbf9976ab439672cf98a40d83 Mon Sep 17 00:00:00 2001
From: Kamesh Akella
Date: Fri, 9 Aug 2024 03:57:40 -0400
Subject: [PATCH] add new perfInsights.py script to analyse the data generated in result_data branch (#905)

Signed-off-by: Kamesh Akella
Signed-off-by: Alexander Schwartz
Co-authored-by: Alexander Schwartz
---
 .gitignore                                 |   5 +
 benchmark/src/main/python/README.adoc      |   3 +
 benchmark/src/main/python/perfInsights.py  | 140 ++++++++++++++++++
 benchmark/src/main/python/requirements.txt |   4 +-
 .../ROOT/pages/util/perf-insights.adoc     |  45 ++++++
 .../modules/ROOT/partials/util-nav.adoc    |   1 +
 6 files changed, 197 insertions(+), 1 deletion(-)
 create mode 100644 benchmark/src/main/python/README.adoc
 create mode 100644 benchmark/src/main/python/perfInsights.py
 create mode 100644 doc/kubernetes/modules/ROOT/pages/util/perf-insights.adoc

diff --git a/.gitignore b/.gitignore
index 61d503409..8ee40c44a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -9,6 +9,11 @@ gatling-charts-*
 # Grafana report pdfs
 benchmark/src/main/python/grafana_report_pdfs
 
+# Python files
+benchmark/src/main/python/venv
+benchmark/src/main/python/results
+benchmark/src/main/python/perf_insights.log
+
 # Intellij
 ###################
 .idea
diff --git a/benchmark/src/main/python/README.adoc b/benchmark/src/main/python/README.adoc
new file mode 100644
index 000000000..63a47cc5a
--- /dev/null
+++ b/benchmark/src/main/python/README.adoc
@@ -0,0 +1,3 @@
+To find out more about the Grafana dashboard-to-PDF script `snapGrafana.py`, visit https://www.keycloak.org/keycloak-benchmark/kubernetes-guide/latest/util/grafana#snapgrafana-py-cli-options
+
+To find out more about the performance analysis script `perfInsights.py`, visit https://www.keycloak.org/keycloak-benchmark/kubernetes-guide/latest/util/perf-insights
diff --git a/benchmark/src/main/python/perfInsights.py b/benchmark/src/main/python/perfInsights.py
new file mode 100644
index 000000000..4d454c654
--- /dev/null
+++ b/benchmark/src/main/python/perfInsights.py
@@ -0,0 +1,140 @@
+import pandas as pd
+import requests
+import argparse
+from pandas import json_normalize
+import logging
+import json
+
+def setup_logger(log_file):
+    # Set up logging to a file
+    logging.basicConfig(filename=log_file, level=logging.DEBUG,
+                        format='%(asctime)s %(levelname)s %(message)s')
+    logger = logging.getLogger()
+    return logger
+
+def fetch_and_process_json(github_user, github_repo, branch_name, json_directory, logger):
+    # GitHub API URL to list files in the directory on a specific branch
+    api_url = f'https://api.github.com/repos/{github_user}/{github_repo}/contents/{json_directory}?ref={branch_name}'
+
+    # Fetch the list of files in the directory
+    response = requests.get(api_url)
+    files = response.json()
+
+    # Dictionary to store DataFrames for each test
+    data_frames = {
+        'memoryUsageTest': [],
+        'cpuUsageForLoginsTest': [],
+        'cpuUsageForCredentialGrantsTest': []
+    }
+
+    basic_df = []
+
+    # Fetch each JSON file and append to the corresponding list
+    for file in files:
+        if file['name'].endswith('.json'):
+            file_url = file['download_url']
+            file_response = requests.get(file_url)
+            file_json = file_response.json()
+            df = pd.json_normalize(file_json)
+            basic_df.append(df)
+
+            # Debug: log the JSON content
+            logger.debug("Processing file: %s", file['name'])
+            logger.debug("JSON content: %s", json.dumps(file_json, indent=2))
+
+            # Normalize the JSON to extract specific fields for each test
+            for test in data_frames.keys():
+                if test in file_json:
+                    df = json_normalize(
+                        file_json,
+                        record_path=[test, 'statistics'],
+                        meta=[
+                            'start',
+                            'context',
+                            [test, 'activeSessionsPer500MbPerPod'],
+                            [test, 'userLoginsPerSecPer1vCpuPerPod'],
+                            [test, 'credentialGrantsPerSecPer1vCpu']
+                        ],
+                        record_prefix=f'{test}.',
+                        errors='ignore'
+                    )
+                    data_frames[test].append(df)
+
+    combined_df = pd.concat(basic_df, ignore_index=True)
+    perf_across_deployments_df = combined_df[['start', 'context.externalInfinispanFeatureEnabled', 'context.persistentSessionsEnabled', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', 'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu', 'memoryUsageTest.activeSessionsPer500MbPerPod']]
+
+    print(perf_across_deployments_df.to_csv(index=False))
+    # Concatenate all DataFrames for each test into a single DataFrame
+    combined_data_frames = {test: pd.concat(data_frames[test], ignore_index=True) for test in data_frames}
+
+    # Log the columns of the combined DataFrames
+    for test, df in combined_data_frames.items():
+        logger.debug(f"{test} DataFrame columns: {df.columns.tolist()}")
+        logger.debug(f"{test} DataFrame sample: {df.head()}")
+
+    return combined_data_frames
+
+def save_to_csv(data_frames, json_directory, output_directory):
+    # Columns to include in the final CSVs for each test
+    columns_to_include = {
+        'memoryUsageTest': [
+            'start',
+            'context',
+            'memoryUsageTest.name',
+            'memoryUsageTest.activeSessionsPer500MbPerPod',
+            'memoryUsageTest.meanResponseTime.total',
+            'memoryUsageTest.percentiles1.total',
+            'memoryUsageTest.meanNumberOfRequestsPerSecond.total'
+        ],
+        'cpuUsageForLoginsTest': [
+            'start',
+            'context',
+            'cpuUsageForLoginsTest.name',
+            'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod',
+            'cpuUsageForLoginsTest.meanResponseTime.total',
+            'cpuUsageForLoginsTest.percentiles1.total',
+            'cpuUsageForLoginsTest.meanNumberOfRequestsPerSecond.total'
+        ],
+        'cpuUsageForCredentialGrantsTest': [
+            'start',
+            'context',
+            'cpuUsageForCredentialGrantsTest.name',
+            'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu',
+            'cpuUsageForCredentialGrantsTest.meanResponseTime.total',
+            'cpuUsageForCredentialGrantsTest.percentiles1.total',
+            'cpuUsageForCredentialGrantsTest.meanNumberOfRequestsPerSecond.total'
+        ]
+    }
+
+    for test, df in data_frames.items():
+        # Reorder columns to include only the desired ones
+        available_columns = [col for col in columns_to_include[test] if col in df.columns]
+        df = df[available_columns]
+
+        test_date = json_directory.replace("/", "_")
+        # Save to CSV
+        csv_file_path = f"{output_directory}/{test_date}_{test}_results.csv"
+        df.to_csv(csv_file_path, index=False)
+        print(f"Saved {test} results to {csv_file_path}")
+
+def main():
+    parser = argparse.ArgumentParser(description="Process JSON files from a GitHub repository.")
+    parser.add_argument('json_directory', type=str, help='The directory in the GitHub repository containing JSON files.')
+    parser.add_argument('output_directory', type=str, help='The directory to save the CSV files.')
+    args = parser.parse_args()
+
+    github_user = 'keycloak'
+    github_repo = 'keycloak-benchmark'
+    branch_name = 'result-data'
+    json_directory = args.json_directory
+    output_directory = args.output_directory
+
+    # Set up logger
+    log_file = 'perf_insights.log'
+    logger = setup_logger(log_file)
+
+    data_frames = fetch_and_process_json(github_user, github_repo, branch_name, json_directory, logger)
+    save_to_csv(data_frames, json_directory, output_directory)
+
+if __name__ == '__main__':
+    main()
diff --git a/benchmark/src/main/python/requirements.txt b/benchmark/src/main/python/requirements.txt
index db6d20034..d0bfbe8de 100644
--- a/benchmark/src/main/python/requirements.txt
+++ b/benchmark/src/main/python/requirements.txt
@@ -1,4 +1,6 @@
-playwright==1.37.0
+pandas==2.2.2
+playwright==1.45.1
+Requests==2.32.3
 asyncio==3.4.3
 typing==3.7.4.3
 typing_extensions==4.7.1
diff --git a/doc/kubernetes/modules/ROOT/pages/util/perf-insights.adoc b/doc/kubernetes/modules/ROOT/pages/util/perf-insights.adoc
new file mode 100644
index 000000000..e86b6e45e
--- /dev/null
+++ b/doc/kubernetes/modules/ROOT/pages/util/perf-insights.adoc
@@ -0,0 +1,45 @@
+= Analyzing the Continuous Performance Test results
+:description: A utility to perform custom analysis on the generated results from continuous performance tests.
+
+{description}
+
+== Continuous Performance Tests
+The link:{github-files}/.github/workflows/rosa-cluster-auto-provision-on-schedule.yml[ROSA Daily Scheduled Run workflow] provides automated continuous performance testing and is scheduled to run every weekday.
+
+The workflow starts by deploying a multi-AZ cluster with features such as external Infinispan and persistent sessions enabled. It then executes a series of functional tests to verify the system's performance and stability in an active-active configuration.
+
+Following these tests, a scaling benchmark assesses the system's ability to handle varying loads, providing insights into performance under real-world conditions. The results are then persisted to the `https://github.com/keycloak/keycloak-benchmark/tree/result-data/rosa_scalability[result-data branch]` of the benchmark's GitHub repository for further analysis.
+
+This automated schedule ensures consistent testing, early detection of potential issues, and continuous improvement of {project_name}'s performance.
+
+== Analyze the results
+
+The Python script `link:{github-files}/benchmark/src/main/python/perfInsights.py[perfInsights.py]` analyzes the results generated by the continuous performance tests described above.
+
+=== Installing the required Python libraries
+
+[source,bash]
+----
+pip3 install -U -r requirements.txt
+----
+
+You can check whether all requirements are satisfied with the following command.
+[source,bash]
+----
+python3 -m pip check
+----
+
+=== Usage
+
+Run the following command to fetch the results from the `https://github.com/keycloak/keycloak-benchmark/tree/result-data/rosa_scalability[result-data branch]` and save the analysis to a local directory.
+
+[source, bash]
+----
+python3 perfInsights.py <json_directory> <output_directory>
+----
+
+.Example
+[source, bash]
+----
+python3 perfInsights.py rosa_scalability/2024/07/23 results
+----
diff --git a/doc/kubernetes/modules/ROOT/partials/util-nav.adoc b/doc/kubernetes/modules/ROOT/partials/util-nav.adoc
index c448d32c9..bf5f703ef 100644
--- a/doc/kubernetes/modules/ROOT/partials/util-nav.adoc
+++ b/doc/kubernetes/modules/ROOT/partials/util-nav.adoc
@@ -1,5 +1,6 @@
 ** xref:util/sqlpad.adoc[]
 ** xref:util/grafana.adoc[]
+** xref:util/perf-insights.adoc[]
 ** xref:util/prometheus.adoc[]
 ** xref:util/otel.adoc[]
 ** xref:util/debugging-keycloak.adoc[]
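
For reference, a minimal sketch of the result JSON layout that `perfInsights.py` appears to assume, and of what its `json_normalize` call produces for one test. The field names mirror the script above; the sample dictionary and its values are hypothetical.

[source,python]
----
import pandas as pd
from pandas import json_normalize

# Hypothetical single result file, shaped the way fetch_and_process_json expects it.
file_json = {
    "start": "2024-07-23T10:00:00Z",
    "context": {"externalInfinispanFeatureEnabled": True, "persistentSessionsEnabled": True},
    "cpuUsageForLoginsTest": {
        "name": "keycloak.scenario.authentication.AuthorizationCode",
        "userLoginsPerSecPer1vCpuPerPod": 95.0,
        "statistics": [
            {
                "meanResponseTime": {"total": 120},
                "percentiles1": {"total": 180},
                "meanNumberOfRequestsPerSecond": {"total": 310},
            }
        ],
    },
}

test = "cpuUsageForLoginsTest"
df = json_normalize(
    file_json,
    record_path=[test, "statistics"],   # one row per entry in <test>.statistics
    meta=["start", "context", [test, "userLoginsPerSecPer1vCpuPerPod"]],  # repeated onto every row
    record_prefix=f"{test}.",           # prefix applied to the statistics columns
    errors="ignore",                    # missing meta fields become NaN instead of raising
)

print(df.columns.tolist())
# ['cpuUsageForLoginsTest.meanResponseTime.total',
#  'cpuUsageForLoginsTest.percentiles1.total',
#  'cpuUsageForLoginsTest.meanNumberOfRequestsPerSecond.total',
#  'start', 'context', 'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod']
----

These dotted column names are the ones the `columns_to_include` mapping in `save_to_csv` selects before writing the per-test CSV files.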