-
Notifications
You must be signed in to change notification settings - Fork 74
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add new perfInsights.py script to analyse the data generated in resul…
…t_data branch (#905) Signed-off-by: Kamesh Akella <[email protected]> Signed-off-by: Alexander Schwartz <[email protected]> Co-authored-by: Alexander Schwartz <[email protected]>
- Loading branch information
Showing
6 changed files
with
197 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
To find out more about the Grafana dashboard-to-PDF script `snapGrafana.py`, visit https://www.keycloak.org/keycloak-benchmark/kubernetes-guide/latest/util/grafana#snapgrafana-py-cli-options | ||
|
||
To find out more about the performance analysis script `perfInsights.py`, visit https://www.keycloak.org/keycloak-benchmark/kubernetes-guide/latest/util/perf-insights |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,140 @@ | ||
import pandas as pd | ||
import requests | ||
import argparse | ||
from pandas import json_normalize | ||
import logging | ||
import json | ||
|
||
def setup_logger(log_file):
    """Configure file-based logging and hand back the root logger.

    Args:
        log_file: Path of the file that log records are written to.

    Returns:
        The root logger, after requesting DEBUG level and a
        ``<timestamp> <level> <message>`` record layout.
    """
    record_layout = '%(asctime)s %(levelname)s %(message)s'
    logging.basicConfig(
        format=record_layout,
        level=logging.DEBUG,
        filename=log_file,
    )
    # Calling getLogger() without a name yields the root logger.
    return logging.getLogger()
|
||
def fetch_and_process_json(github_user, github_repo, branch_name, json_directory, logger):
    """Download result JSON files from GitHub and build per-test DataFrames.

    Lists ``json_directory`` on ``branch_name`` of ``github_user/github_repo``
    through the GitHub contents API, downloads every ``*.json`` entry, prints
    a cross-deployment summary as CSV to stdout, and returns the flattened
    statistics of each known test.

    Args:
        github_user: Owner of the GitHub repository.
        github_repo: Name of the GitHub repository.
        branch_name: Branch (ref) the directory is read from.
        json_directory: Repository-relative directory holding the JSON files.
        logger: Logger that receives the debug output.

    Returns:
        A dict mapping each test name (``memoryUsageTest``,
        ``cpuUsageForLoginsTest``, ``cpuUsageForCredentialGrantsTest``) to a
        DataFrame with one row per ``statistics`` record found across all
        files. A test that appears in no file maps to an empty DataFrame.

    Raises:
        requests.HTTPError: If the listing or a file download returns an
            HTTP error status.
        ValueError: If the API response is not a directory listing.
    """
    # Without a timeout an unresponsive server would block the script forever.
    request_timeout = 30

    # GitHub API URL to list files in the directory on a specific branch
    api_url = f'https://api.github.com/repos/{github_user}/{github_repo}/contents/{json_directory}?ref={branch_name}'

    # Fetch the list of files in the directory
    response = requests.get(api_url, timeout=request_timeout)
    # Error responses carry a JSON object, not a listing; fail here with the
    # HTTP status instead of with an obscure TypeError further down.
    response.raise_for_status()
    files = response.json()
    if not isinstance(files, list):
        raise ValueError(
            f"Expected a directory listing for '{json_directory}' "
            f"but got {type(files).__name__}"
        )

    # Lists of DataFrames collected for each test
    data_frames = {
        'memoryUsageTest': [],
        'cpuUsageForLoginsTest': [],
        'cpuUsageForCredentialGrantsTest': []
    }

    # One fully flattened single-row DataFrame per file, used for the summary
    basic_df = []

    # Fetch each JSON file and append to the corresponding list
    for file in files:
        if not file['name'].endswith('.json'):
            continue

        file_response = requests.get(file['download_url'], timeout=request_timeout)
        file_response.raise_for_status()
        file_json = file_response.json()
        basic_df.append(pd.json_normalize(file_json))

        # Debug: log the JSON content
        logger.debug("Processing file: %s", file['name'])
        logger.debug("JSON content: %s", json.dumps(file_json, indent=2))

        # Normalize the JSON to extract specific fields for each test
        for test, frames in data_frames.items():
            if test not in file_json:
                continue
            frames.append(
                json_normalize(
                    file_json,
                    record_path=[test, 'statistics'],
                    # Each test carries only one of the three headline
                    # metrics; errors='ignore' lets the absent ones pass.
                    meta=[
                        'start',
                        'context',
                        [test, 'activeSessionsPer500MbPerPod'],
                        [test, 'userLoginsPerSecPer1vCpuPerPod'],
                        [test, 'credentialGrantsPerSecPer1vCpu']
                    ],
                    record_prefix=f'{test}.',
                    errors='ignore'
                )
            )

    # pd.concat raises ValueError on an empty list, so guard the no-file case.
    combined_df = pd.concat(basic_df, ignore_index=True) if basic_df else pd.DataFrame()

    # NOTE(review): the summary reads the credential-grant metric from the top
    # level, while the per-test extraction above expects it nested under
    # 'cpuUsageForCredentialGrantsTest'. Accept either layout and keep the
    # original summary header -- confirm against the actual result files.
    flat_col = 'credentialGrantsPerSecPer1vCpu'
    nested_col = f'cpuUsageForCredentialGrantsTest.{flat_col}'
    if nested_col in combined_df.columns:
        nested_values = combined_df[nested_col]
        if flat_col in combined_df.columns:
            combined_df[flat_col] = combined_df[flat_col].combine_first(nested_values)
        else:
            combined_df[flat_col] = nested_values

    summary_columns = [
        'start',
        'context.externalInfinispanFeatureEnabled',
        'context.persistentSessionsEnabled',
        'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod',
        flat_col,
        'memoryUsageTest.activeSessionsPer500MbPerPod',
    ]
    # reindex yields an empty column for a metric missing from every file,
    # where plain column selection would raise KeyError.
    perf_across_deployments_df = combined_df.reindex(columns=summary_columns)

    print(perf_across_deployments_df.to_csv(index=False))

    # Concatenate all DataFrames for each test into a single DataFrame
    combined_data_frames = {
        test: pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
        for test, frames in data_frames.items()
    }

    # Log the columns of the combined DataFrames
    for test, df in combined_data_frames.items():
        logger.debug("%s DataFrame columns: %s", test, df.columns.tolist())
        logger.debug("%s DataFrame sample: %s", test, df.head())

    return combined_data_frames
|
||
def save_to_csv(data_frames, json_directory, output_directory): | ||
# Columns to include in the final CSVs for each test | ||
columns_to_include = { | ||
'memoryUsageTest': [ | ||
'start', | ||
'context', | ||
'memoryUsageTest.name', | ||
'memoryUsageTest.activeSessionsPer500MbPerPod', | ||
'memoryUsageTest.meanResponseTime.total', | ||
'memoryUsageTest.percentiles1.total', | ||
'memoryUsageTest.meanNumberOfRequestsPerSecond.total' | ||
], | ||
'cpuUsageForLoginsTest': [ | ||
'start', | ||
'context', | ||
'cpuUsageForLoginsTest.name', | ||
'cpuUsageForLoginsTest.userLoginsPerSecPer1vCpuPerPod', | ||
'cpuUsageForLoginsTest.meanResponseTime.total', | ||
'cpuUsageForLoginsTest.percentiles1.total', | ||
'cpuUsageForLoginsTest.meanNumberOfRequestsPerSecond.total' | ||
], | ||
'cpuUsageForCredentialGrantsTest': [ | ||
'start', | ||
'context', | ||
'cpuUsageForCredentialGrantsTest.name', | ||
'cpuUsageForCredentialGrantsTest.credentialGrantsPerSecPer1vCpu', | ||
'cpuUsageForCredentialGrantsTest.meanResponseTime.total', | ||
'cpuUsageForCredentialGrantsTest.percentiles1.total', | ||
'cpuUsageForCredentialGrantsTest.meanNumberOfRequestsPerSecond.total' | ||
] | ||
} | ||
|
||
for test, df in data_frames.items(): | ||
# Reorder columns to include only the desired ones | ||
available_columns = [col for col in columns_to_include[test] if col in df.columns] | ||
df = df[available_columns] | ||
|
||
test_date = json_directory.replace("/", "_") | ||
# Save to CSV | ||
csv_file_path = f"{output_directory}/{test_date}_{test}_results.csv" | ||
df.to_csv(csv_file_path, index=False) | ||
print(f"Saved {test} results to {csv_file_path}") | ||
|
||
def main():
    """Parse the command line, fetch the benchmark results and export them as CSV.

    Takes two positional arguments: the directory inside the GitHub
    repository that holds the JSON result files, and the local directory the
    CSV files are saved to.
    """
    arg_parser = argparse.ArgumentParser(description="Process JSON files from a GitHub repository.")
    arg_parser.add_argument('json_directory', type=str, help='The directory in the GitHub repository containing JSON files.')
    arg_parser.add_argument('output_directory', type=str, help='The directory to save the CSV files.')
    cli = arg_parser.parse_args()

    # Debug output is written to this file, relative to the working directory.
    logger = setup_logger('perf_insights.log')

    # Results are read from the result-data branch of keycloak/keycloak-benchmark.
    results = fetch_and_process_json(
        'keycloak',
        'keycloak-benchmark',
        'result-data',
        cli.json_directory,
        logger,
    )
    save_to_csv(results, cli.json_directory, cli.output_directory)


if __name__ == '__main__':
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
playwright==1.37.0 | ||
pandas==2.2.2 | ||
playwright==1.45.1 | ||
Requests==2.32.3 | ||
asyncio==3.4.3 | ||
typing==3.7.4.3 | ||
typing_extensions==4.7.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
= Analyzing the Continuous Performance Test results | ||
:description: A utility to perform custom analysis on the generated results from continuous performance tests. | ||
|
||
{description} | ||
|
||
== Continuous Performance Tests | ||
The link:{github-files}/.github/workflows/rosa-cluster-auto-provision-on-schedule.yml[ROSA Daily Scheduled Run workflow] is an automated process, scheduled to run every weekday, that provides continuous performance testing.
|
||
This workflow starts by deploying a multi-AZ cluster with features such as external Infinispan and persistent sessions enabled. It then executes a series of functional tests to verify the system's performance and stability in an active-active configuration.
|
||
Following these tests, a scaling benchmark assesses the system's ability to handle varying loads, providing crucial insights into performance under real-world conditions. The results are then persisted to the `https://github.com/keycloak/keycloak-benchmark/tree/result-data/rosa_scalability[result-data branch]` in the benchmark's GitHub repository for further analysis.
|
||
This automated schedule ensures consistent testing, early detection of potential issues, and continuous improvement of {project_name}'s performance. | ||
|
||
== Analyze the results | ||
|
||
The Python script `link:{github-files}/benchmark/src/main/python/perfInsights.py[perfInsights.py]` analyzes the results generated by the continuous performance tests described above.
|
||
=== Installing needed python libraries | ||
|
||
[source,bash] | ||
---- | ||
pip3 install -U -r requirements.txt | ||
---- | ||
|
||
To check that all requirements are satisfied, run the following command:
[source,bash] | ||
---- | ||
python3 -m pip check | ||
---- | ||
|
||
=== Usage | ||
|
||
Run the following command to fetch the results from the `https://github.com/keycloak/keycloak-benchmark/tree/result-data/rosa_scalability[result-data branch]` and save the analysis to a local directory.
|
||
[source, bash] | ||
---- | ||
python3 perfInsights.py <result_json_dir_path_on_github> <output_dir_path> | ||
---- | ||
|
||
[source, bash] | ||
---- | ||
Example: | ||
python3 perfInsights.py rosa_scalability/2024/07/23 results | ||
---- |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters