fix some benchmark issues
Signed-off-by: Yu Chin Fabian Lim <[email protected]>
fabianlim committed Jul 31, 2024
1 parent 50b9404 commit b04e2c0
Showing 3 changed files with 38 additions and 16 deletions.
11 changes: 9 additions & 2 deletions scripts/benchmarks/benchmark.py
@@ -90,6 +90,7 @@
 RESULT_FIELD_ALLOCATED_GPU_MEM = "mem_torch_mem_alloc_in_bytes"
 RESULT_FIELD_PEAK_ALLOCATED_GPU_MEM = "mem_peak_torch_mem_alloc_in_bytes"
 ERROR_MESSAGES = "error_messages"
+DRY_RUN_MESSAGE = "dry_run"


 def extract_gpu_memory_metrics(output_metrics) -> Tuple[float]:
@@ -366,14 +367,19 @@ def __init__(

     @property
     def is_completed(self):
+
         if not os.path.exists(self.results_filename):
             return False
         # otherwise open it and check for errors
         with open(self.results_filename) as f:
             results = json.load(f)

         # return complete only if no errors
-        return not ERROR_MESSAGES in results
+        # and is not a dry run
+        return (
+            not ERROR_MESSAGES in results and
+            results.get(DRY_RUN_MESSAGE, False) == False
+        )

     def run(
         self,
@@ -558,7 +564,8 @@ def _dummy(*args, **kwargs):
     def get_experiment_final_metrics(
         self, final_metrics_keys: List[str] = ["train_loss", "train_runtime"]
     ):
-        return {}
+        # will insert a special dry run key
+        return {DRY_RUN_MESSAGE: True}

     def maybe_get_experiment_error_traceback(self):
         return None
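For readers unfamiliar with the benchmark harness, the effect of the two changes above is that a dry run now writes a marker key into its results file, and is_completed refuses to treat such a file as a finished experiment. A minimal, self-contained sketch of that behaviour (the file name and contents below are hypothetical, not taken from the commit):

import json
import os

DRY_RUN_MESSAGE = "dry_run"
ERROR_MESSAGES = "error_messages"

def is_completed(results_filename: str) -> bool:
    # no results file yet -> not completed
    if not os.path.exists(results_filename):
        return False
    with open(results_filename) as f:
        results = json.load(f)
    # completed only if there were no errors and it was not a dry run
    return ERROR_MESSAGES not in results and not results.get(DRY_RUN_MESSAGE, False)

# a results file produced by a dry run is no longer counted as completed,
# so a subsequent real run will not be skipped
with open("results.json", "w") as f:
    json.dump({DRY_RUN_MESSAGE: True}, f)
print(is_completed("results.json"))  # prints: False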
31 changes: 21 additions & 10 deletions scripts/benchmarks/compare_with_reference.py
@@ -98,22 +98,33 @@ def main(
     result_dir, reference_benchmark_filepath, plot_columns, threshold_ratio, indices
 ):
     ref, args_ref = read_df(reference_benchmark_filepath, indices, plot_columns)
+    new_benchmark_filepath = os.path.join(result_dir, BENCHMARK_FILENAME)
     df, args_df = read_df(
-        os.path.join(result_dir, BENCHMARK_FILENAME), indices, plot_columns
+        new_benchmark_filepath, indices, plot_columns
     )
     # Analyse between both sets of results and retrieve outliers
     # - this has a side effect of plotting the charts
     outliers_df, outliers, charts = compare_results(
         df, ref, plot_columns, threshold_ratio=threshold_ratio
     )
-    # Find arguments that are different between ref and new
-    # to highlight as possible cause of anomaly
-    diff = args_df.compare(args_ref, align_axis=1).rename(
-        columns={"self": "new", "other": "ref"}, level=-1
-    )
-    diff = diff[diff.index.isin([outlier for outlier in outliers])]
-    if not diff.empty:
-        outliers_df = outliers_df.set_index(indices).merge(
-            diff, left_index=True, right_index=True
+    # this logic is brittle and will not hold if new benchmark is not
+    # of the exact same format as the reference benchmark,
+    # so put a try-catch.
+    try:
+        # Find arguments that are different between ref and new
+        # to highlight as possible cause of anomaly
+        diff = args_df.compare(args_ref, align_axis=1).rename(
+            columns={"self": "new", "other": "ref"}, level=-1
+        )
+        diff = diff[diff.index.isin([outlier for outlier in outliers])]
+        if not diff.empty:
+            outliers_df = outliers_df.set_index(indices).merge(
+                diff, left_index=True, right_index=True
+            )
+    except ValueError:
+        print (
+            f"New '{new_benchmark_filepath}' is probably a partial bench. So unable"
+            "to properly compare if the arguments are consistent with old bench."
+        )
     outliers_df.to_csv(os.path.join(result_dir, OUTLIERS_FILENAME))
     for chart, filename in charts:
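The try/except above guards pandas' DataFrame.compare, which requires identically labeled frames and raises ValueError otherwise (for example when a partial or dry-run benchmark has fewer rows than the reference). A small illustration of that failure mode, using made-up column and index values rather than the real benchmark fields:

import pandas as pd

ref = pd.DataFrame({"learning_rate": [1e-5, 2e-5]}, index=["expt-0", "expt-1"])
new = pd.DataFrame({"learning_rate": [1e-5]}, index=["expt-0"])  # partial bench

try:
    diff = new.compare(ref, align_axis=1)
except ValueError as err:
    # pandas refuses to compare frames whose labels do not match
    print(f"unable to compare arguments: {err}")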
12 changes: 8 additions & 4 deletions scripts/run_benchmarks.sh
@@ -44,7 +44,7 @@ MEMORY_LOGGING=${MEMORY_LOGGING:-"all"}
 NUM_GPUS_MATRIX=${1-"1 2"}
 RESULT_DIR=${2:-"benchmark_outputs"}
 SCENARIOS_CONFIG=${3:-$SCENARIOS_CONFIG}
-SCENARIOS_FILTER=${4-$SCNTAG_PEFT_AUTOGPTQ}
+SCENARIOS_FILTER=${4:-$SCNTAG_PEFT_AUTOGPTQ}

 echo "NUM_GPUS_MATRIX: $NUM_GPUS_MATRIX"
 echo "RESULT_DIR: $RESULT_DIR"
@@ -77,7 +77,7 @@ PIP_REQUIREMENTS_FILE=$RESULT_DIR/$PIP_REQUIREMENTS_FILE
 # preload models by default
 EXTRA_ARGS="--preload_models"

-if [ ! -z "$SCENARIOS_FILTER" ]; then
+if [ "$SCENARIOS_FILTER" != "none" ]; then
     EXTRA_ARGS="$EXTRA_ARGS --run_only_scenarios $SCENARIOS_FILTER"
 fi

@@ -137,5 +137,9 @@ PYTHONPATH=. \
     'error_messages' \
     'acceleration_framework_config_file'

-PYTHONPATH=. \
-python $WORKING_DIR/compare_with_reference.py --result_dir $RESULT_DIR
+if [ "$DRY_RUN" = "true" ]; then
+    echo "DRY_RUN=True, will skip compare with reference logic"
+else
+    PYTHONPATH=. \
+    python $WORKING_DIR/compare_with_reference.py --result_dir $RESULT_DIR
+fi
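Two notes on the shell changes, neither of which is spelled out in the commit itself. First, ${4-...} only falls back to the default when the fourth argument is unset, whereas ${4:-...} also falls back when it is set but empty; combined with the new != "none" test, passing the literal string none presumably disables the scenario filter, while an empty or missing argument keeps the $SCNTAG_PEFT_AUTOGPTQ default. A tiny bash illustration of the parameter-expansion difference:

#!/bin/bash
set -- 1 outputs scenarios.yaml ""   # fourth positional argument set, but empty

echo "${4-default}"     # prints an empty line: set-but-empty suppresses the fallback
echo "${4:-default}"    # prints "default": empty also triggers the fallback

Second, the final block assumes a DRY_RUN variable is provided elsewhere (for example exported by the caller); only when it equals "true" is the comparison against the reference benchmark skipped.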
