Skip to content

Commit

Permalink
Turn statistics into GAP Records (#166)
Browse files Browse the repository at this point in the history
* Changing record names to match GAP and adding some missing type checking

* Fixing other unit tests

* Updating time to first token records

* Updating inter token latency records

* Updating output token throughput record

* Adding output token throughput per request records

* Adding output sequence length (OSL) records

* Adding Input sequence length (ISL) records

* Removing non-GAP records

* Adding telemetry records

* Fixing unit testing

* Adding request goodput record

* Adding method to create records from statistics

* Added very basic unit testing

* Remove demo file (accidental commit)

* Fix codeql error
  • Loading branch information
nv-braf authored Nov 7, 2024
1 parent a88a77e commit 38579a5
Show file tree
Hide file tree
Showing 104 changed files with 2,128 additions and 2,541 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from typing import Any, Dict, Optional, TypeAlias

from genai_perf.record.record import Record
from genai_perf.record.types.request_throughput_avg import RequestThroughputAvg
from genai_perf.types import (
CheckpointObject,
MetricObjectives,
Expand All @@ -39,7 +40,7 @@
###########################################################################
@dataclass(frozen=True)
class ModelConfigMeasurementDefaults:
METRIC_OBJECTIVE = {"perf_throughput": 1.0}
METRIC_OBJECTIVE = {RequestThroughputAvg.tag: 1.0}

SELF_IS_BETTER = 1
OTHER_IS_BETTER = -1
Expand Down
27 changes: 27 additions & 0 deletions genai-perf/genai_perf/metrics/statistics.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,10 @@

import numpy as np
import pandas as pd
from genai_perf.exceptions import GenAIPerfException
from genai_perf.metrics.metrics import Metrics
from genai_perf.metrics.telemetry_metrics import TelemetryMetrics
from genai_perf.record.record import Record, RecordType


class Statistics:
Expand Down Expand Up @@ -192,3 +194,28 @@ def export_parquet(self, artifact_dir: Path, filename: str) -> None:

filepath = artifact_dir / f"{filename}.gzip"
df.to_parquet(filepath, compression="gzip")

def create_records(self) -> List[Record]:
    """
    Populates and returns a list of Records

    Each entry of ``self.stats_dict`` maps a metric base name to a dict of
    statistics. Every statistic except the ``"unit"`` entry is converted
    into a Record whose tag is ``<metric_base_name>_<metric_post_name>``.

    Returns
    -------
    List[Record]
        One Record per statistic, in ``stats_dict`` iteration order

    Raises
    ------
    GenAIPerfException
        If a composed metric name is not a known Record tag
    """
    # The tag -> Record class mapping does not depend on the loop
    # variables, so fetch it once instead of once per statistic.
    record_types = RecordType.get_all_record_types()

    statistic_records = []
    for metric_base_name, metric_info in self.stats_dict.items():
        for metric_post_name, metric_value in metric_info.items():
            # "unit" describes the metric; it is not a statistic value
            if metric_post_name == "unit":
                continue

            metric_name = metric_base_name + "_" + metric_post_name

            # Guard only the tag lookup: a KeyError raised while
            # constructing the Record must not be reported as an
            # invalid tag.
            try:
                record_class = record_types[metric_name]
            except KeyError as err:
                raise GenAIPerfException(
                    f"{metric_name} is not a valid Record tag."
                ) from err

            statistic_records.append(record_class(metric_value))

    return statistic_records
20 changes: 4 additions & 16 deletions genai-perf/genai_perf/record/record.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from typing import Dict, Union

from genai_perf.exceptions import GenAIPerfException
from genai_perf.types import RecordValue


class RecordType(ABCMeta):
Expand Down Expand Up @@ -90,16 +91,7 @@ class Record(metaclass=RecordType):
records
"""

def __init__(self, value: Union[float, int], timestamp: int):
"""
Parameters
----------
value : float or int
The value of the GPU metric
timestamp : int
The timestamp for the record in nanoseconds
"""

def __init__(self, value: RecordValue, timestamp: int):
assert type(value) is float or type(value) is int
assert type(timestamp) is int

Expand All @@ -124,16 +116,12 @@ def aggregation_function():
def value_function() -> float:
"""
Returns the average value from a list
Returns
-------
Average value of the list
"""
return mean # type: ignore

@staticmethod
@abstractmethod
def header(aggregation_tag=False):
def header(aggregation_tag=False) -> str:
"""
Parameters
----------
Expand Down Expand Up @@ -170,7 +158,7 @@ def create_class_from_checkpoint(cls, record_dict) -> "Record":
setattr(record, key, record_dict[key])
return record

def value(self) -> Union[float, int]:
def value(self) -> RecordValue:
"""
This method returns the value of recorded metric
Expand Down
91 changes: 0 additions & 91 deletions genai-perf/genai_perf/record/types/cpu_available_ram.py

This file was deleted.

91 changes: 0 additions & 91 deletions genai-perf/genai_perf/record/types/cpu_used_ram.py

This file was deleted.

56 changes: 56 additions & 0 deletions genai-perf/genai_perf/record/types/gpu_energy_consumption.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from genai_perf.record.gpu_record import DecreasingGPURecord


@total_ordering
class GPUEnergyConsumption(DecreasingGPURecord):
    """
    GPU's energy consumption metric

    Ordering and subtraction are reversed relative to the raw values:
    ``a < b`` is true when ``a`` holds the *larger* value, and ``a - b``
    evaluates to ``b.value() - a.value()``.
    NOTE(review): presumably this is because a smaller consumption is the
    better result for a "decreasing" record - confirm against
    DecreasingGPURecord.
    """

    # Tag under which this record type is identified
    tag = "energy_consumption"

    def __init__(self, value, device_uuid=None, timestamp=0):
        """
        Parameters
        ----------
        value
            The recorded energy consumption (the header reports it in MJ)
        device_uuid
            Forwarded to the base class; defaults to None
        timestamp
            Forwarded to the base class; defaults to 0
        """
        super().__init__(value, device_uuid, timestamp)

    @staticmethod
    def aggregation_function():
        """
        Returns a function that averages a non-empty sequence.

        The returned function raises IndexError when given an empty list,
        because it reads the first element unconditionally.
        """

        def average(seq):
            # Seed the sum with the first element instead of 0 so the
            # elements only need to support addition with each other.
            return sum(seq[1:], start=seq[0]) / len(seq)

        return average

    @staticmethod
    def header(aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag
            When truthy, the header is prefixed with "Average "

        Returns
        -------
        str
            The column header for this metric
        """
        return ("Average " if aggregation_tag else "") + "GPU Energy Consumption (MJ)"

    def __eq__(self, other: "GPUEnergyConsumption") -> bool:  # type: ignore
        """Two records are equal when their values are equal."""
        return self.value() == other.value()

    def __lt__(self, other: "GPUEnergyConsumption") -> bool:
        """Reversed on purpose: self is "less than" other when its value is larger."""
        return other.value() < self.value()

    def __add__(self, other: "GPUEnergyConsumption") -> "GPUEnergyConsumption":
        """Returns a new record (device_uuid=None) holding the sum of both values."""
        return GPUEnergyConsumption(
            device_uuid=None, value=(self.value() + other.value())
        )

    def __sub__(self, other: "GPUEnergyConsumption") -> "GPUEnergyConsumption":
        """
        Returns a new record (device_uuid=None) holding
        ``other.value() - self.value()`` - the operands are reversed,
        matching the reversed ordering in ``__lt__``.
        """
        return GPUEnergyConsumption(
            device_uuid=None, value=(other.value() - self.value())
        )
Loading

0 comments on commit 38579a5

Please sign in to comment.