Cl mod benchmark (ModelCloud#944)
* add torch inference benchmark

* add torch benchmark test

* code review

* code review

* update model path

* code clean up

* update torch value

* code clean

* code clean

* code opt

* code opt

* code refactor

* code opt

* add other test

* update model

* add d_type

* fix torch dtype

* rm dtype

* cleanup

* cleanup

* code opt

* code clean up

* update model id

* update code

* update score

---------

Co-authored-by: LRL-ModelCloud <[email protected]>
CL-ModelCloud and LRL-ModelCloud authored Dec 20, 2024
1 parent 33791ce commit 94a3911
Showing 2 changed files with 21 additions and 3 deletions.
18 changes: 18 additions & 0 deletions tests/benchmark/benchmark.py
@@ -0,0 +1,18 @@
from benchmark_test import BenchmarkTest
from gptqmodel import BACKEND
from parameterized import parameterized # noqa: E402


class TestInference(BenchmarkTest):
    @parameterized.expand(
        [
            (BACKEND.TORCH, 'cuda', 292.50),
            (BACKEND.TORCH, 'cpu', 5.50),
            (BACKEND.TORCH, 'xpu', 58.20),
            (BACKEND.TORCH, 'mps', 3.40),
        ]
    )
    def test_inference(self, backend, device, tokens_per_second):
        if device == 'mps':
            self.skipTest(f"MacOS env skip")
        self.benchmark(backend=backend, device=device, tokens_per_second=tokens_per_second)
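
Each tuple above pins an expected tokens-per-second baseline for one backend/device pair, and parameterized.expand turns each tuple into its own test case; the mps case is skipped outright (the skip message indicates a non-macOS environment). Assuming pytest is the runner (the commit itself does not show how the suite is invoked), a single device's benchmark could be selected by keyword:

    pytest tests/benchmark/benchmark.py -k cuda -s   # hypothetical invocation, not part of this commit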
6 changes: 3 additions & 3 deletions tests/benchmark/benchmark_test.py
@@ -10,8 +10,8 @@


 class BenchmarkTest(unittest.TestCase):
-    MODEL_id = "/monster/data/model/gptq_4bits_11-21_15-47-09_maxlen2048_ns2048_descFalse_damp0.1"
-    MIN_NEW_TOEKNS = 100
+    MODEL_id = "/monster/data/model/Llama-3.2-1B-Instruct-gptqmodel-4bit-vortext-v1"
+    MIN_NEW_TOEKNS = 10
     NUM_RUNS = 10
     PROMPTS = [
         "I am in Paris and I",
@@ -25,7 +25,7 @@ class BenchmarkTest(unittest.TestCase):
         "Which is the most widely used Internet search engine in the world?",
         "What is the official language of France?",
     ]
-    MAX_DELTA_FLOOR_PERCENT = 0.15
+    MAX_DELTA_FLOOR_PERCENT = 0.25
     MAX_POSITIVE_DELTA_CEIL_PERCENT = 1.0

     def benchmark(self, backend, device, tokens_per_second):
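
The benchmark helper called by test_inference is defined below this hunk and is not shown in the diff. Based on the constants visible above (MIN_NEW_TOEKNS, NUM_RUNS, MAX_DELTA_FLOOR_PERCENT, MAX_POSITIVE_DELTA_CEIL_PERCENT), a minimal sketch of such a throughput check might look like the following; the GPTQModel.load call, the tokenizer attribute, and the generate interface are assumptions, not the commit's actual implementation:

    # Hypothetical sketch of a tokens-per-second check; not the actual benchmark_test.py body.
    import time

    from gptqmodel import GPTQModel  # assumed loading entry point; may differ from this commit


    def measure_tokens_per_second(model_path, backend, device, prompts,
                                  min_new_tokens=10, num_runs=10):
        # Load the quantized model on the requested backend/device (API assumed).
        model = GPTQModel.load(model_path, backend=backend, device=device)
        tokenizer = model.tokenizer  # assumption: the loaded wrapper exposes its tokenizer
        total_tokens = 0
        total_seconds = 0.0
        for _ in range(num_runs):
            for prompt in prompts:
                inputs = tokenizer(prompt, return_tensors="pt").to(device)
                start = time.time()
                output = model.generate(**inputs,
                                        min_new_tokens=min_new_tokens,
                                        max_new_tokens=min_new_tokens)
                total_seconds += time.time() - start
                total_tokens += output.shape[-1] - inputs["input_ids"].shape[-1]
        return total_tokens / total_seconds


    def assert_throughput(measured, expected, floor_pct=0.25, ceil_pct=1.0):
        # Measured throughput may fall up to floor_pct below or ceil_pct above the baseline.
        assert expected * (1 - floor_pct) <= measured <= expected * (1 + ceil_pct)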
