fixed union type backward compatibility with py39
achew010 committed Jul 8, 2024
1 parent 37eb9d7 commit 0858912
Showing 5 changed files with 32 additions and 12 deletions.
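
The `X | Y` union syntax (PEP 604) is only available at runtime from Python 3.10, so annotations such as `Optional[QuantizeConfig | Dict]` raise a `TypeError` as soon as the module is imported on Python 3.9, because function annotations are evaluated when the `def` statement runs. Rewriting them with `typing.Union` keeps the same meaning while staying importable on 3.9. A minimal sketch of the failure and the fix (the `QuantizeConfig` stand-in and the trimmed-down signature below are illustrative, not the repository code):

# Illustrative only: a stand-in class, not the repository's QuantizeConfig.
from typing import Dict, Optional, Union


class QuantizeConfig:
    pass


# On Python 3.9 the following definition fails at import time with
# "TypeError: unsupported operand type(s) for |", because the annotation
# "QuantizeConfig | Dict" is evaluated when the def statement runs:
#
# def from_quantized(quantize_config: Optional[QuantizeConfig | Dict] = None): ...

# The Union spelling is equivalent and works on Python 3.9 and later:
def from_quantized(quantize_config: Optional[Union[QuantizeConfig, Dict]] = None):
    return quantize_config

The old `torch_dtype: [str | torch.dtype] = "auto"` annotations below were also list literals wrapping a union, so the change to `Union[str, torch.dtype]` corrects the annotation form as well as restoring Python 3.9 compatibility.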
@@ -91,7 +91,7 @@ def from_quantized(
         device: Optional[Union[str, int]] = None,
         backend: Backend = Backend.AUTO,
         use_cuda_fp16: bool = True,
-        quantize_config: Optional[QuantizeConfig | Dict] = None,
+        quantize_config: Optional[Union[QuantizeConfig, Dict]] = None,
         model_basename: Optional[str] = None,
         use_safetensors: bool = True,
         trust_remote_code: bool = False,
@@ -779,7 +779,7 @@ def from_pretrained(
         quantize_config: QuantizeConfig,
         max_memory: Optional[dict] = None,
         trust_remote_code: bool = False,
-        torch_dtype: [str | torch.dtype] = "auto",
+        torch_dtype: Union[str, torch.dtype] = "auto",
         **model_init_kwargs,
     ):
         """load un-quantized pretrained model to cpu"""
@@ -880,7 +880,7 @@ def from_quantized(
         device: Optional[Union[str, int]] = None,
         low_cpu_mem_usage: bool = False,
         backend: Backend = Backend.AUTO,
-        torch_dtype: [str | torch.dtype] = "auto",
+        torch_dtype: Union[str, torch.dtype] = "auto",
         use_cuda_fp16: bool = True,
         quantize_config: Optional[QuantizeConfig] = None,
         model_basename: Optional[str] = None,
@@ -15,7 +15,7 @@
 ###############################################################################
 # Standard
 from logging import getLogger
-from typing import List, Optional
+from typing import List, Optional, Union
 import functools
 import hashlib
 import json
@@ -79,13 +79,13 @@ def recurse_setattr(module, name, value):
         recurse_setattr(getattr(module, name), rest, value)


-def get_device(obj: torch.Tensor | nn.Module):
+def get_device(obj: Union[torch.Tensor, nn.Module]):
     if isinstance(obj, torch.Tensor):
         return obj.device
     return next(obj.parameters()).device


-def move_to(obj: torch.Tensor | nn.Module, device: torch.device):
+def move_to(obj: Union[torch.Tensor, nn.Module], device: torch.device):
     if get_device(obj) != device:
         obj = obj.to(device)
     return obj
22 changes: 17 additions & 5 deletions plugins/accelerated-peft/tests/test_q4_triton.py
@@ -24,17 +24,25 @@
 import unittest  # noqa: E402

 # Third Party
-from fms_acceleration_peft.gptqmodel import Backend, GPTQModel  # noqa: E402
-from fms_acceleration_peft.gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import (  # noqa: E402
-    QuantLinear as TritonV2QuantLinear,
-)
 from transformers import AutoTokenizer  # noqa: E402
 import torch  # noqa: E402

-GENERATE_EVAL_SIZE = 100
+CUDA_AVAILABLE = False
+if torch.cuda.is_available():
+    from fms_acceleration_peft.gptqmodel import Backend, GPTQModel  # noqa: E402
+    from fms_acceleration_peft.gptqmodel.nn_modules.qlinear.qlinear_tritonv2 import (  # noqa: E402
+        QuantLinear as TritonV2QuantLinear,
+    )
+    CUDA_AVAILABLE = True
+

+GENERATE_EVAL_SIZE = 100

 class TestsQ4Triton(unittest.TestCase):
+    @unittest.skipIf(
+        CUDA_AVAILABLE is False,
+        "Only runs if there is a cuda device available",
+    )
     def test_generation_desc_act_false(self):
         prompt = "I am in Paris and"

@@ -79,6 +87,10 @@ def test_generation_desc_act_false(self):
             predicted_text[:GENERATE_EVAL_SIZE], reference_output[:GENERATE_EVAL_SIZE]
         )

+    @unittest.skipIf(
+        CUDA_AVAILABLE is False,
+        "Only runs if there is a cuda device available",
+    )
     def test_generation_desc_act_true(self):
         prompt = "I am in Paris and"
         device = torch.device("cuda:0")
10 changes: 9 additions & 1 deletion plugins/accelerated-peft/tests/test_triton.py
@@ -25,11 +25,15 @@
 import unittest  # noqa: E402

 # Third Party
-from fms_acceleration_peft.gptqmodel import Backend, GPTQModel  # noqa: E402
 from transformers import AutoTokenizer  # noqa: E402
 import torch  # noqa: E402
 import torch.utils.benchmark as benchmark  # noqa: E402

+CUDA_AVAILABLE = False
+if torch.cuda.is_available():
+    from fms_acceleration_peft.gptqmodel import Backend, GPTQModel  # noqa: E402
+    CUDA_AVAILABLE = True
+
 MODEL_ID = "TheBloke/Llama-7B-GPTQ"
 DATASET_ID = "timdettmers/openassistant-guanaco"
 LEARNING_RATE = 3e-5
@@ -94,6 +98,10 @@ def get_model_and_tokenizer(


 class TestTriton(unittest.TestCase):
+    @unittest.skipIf(
+        CUDA_AVAILABLE is False,
+        "Only runs if there is a cuda device available",
+    )
     def test_triton_qlinear(self):
         ref_model, _ = get_model_and_tokenizer(
             model_id=MODEL_ID,

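Both test files apply the same pattern: imports that ultimately require CUDA are deferred behind a `torch.cuda.is_available()` check, and the tests that need them are skipped rather than erroring during collection on CPU-only machines. A condensed sketch of that pattern, assuming the package is installed (the test class and method names here are illustrative):

import unittest

import torch

CUDA_AVAILABLE = False
if torch.cuda.is_available():
    # CUDA-dependent imports only happen when a GPU is present.
    from fms_acceleration_peft.gptqmodel import Backend, GPTQModel

    CUDA_AVAILABLE = True


class ExampleGPUTest(unittest.TestCase):
    @unittest.skipIf(
        CUDA_AVAILABLE is False,
        "Only runs if there is a cuda device available",
    )
    def test_needs_gpu(self):
        # The decorator keeps this body from running on CPU-only machines,
        # so it is safe to touch CUDA and the guarded imports here.
        self.assertTrue(torch.cuda.is_available())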