Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix ort config instantiation (from_pretrained) and saving (save_pretrained) #1865

Merged
merged 4 commits into from
May 28, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 18 additions & 15 deletions .github/workflows/test_cli.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ name: Optimum CLI / Python - Test

on:
push:
branches: [ main ]
branches: [main]
pull_request:
branches: [ main ]
branches: [main]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
Expand All @@ -22,17 +22,20 @@ jobs:

runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[tests,exporters,exporters-tf]
- name: Test with unittest
working-directory: tests
run: |
python -m unittest discover -s cli -p 'test_*.py'
- name: Checkout code
uses: actions/checkout@v4

- name: Setup Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}

- name: Install dependencies
run: |
pip install --upgrade pip
pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install .[tests,exporters,exporters-tf]

- name: Test with pytest
run: |
pytest tests/cli -s -vvvv --durations=0
2 changes: 1 addition & 1 deletion optimum/commands/onnxruntime/quantize.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def run(self):
"TensorRT quantization relies on static quantization that requires calibration, which is currently not supported through optimum-cli. Please adapt Optimum static quantization examples to run static quantization for TensorRT: https://github.com/huggingface/optimum/tree/main/examples/onnxruntime/quantization"
)
else:
qconfig = ORTConfig.from_pretained(self.args.config).quantization
qconfig = ORTConfig.from_pretrained(self.args.config).quantization

for q in quantizers:
q.quantize(save_dir=save_dir, quantization_config=qconfig)
45 changes: 42 additions & 3 deletions optimum/onnxruntime/configuration.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from dataclasses import asdict, dataclass, field
from enum import Enum
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Union
from typing import Any, Dict, List, Optional, Tuple, Union

from datasets import Dataset
from packaging.version import Version, parse
Expand Down Expand Up @@ -298,6 +298,15 @@ def __post_init__(self):
)
self.operators_to_quantize = operators_to_quantize

if isinstance(self.format, str):
self.format = QuantFormat[self.format]
if isinstance(self.mode, str):
self.mode = QuantizationMode[self.mode]
if isinstance(self.activations_dtype, str):
self.activations_dtype = QuantType[self.activations_dtype]
if isinstance(self.weights_dtype, str):
self.weights_dtype = QuantType[self.weights_dtype]

Comment on lines +301 to +309
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

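In `__post_init__`, string values for `format`, `mode`, `activations_dtype` and `weights_dtype` are converted to their corresponding enum members (str -> enum).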

@staticmethod
def quantization_type_str(activations_dtype: QuantType, weights_dtype: QuantType) -> str:
return (
Expand Down Expand Up @@ -984,8 +993,24 @@ def __init__(
self.opset = opset
self.use_external_data_format = use_external_data_format
self.one_external_file = one_external_file
self.optimization = self.dataclass_to_dict(optimization)
self.quantization = self.dataclass_to_dict(quantization)

if isinstance(optimization, dict):
self.optimization = OptimizationConfig(**optimization)
elif isinstance(optimization, OptimizationConfig) or optimization is None:
self.optimization = optimization
else:
raise ValueError(
f"Optional argument `optimization` must be a dictionary or an instance of OptimizationConfig, got {type(optimization)}"
)
if isinstance(quantization, dict):
self.quantization = QuantizationConfig(**quantization)
elif isinstance(quantization, QuantizationConfig) or quantization is None:
self.quantization = quantization
else:
raise ValueError(
f"Optional argument `quantization` must be a dictionary or an instance of QuantizationConfig, got {type(quantization)}"
)

self.optimum_version = kwargs.pop("optimum_version", None)

@staticmethod
Expand All @@ -1002,3 +1027,17 @@ def dataclass_to_dict(config) -> dict:
v = [elem.name if isinstance(elem, Enum) else elem for elem in v]
new_config[k] = v
return new_config

# Build a plain-dictionary representation of this config.
# The `optimization` and `quantization` attributes are passed through
# `self.dataclass_to_dict` before being stored in the result.
def to_dict(self) -> Dict[str, Any]:
dict_config = {
"opset": self.opset,
"use_external_data_format": self.use_external_data_format,
"one_external_file": self.one_external_file,
"optimization": self.dataclass_to_dict(self.optimization),
"quantization": self.dataclass_to_dict(self.quantization),
}

# `optimum_version` is only added when it holds a truthy value.
if self.optimum_version:
dict_config["optimum_version"] = self.optimum_version

return dict_config
Comment on lines +1035 to +1047
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This `to_dict` method is called by the mixin (`BaseConfig`) at save time.

31 changes: 16 additions & 15 deletions tests/cli/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@
import unittest
from pathlib import Path

from onnxruntime import __version__ as ort_version
from packaging.version import Version, parse

import optimum.commands
from optimum.onnxruntime.configuration import ORTConfig, AutoQuantizationConfig


CLI_WIH_CUSTOM_COMMAND_PATH = Path(__file__).parent / "cli_with_custom_command.py"
Expand Down Expand Up @@ -83,30 +81,33 @@ def test_optimize_commands(self):

def test_quantize_commands(self):
with tempfile.TemporaryDirectory() as tempdir:
ort_config = ORTConfig(quantization=AutoQuantizationConfig.avx2(is_static=False))
ort_config.save_pretrained(tempdir)

# First export a tiny encoder, decoder only and encoder-decoder
export_commands = [
f"optimum-cli export onnx --model hf-internal-testing/tiny-random-BertModel {tempdir}/encoder",
f"optimum-cli export onnx --model hf-internal-testing/tiny-random-bert {tempdir}/encoder",
f"optimum-cli export onnx --model hf-internal-testing/tiny-random-gpt2 {tempdir}/decoder",
# f"optimum-cli export onnx --model hf-internal-testing/tiny-random-t5 {tempdir}/encoder-decoder",
f"optimum-cli export onnx --model hf-internal-testing/tiny-random-t5 {tempdir}/encoder-decoder",
]
quantize_commands = [
f"optimum-cli onnxruntime quantize --onnx_model {tempdir}/encoder --avx2 -o {tempdir}/quantized_encoder",
f"optimum-cli onnxruntime quantize --onnx_model {tempdir}/decoder --avx2 -o {tempdir}/quantized_decoder",
# f"optimum-cli onnxruntime quantize --onnx_model {tempdir}/encoder-decoder --avx2 -o {tempdir}/quantized_encoder_decoder",
f"optimum-cli onnxruntime quantize --onnx_model {tempdir}/encoder-decoder --avx2 -o {tempdir}/quantized_encoder_decoder",
]

if parse(ort_version) != Version("1.16.0") and parse(ort_version) != Version("1.17.0"):
# Failing on onnxruntime==1.17.0, will be fixed on 1.17.1: https://github.com/microsoft/onnxruntime/pull/19421
export_commands.append(
f"optimum-cli export onnx --model hf-internal-testing/tiny-random-t5 {tempdir}/encoder-decoder"
)
quantize_commands.append(
f"optimum-cli onnxruntime quantize --onnx_model {tempdir}/encoder-decoder --avx2 -o {tempdir}/quantized_encoder_decoder"
)
quantize_with_config_commands = [
f"optimum-cli onnxruntime quantize --onnx_model hf-internal-testing/tiny-random-bert --c {tempdir}/ort_config.json -o {tempdir}/quantized_encoder_with_config",
f"optimum-cli onnxruntime quantize --onnx_model hf-internal-testing/tiny-random-gpt2 --c {tempdir}/ort_config.json -o {tempdir}/quantized_decoder_with_config",
f"optimum-cli onnxruntime quantize --onnx_model hf-internal-testing/tiny-random-t5 --c {tempdir}/ort_config.json -o {tempdir}/quantized_encoder_decoder_with_config",
]

for export, quantize in zip(export_commands, quantize_commands):
for export, quantize, quantize_with_config in zip(
export_commands, quantize_commands, quantize_with_config_commands
):
subprocess.run(export, shell=True, check=True)
subprocess.run(quantize, shell=True, check=True)
subprocess.run(quantize_with_config, shell=True, check=True)

def _run_command_and_check_content(self, command: str, content: str) -> bool:
proc = subprocess.Popen(command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
Expand Down
Loading