From e8b03321035ea19001bcbb773444e3f0574d4150 Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Fri, 15 Nov 2024 17:15:33 +0100
Subject: [PATCH 01/20] Update Dockerfile base image (#2089)

upgrade base image
---
 docs/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/Dockerfile b/docs/Dockerfile
index 29ea0f916ce..d76dc50c556 100644
--- a/docs/Dockerfile
+++ b/docs/Dockerfile
@@ -1,4 +1,4 @@
-FROM nikolaik/python-nodejs:python3.8-nodejs18
+FROM nikolaik/python-nodejs:python3.9-nodejs18

 ARG commit_sha
 ARG clone_url

From c513437511e51ccedb4f28c30e6aea9c0cf76a4a Mon Sep 17 00:00:00 2001
From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com>
Date: Mon, 18 Nov 2024 13:47:29 +0100
Subject: [PATCH 02/20] Add transformers 4.36 tests (#2085)

* add transformers 4.36 tests
* add test depending on transformers version
* add min transformers required version for gemma
* update macos
* fix whisper test
* add opt
* fix mpt
* add comment
* add granite test when supported by transformers
---
 .github/workflows/test_onnxruntime.yml  |  4 ++-
 optimum/exporters/onnx/model_configs.py |  4 ++-
 setup.py                                | 10 +++----
 tests/onnxruntime/test_modeling.py      | 37 +++++++++++++++----------
 4 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml
index 089300f7cd9..fec5c7e5b27 100644
--- a/.github/workflows/test_onnxruntime.yml
+++ b/.github/workflows/test_onnxruntime.yml
@@ -18,8 +18,10 @@ jobs:
       fail-fast: false
       matrix:
         transformers-version: ["latest"]
-        os: [ubuntu-20.04, windows-2019, macos-13]
+        os: [ubuntu-20.04, windows-2019, macos-15]
         include:
+          - transformers-version: "4.36.*"
+            os: ubuntu-20.04
           - transformers-version: "4.45.*"
             os: ubuntu-20.04

diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py
index cc752779d30..6b92109b7b6 100644
--- a/optimum/exporters/onnx/model_configs.py
+++ b/optimum/exporters/onnx/model_configs.py
@@ -295,7 +295,7 @@ class Qwen2OnnxConfig(LlamaOnnxConfig):
 class GemmaOnnxConfig(LlamaOnnxConfig):
     DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, GemmaDummyPastKeyValuesGenerator)
     DUMMY_PKV_GENERATOR_CLASS = GemmaDummyPastKeyValuesGenerator
-    pass
+    MIN_TRANSFORMERS_VERSION = version.parse("4.38.0")


 class GraniteOnnxConfig(LlamaOnnxConfig):
@@ -348,6 +348,8 @@ def patch_model_for_export(
 class MPTOnnxConfig(TextDecoderOnnxConfig):
     # MPT does not require position_ids input.
     DEFAULT_ONNX_OPSET = 13
+    # TODO: fix inference for transformers < v4.41 for beam_search > 1
+    MIN_TRANSFORMERS_VERSION = version.parse("4.41.0")
     NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(
         num_attention_heads="n_heads", hidden_size="d_model", num_layers="n_layers"
     )
diff --git a/setup.py b/setup.py
index 7ea0da56c29..29f97b604e0 100644
--- a/setup.py
+++ b/setup.py
@@ -54,7 +54,7 @@
         "datasets>=1.2.1",
         "evaluate",
         "protobuf>=3.20.1",
-        "transformers<4.47.0",
+        "transformers>=4.36,<4.47.0",
     ],
     "onnxruntime-gpu": [
         "onnx",
         "onnxruntime-gpu",
         "datasets>=1.2.1",
         "evaluate",
         "protobuf>=3.20.1",
         "accelerate", # ORTTrainer requires it.
- "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters": [ "onnx", "onnxruntime", "timm", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters-gpu": [ "onnx", "onnxruntime-gpu", "timm", - "transformers<4.47.0", + "transformers>=4.36,<4.47.0", ], "exporters-tf": [ "tensorflow>=2.4,<=2.12.1", @@ -86,7 +86,7 @@ "h5py", "numpy<1.24.0", "datasets<=2.16", - "transformers>=4.26,<4.38", + "transformers>=4.36,<4.38", ], "diffusers": ["diffusers"], "intel": "optimum-intel>=1.18.0", diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index a335e014478..84ac27029f9 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -2318,21 +2318,28 @@ class ORTModelForCausalLMIntegrationTest(ORTModelTestMixin): "bloom", "codegen", "falcon", - "gemma", "gpt2", "gpt_bigcode", "gpt_neo", "gpt_neox", "gptj", - "granite", "llama", "mistral", - "mpt", "opt", ] - if check_if_transformers_greater("4.40"): - SUPPORTED_ARCHITECTURES.extend(["gemma", "phi3", "qwen2"]) + if check_if_transformers_greater("4.37"): + SUPPORTED_ARCHITECTURES.append("qwen2") + + if check_if_transformers_greater("4.38"): + SUPPORTED_ARCHITECTURES.append("gemma") + + # TODO: fix "mpt" for which inference fails for transformers < v4.41 + if check_if_transformers_greater("4.41"): + SUPPORTED_ARCHITECTURES.extend(["phi3", "mpt"]) + + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES.append("granite") FULL_GRID = { "model_arch": SUPPORTED_ARCHITECTURES, @@ -2445,7 +2452,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach transformers_model = AutoModelForCausalLM.from_pretrained(model_id) transformers_model = transformers_model.eval() tokenizer = get_preprocessor(model_id) - tokens = tokenizer("This is a sample output", return_tensors="pt") + tokens = tokenizer("This is a sample input", return_tensors="pt") position_ids = None if model_arch.replace("_", "-") in MODEL_TYPES_REQUIRING_POSITION_IDS: input_shape = tokens["input_ids"].shape @@ -2467,7 +2474,7 @@ def test_compare_to_transformers(self, test_name: str, model_arch: str, use_cach # Compare batched generation. 
tokenizer.pad_token_id = tokenizer.eos_token_id tokenizer.padding_side = "left" - tokens = tokenizer(["Today is a nice day and I am longer", "This is me"], return_tensors="pt", padding=True) + tokens = tokenizer(["This is", "This is a sample input"], return_tensors="pt", padding=True) onnx_model.generation_config.eos_token_id = None transformers_model.generation_config.eos_token_id = None onnx_model.config.eos_token_id = None @@ -4598,14 +4605,14 @@ def test_compare_with_and_without_past_key_values(self, model_arch: str): ) self.assertTrue(torch.equal(outputs_model_with_pkv, outputs_model_without_pkv)) - self.assertEqual( - outputs_model_with_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) - self.assertEqual( - outputs_model_without_pkv.shape[1], - self.GENERATION_LENGTH + 2 if model_arch == "whisper" else self.GENERATION_LENGTH + 1, - ) + + if model_arch == "whisper" and check_if_transformers_greater("4.43"): + gen_length = self.GENERATION_LENGTH + 2 + else: + gen_length = self.GENERATION_LENGTH + 1 + + self.assertEqual(outputs_model_with_pkv.shape[1], gen_length) + self.assertEqual(outputs_model_without_pkv.shape[1], gen_length) self.GENERATION_LENGTH = generation_length if os.environ.get("TEST_LEVEL", 0) == "1": From 400bb82f312016b0a31b342d48b00d031786417d Mon Sep 17 00:00:00 2001 From: Tom Aarsen <37621491+tomaarsen@users.noreply.github.com> Date: Mon, 18 Nov 2024 15:05:37 +0100 Subject: [PATCH 03/20] [`fix`] Allow ORTQuantizer over models with subfolder ONNX files (#2094) * Allow ORTQuantizer over models with subfolder ONNX files * Also catch ValueError as that seems a common fail when AutoConfig.from_pretrained("does/not/exist") * Use test case that previously failed --- optimum/onnxruntime/quantization.py | 9 +++++---- tests/onnxruntime/test_quantization.py | 8 ++++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/optimum/onnxruntime/quantization.py b/optimum/onnxruntime/quantization.py index 056123f8d8e..f637916dcd2 100644 --- a/optimum/onnxruntime/quantization.py +++ b/optimum/onnxruntime/quantization.py @@ -100,7 +100,7 @@ def __init__(self, onnx_model_path: Path, config: Optional["PretrainedConfig"] = if self.config is None: try: self.config = AutoConfig.from_pretrained(self.onnx_model_path.parent) - except OSError: + except (OSError, ValueError): LOGGER.warning( f"Could not load the config for {self.onnx_model_path} automatically, this might make " "the quantized model harder to use because it will not be able to be loaded by an ORTModel without " @@ -134,6 +134,7 @@ def from_pretrained( model_or_path = Path(model_or_path) path = None + config = None if isinstance(model_or_path, ORTModelForConditionalGeneration): raise NotImplementedError(ort_quantizer_error_message) elif isinstance(model_or_path, Path) and file_name is None: @@ -147,13 +148,13 @@ def from_pretrained( file_name = onnx_files[0].name if isinstance(model_or_path, ORTModel): - if path is None: - path = Path(model_or_path.model._model_path) + path = Path(model_or_path.model._model_path) + config = model_or_path.config elif os.path.isdir(model_or_path): path = Path(model_or_path) / file_name else: raise ValueError(f"Unable to load model from {model_or_path}.") - return cls(path) + return cls(path, config=config) def fit( self, diff --git a/tests/onnxruntime/test_quantization.py b/tests/onnxruntime/test_quantization.py index b6f1ebb70f6..34a9504f95a 100644 --- a/tests/onnxruntime/test_quantization.py +++ 
b/tests/onnxruntime/test_quantization.py @@ -30,6 +30,7 @@ AutoQuantizationConfig, ORTConfig, ORTModelForCausalLM, + ORTModelForFeatureExtraction, ORTModelForSeq2SeqLM, ORTModelForSequenceClassification, ORTQuantizer, @@ -52,6 +53,13 @@ class ORTQuantizerTest(unittest.TestCase): "optimum/distilbert-base-uncased-finetuned-sst-2-english" ) }, + "ort_model_with_onnx_model_in_subfolder": { + "model_or_path": ORTModelForFeatureExtraction.from_pretrained( + "sentence-transformers/all-MiniLM-L6-v2", + subfolder="onnx", + file_name="model.onnx", + ) + }, } @parameterized.expand(LOAD_CONFIGURATION.items()) From a7a807c9e712fd9669865358e34c1de072b78d8e Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Tue, 19 Nov 2024 13:10:57 +0100 Subject: [PATCH 04/20] SD3 and Flux support (#2073) * sd3 support * unsupported cli model types * flux transformer support, unet export fixes, updated callback test, updated negative prompt test, flux and sd3 tests * fixes * move input generators * dummy diffusers * style * sd3 support * unsupported cli model types * flux transformer support, unet export fixes, updated callback test, updated negative prompt test, flux and sd3 tests * fixes * move input generators * dummy diffusers * style * distribute ort tests * fix * fix * fix * test num images * single process to reduce re-exports * test * revert unnecessary changes * T5Encoder inherits from TextEncoder * style * fix typo in timestep * style * only test sd3 and flux on latest transformers * conditional sd3 and flux modeling * forgot sd3 inpaint --- .github/workflows/test_onnxruntime.yml | 13 +- optimum/exporters/onnx/base.py | 1 + optimum/exporters/onnx/convert.py | 4 + optimum/exporters/onnx/model_configs.py | 123 +++++++++-- optimum/exporters/tasks.py | 29 ++- optimum/exporters/utils.py | 190 +++++++++++------ optimum/onnxruntime/__init__.py | 72 +++++-- optimum/onnxruntime/modeling_diffusion.py | 202 +++++++++++++++++-- optimum/utils/__init__.py | 7 + optimum/utils/constant.py | 4 +- optimum/utils/dummy_diffusers_objects.py | 74 ++++++- optimum/utils/input_generators.py | 81 +++++++- tests/exporters/exporters_utils.py | 4 +- tests/exporters/onnx/test_onnx_export.py | 2 - tests/onnxruntime/test_diffusion.py | 192 +++++++++++------- tests/onnxruntime/test_modeling.py | 2 +- tests/onnxruntime/test_quantization.py | 4 +- tests/onnxruntime/utils_onnxruntime_tests.py | 4 +- 18 files changed, 791 insertions(+), 217 deletions(-) diff --git a/.github/workflows/test_onnxruntime.yml b/.github/workflows/test_onnxruntime.yml index fec5c7e5b27..b20a3b46f88 100644 --- a/.github/workflows/test_onnxruntime.yml +++ b/.github/workflows/test_onnxruntime.yml @@ -26,14 +26,11 @@ jobs: os: ubuntu-20.04 runs-on: ${{ matrix.os }} + steps: - name: Free Disk Space (Ubuntu) if: matrix.os == 'ubuntu-20.04' uses: jlumbroso/free-disk-space@main - with: - tool-cache: false - swap-storage: false - large-packages: false - name: Checkout code uses: actions/checkout@v4 @@ -54,13 +51,11 @@ jobs: run: pip install transformers==${{ matrix.transformers-version }} - name: Test with pytest (in series) - working-directory: tests run: | - pytest onnxruntime -m "run_in_series" --durations=0 -vvvv -s + pytest tests/onnxruntime -m "run_in_series" --durations=0 -vvvv -s - name: Test with pytest (in parallel) + run: | + pytest tests/onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto env: HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} - working-directory: tests - run: | - pytest 
onnxruntime -m "not run_in_series" --durations=0 -vvvv -s -n auto diff --git a/optimum/exporters/onnx/base.py b/optimum/exporters/onnx/base.py index 8cd94194ffe..7e35691d54b 100644 --- a/optimum/exporters/onnx/base.py +++ b/optimum/exporters/onnx/base.py @@ -319,6 +319,7 @@ def fix_dynamic_axes( input_shapes = {} dummy_inputs = self.generate_dummy_inputs(framework="np", **input_shapes) dummy_inputs = self.generate_dummy_inputs_for_validation(dummy_inputs, onnx_input_names=onnx_input_names) + dummy_inputs = self.rename_ambiguous_inputs(dummy_inputs) onnx_inputs = {} for name, value in dummy_inputs.items(): diff --git a/optimum/exporters/onnx/convert.py b/optimum/exporters/onnx/convert.py index 2661d835979..c12a9ac222a 100644 --- a/optimum/exporters/onnx/convert.py +++ b/optimum/exporters/onnx/convert.py @@ -1183,6 +1183,10 @@ def onnx_export_from_model( if tokenizer_2 is not None: tokenizer_2.save_pretrained(output.joinpath("tokenizer_2")) + tokenizer_3 = getattr(model, "tokenizer_3", None) + if tokenizer_3 is not None: + tokenizer_3.save_pretrained(output.joinpath("tokenizer_3")) + model.save_config(output) if float_dtype == "bf16": diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 6b92109b7b6..8984162ee8c 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -13,6 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. """Model specific ONNX configurations.""" + import random from pathlib import Path from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union @@ -28,6 +29,8 @@ DummyCodegenDecoderTextInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, + DummyFluxTransformerTextInputGenerator, + DummyFluxTransformerVisionInputGenerator, DummyInputGenerator, DummyIntGenerator, DummyPastKeyValuesGenerator, @@ -38,6 +41,9 @@ DummySpeechT5InputGenerator, DummyTextInputGenerator, DummyTimestepInputGenerator, + DummyTransformerTextInputGenerator, + DummyTransformerTimestepInputGenerator, + DummyTransformerVisionInputGenerator, DummyVisionEmbeddingsGenerator, DummyVisionEncoderDecoderPastKeyValuesGenerator, DummyVisionInputGenerator, @@ -53,6 +59,7 @@ NormalizedTextConfig, NormalizedTextConfigWithGQA, NormalizedVisionConfig, + check_if_diffusers_greater, check_if_transformers_greater, is_diffusers_available, logging, @@ -1039,22 +1046,13 @@ def outputs(self) -> Dict[str, Dict[int, str]]: "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, "pooler_output": {0: "batch_size"}, } + if self._normalized_config.output_hidden_states: for i in range(self._normalized_config.num_layers + 1): common_outputs[f"hidden_states.{i}"] = {0: "batch_size", 1: "sequence_length"} return common_outputs - def generate_dummy_inputs(self, framework: str = "pt", **kwargs): - dummy_inputs = super().generate_dummy_inputs(framework=framework, **kwargs) - - # TODO: fix should be by casting inputs during inference and not export - if framework == "pt": - import torch - - dummy_inputs["input_ids"] = dummy_inputs["input_ids"].to(dtype=torch.int32) - return dummy_inputs - def patch_model_for_export( self, model: Union["PreTrainedModel", "TFPreTrainedModel", "ModelMixin"], @@ -1064,7 +1062,7 @@ def patch_model_for_export( class UNetOnnxConfig(VisionOnnxConfig): - ATOL_FOR_VALIDATION = 1e-3 + ATOL_FOR_VALIDATION = 1e-4 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu # 
operator support, available since opset 14 DEFAULT_ONNX_OPSET = 14 @@ -1087,17 +1085,19 @@ class UNetOnnxConfig(VisionOnnxConfig): def inputs(self) -> Dict[str, Dict[int, str]]: common_inputs = { "sample": {0: "batch_size", 2: "height", 3: "width"}, - "timestep": {0: "steps"}, + "timestep": {}, # a scalar with no dimension "encoder_hidden_states": {0: "batch_size", 1: "sequence_length"}, } - # TODO : add text_image, image and image_embeds + # TODO : add addition_embed_type == text_image, image and image_embeds + # https://github.com/huggingface/diffusers/blob/9366c8f84bfe47099ff047272661786ebb54721d/src/diffusers/models/unets/unet_2d_condition.py#L671 if getattr(self._normalized_config, "addition_embed_type", None) == "text_time": common_inputs["text_embeds"] = {0: "batch_size"} common_inputs["time_ids"] = {0: "batch_size"} if getattr(self._normalized_config, "time_cond_proj_dim", None) is not None: common_inputs["timestep_cond"] = {0: "batch_size"} + return common_inputs @property @@ -1136,7 +1136,7 @@ def ordered_inputs(self, model) -> Dict[str, Dict[int, str]]: class VaeEncoderOnnxConfig(VisionOnnxConfig): - ATOL_FOR_VALIDATION = 1e-4 + ATOL_FOR_VALIDATION = 3e-4 # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu # operator support, available since opset 14 DEFAULT_ONNX_OPSET = 14 @@ -1184,6 +1184,101 @@ def outputs(self) -> Dict[str, Dict[int, str]]: } +class T5EncoderOnnxConfig(TextEncoderOnnxConfig): + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + ATOL_FOR_VALIDATION = 1e-4 + DEFAULT_ONNX_OPSET = 12 # int64 was supported since opset 12 + + @property + def inputs(self): + return { + "input_ids": {0: "batch_size", 1: "sequence_length"}, + } + + @property + def outputs(self): + return { + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + } + + +class SD3TransformerOnnxConfig(VisionOnnxConfig): + ATOL_FOR_VALIDATION = 1e-4 + # The ONNX export of a CLIPText architecture, an other Stable Diffusion component, needs the Trilu + # operator support, available since opset 14 + DEFAULT_ONNX_OPSET = 14 + + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTransformerTimestepInputGenerator, + DummyTransformerVisionInputGenerator, + DummyTransformerTextInputGenerator, + ) + + NORMALIZED_CONFIG_CLASS = NormalizedConfig.with_args( + image_size="sample_size", + num_channels="in_channels", + vocab_size="attention_head_dim", + hidden_size="joint_attention_dim", + projection_size="pooled_projection_dim", + allow_new=True, + ) + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + common_inputs = { + "hidden_states": {0: "batch_size", 2: "height", 3: "width"}, + "encoder_hidden_states": {0: "batch_size", 1: "sequence_length"}, + "pooled_projections": {0: "batch_size"}, + "timestep": {0: "step"}, + } + + return common_inputs + + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "out_hidden_states": {0: "batch_size", 2: "height", 3: "width"}, + } + + @property + def torch_to_onnx_output_map(self) -> Dict[str, str]: + return { + "sample": "out_hidden_states", + } + + +class FluxTransformerOnnxConfig(SD3TransformerOnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = ( + DummyTransformerTimestepInputGenerator, + DummyFluxTransformerVisionInputGenerator, + DummyFluxTransformerTextInputGenerator, + ) + + @property + def inputs(self): + common_inputs = super().inputs + common_inputs["hidden_states"] = {0: "batch_size", 1: "packed_height_width"} + common_inputs["txt_ids"] = ( + {0: "sequence_length"} if 
check_if_diffusers_greater("0.31.0") else {0: "batch_size", 1: "sequence_length"} + ) + common_inputs["img_ids"] = ( + {0: "packed_height_width"} + if check_if_diffusers_greater("0.31.0") + else {0: "batch_size", 1: "packed_height_width"} + ) + + if getattr(self._normalized_config, "guidance_embeds", False): + common_inputs["guidance"] = {0: "batch_size"} + + return common_inputs + + @property + def outputs(self): + return { + "out_hidden_states": {0: "batch_size", 1: "packed_height_width"}, + } + + class GroupViTOnnxConfig(CLIPOnnxConfig): pass diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index fdc8bfcb539..b4bce4696f3 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -335,7 +335,11 @@ class TasksManager: } _DIFFUSERS_SUPPORTED_MODEL_TYPE = { - "clip-text-model": supported_tasks_mapping( + "t5-encoder": supported_tasks_mapping( + "feature-extraction", + onnx="T5EncoderOnnxConfig", + ), + "clip-text": supported_tasks_mapping( "feature-extraction", onnx="CLIPTextOnnxConfig", ), @@ -343,7 +347,15 @@ class TasksManager: "feature-extraction", onnx="CLIPTextWithProjectionOnnxConfig", ), - "unet": supported_tasks_mapping( + "flux-transformer-2d": supported_tasks_mapping( + "semantic-segmentation", + onnx="FluxTransformerOnnxConfig", + ), + "sd3-transformer-2d": supported_tasks_mapping( + "semantic-segmentation", + onnx="SD3TransformerOnnxConfig", + ), + "unet-2d-condition": supported_tasks_mapping( "semantic-segmentation", onnx="UNetOnnxConfig", ), @@ -1177,12 +1189,17 @@ class TasksManager: "transformers": _SUPPORTED_MODEL_TYPE, } _UNSUPPORTED_CLI_MODEL_TYPE = { - "unet", + # diffusers model types + "clip-text", + "clip-text-with-projection", + "flux-transformer-2d", + "sd3-transformer-2d", + "t5-encoder", + "unet-2d-condition", "vae-encoder", "vae-decoder", - "clip-text-model", - "clip-text-with-projection", - "trocr", # supported through the vision-encoder-decoder model type + # redundant model types + "trocr", # same as vision-encoder-decoder } _SUPPORTED_CLI_MODEL_TYPE = ( set(_SUPPORTED_MODEL_TYPE.keys()) diff --git a/optimum/exporters/utils.py b/optimum/exporters/utils.py index 949b54f4685..60de169de5e 100644 --- a/optimum/exporters/utils.py +++ b/optimum/exporters/utils.py @@ -15,7 +15,6 @@ """Utilities for model preparation to export.""" - import copy from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union @@ -44,12 +43,7 @@ "Please update diffusers by running `pip install --upgrade diffusers`" ) - from diffusers import ( - DiffusionPipeline, - StableDiffusionXLImg2ImgPipeline, - StableDiffusionXLInpaintPipeline, - StableDiffusionXLPipeline, - ) + from diffusers import DiffusionPipeline from diffusers.models.attention_processor import ( Attention, AttnAddedKVProcessor, @@ -80,6 +74,20 @@ DECODER_MERGED_NAME = "decoder_model_merged" +_DIFFUSERS_CLASS_NAME_TO_SUBMODEL_TYPE = { + "CLIPTextModel": "clip-text", + "CLIPTextModelWithProjection": "clip-text-with-projection", + "FluxTransformer2DModel": "flux-transformer-2d", + "SD3Transformer2DModel": "sd3-transformer-2d", + "UNet2DConditionModel": "unet-2d-condition", + "T5EncoderModel": "t5-encoder", +} + + +def _get_diffusers_submodel_type(submodel): + return _DIFFUSERS_CLASS_NAME_TO_SUBMODEL_TYPE.get(submodel.__class__.__name__) + + def _get_submodels_for_export_diffusion( pipeline: "DiffusionPipeline", ) -> Dict[str, Union["PreTrainedModel", "ModelMixin"]]: @@ -87,56 +95,87 @@ def _get_submodels_for_export_diffusion( Returns the components of a Stable Diffusion 
model. """ - is_stable_diffusion_xl = isinstance( - pipeline, (StableDiffusionXLPipeline, StableDiffusionXLImg2ImgPipeline, StableDiffusionXLInpaintPipeline) - ) - if is_stable_diffusion_xl: - projection_dim = pipeline.text_encoder_2.config.projection_dim - else: - projection_dim = pipeline.text_encoder.config.projection_dim - models_for_export = {} + is_torch_greater_or_equal_than_2_1 = version.parse(torch.__version__) >= version.parse("2.1.0") + is_sdxl = pipeline.__class__.__name__.startswith("StableDiffusionXL") + is_sd3 = pipeline.__class__.__name__.startswith("StableDiffusion3") + # Text encoder text_encoder = getattr(pipeline, "text_encoder", None) if text_encoder is not None: - if is_stable_diffusion_xl: + if is_sdxl or is_sd3: text_encoder.config.output_hidden_states = True + text_encoder.text_model.config.output_hidden_states = True + + text_encoder.config.export_model_type = _get_diffusers_submodel_type(text_encoder) models_for_export["text_encoder"] = text_encoder - # U-NET - # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 - is_torch_greater_or_equal_than_2_1 = version.parse(torch.__version__) >= version.parse("2.1.0") - if not is_torch_greater_or_equal_than_2_1: - pipeline.unet.set_attn_processor(AttnProcessor()) + # Text encoder 2 + text_encoder_2 = getattr(pipeline, "text_encoder_2", None) + if text_encoder_2 is not None: + if is_sdxl or is_sd3: + text_encoder_2.config.output_hidden_states = True + text_encoder_2.text_model.config.output_hidden_states = True - pipeline.unet.config.text_encoder_projection_dim = projection_dim - # The U-NET time_ids inputs shapes depends on the value of `requires_aesthetics_score` - # https://github.com/huggingface/diffusers/blob/v0.18.2/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py#L571 - pipeline.unet.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False) - models_for_export["unet"] = pipeline.unet + text_encoder_2.config.export_model_type = _get_diffusers_submodel_type(text_encoder_2) + models_for_export["text_encoder_2"] = text_encoder_2 - # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565 + # Text encoder 3 + text_encoder_3 = getattr(pipeline, "text_encoder_3", None) + if text_encoder_3 is not None: + text_encoder_3.config.export_model_type = _get_diffusers_submodel_type(text_encoder_3) + models_for_export["text_encoder_3"] = text_encoder_3 + + # U-NET + unet = getattr(pipeline, "unet", None) + if unet is not None: + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 + if not is_torch_greater_or_equal_than_2_1: + unet.set_attn_processor(AttnProcessor()) + + # The U-NET time_ids inputs shapes depends on the value of `requires_aesthetics_score` + # https://github.com/huggingface/diffusers/blob/v0.18.2/src/diffusers/pipelines/stable_diffusion_xl/pipeline_stable_diffusion_xl_img2img.py#L571 + unet.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False) + unet.config.time_cond_proj_dim = getattr(pipeline.unet.config, "time_cond_proj_dim", None) + unet.config.text_encoder_projection_dim = pipeline.text_encoder.config.projection_dim + unet.config.export_model_type = _get_diffusers_submodel_type(unet) + models_for_export["unet"] = unet + + # Transformer + transformer = getattr(pipeline, "transformer", None) + if transformer is not None: + # ONNX export of 
torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 + if not is_torch_greater_or_equal_than_2_1: + transformer.set_attn_processor(AttnProcessor()) + + transformer.config.requires_aesthetics_score = getattr(pipeline.config, "requires_aesthetics_score", False) + transformer.config.time_cond_proj_dim = getattr(pipeline.transformer.config, "time_cond_proj_dim", None) + transformer.config.text_encoder_projection_dim = pipeline.text_encoder.config.projection_dim + transformer.config.export_model_type = _get_diffusers_submodel_type(transformer) + models_for_export["transformer"] = transformer + + # VAE Encoder vae_encoder = copy.deepcopy(pipeline.vae) + + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 if not is_torch_greater_or_equal_than_2_1: vae_encoder = override_diffusers_2_0_attn_processors(vae_encoder) + # we return the distribution parameters to be able to recreate it in the decoder vae_encoder.forward = lambda sample: {"latent_parameters": vae_encoder.encode(x=sample)["latent_dist"].parameters} models_for_export["vae_encoder"] = vae_encoder - # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600 + # VAE Decoder vae_decoder = copy.deepcopy(pipeline.vae) + + # ONNX export of torch.nn.functional.scaled_dot_product_attention not supported for < v2.1.0 if not is_torch_greater_or_equal_than_2_1: vae_decoder = override_diffusers_2_0_attn_processors(vae_decoder) + vae_decoder.forward = lambda latent_sample: vae_decoder.decode(z=latent_sample) models_for_export["vae_decoder"] = vae_decoder - text_encoder_2 = getattr(pipeline, "text_encoder_2", None) - if text_encoder_2 is not None: - text_encoder_2.config.output_hidden_states = True - text_encoder_2.text_model.config.output_hidden_states = True - models_for_export["text_encoder_2"] = text_encoder_2 - return models_for_export @@ -294,33 +333,59 @@ def get_diffusion_models_for_export( `Dict[str, Tuple[Union[`PreTrainedModel`, `TFPreTrainedModel`], `ExportConfig`]: A Dict containing the model and export configs for the different components of the model. 
""" + models_for_export = _get_submodels_for_export_diffusion(pipeline) # Text encoder if "text_encoder" in models_for_export: + text_encoder = models_for_export["text_encoder"] text_encoder_config_constructor = TasksManager.get_exporter_config_constructor( - model=pipeline.text_encoder, - exporter=exporter, - library_name="diffusers", - task="feature-extraction", + model=text_encoder, exporter=exporter, library_name="diffusers", task="feature-extraction" ) text_encoder_export_config = text_encoder_config_constructor( - pipeline.text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype + text_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype ) models_for_export["text_encoder"] = (models_for_export["text_encoder"], text_encoder_export_config) + # Text encoder 2 + if "text_encoder_2" in models_for_export: + text_encoder_2 = models_for_export["text_encoder_2"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=text_encoder_2, exporter=exporter, library_name="diffusers", task="feature-extraction" + ) + export_config = export_config_constructor(text_encoder_2.config, int_dtype=int_dtype, float_dtype=float_dtype) + models_for_export["text_encoder_2"] = (models_for_export["text_encoder_2"], export_config) + + # Text encoder 3 + if "text_encoder_3" in models_for_export: + text_encoder_3 = models_for_export["text_encoder_3"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=text_encoder_3, exporter=exporter, library_name="diffusers", task="feature-extraction" + ) + export_config = export_config_constructor(text_encoder_3.config, int_dtype=int_dtype, float_dtype=float_dtype) + models_for_export["text_encoder_3"] = (models_for_export["text_encoder_3"], export_config) + # U-NET - export_config_constructor = TasksManager.get_exporter_config_constructor( - model=pipeline.unet, - exporter=exporter, - library_name="diffusers", - task="semantic-segmentation", - model_type="unet", - ) - unet_export_config = export_config_constructor(pipeline.unet.config, int_dtype=int_dtype, float_dtype=float_dtype) - models_for_export["unet"] = (models_for_export["unet"], unet_export_config) + if "unet" in models_for_export: + unet = models_for_export["unet"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=unet, exporter=exporter, library_name="diffusers", task="semantic-segmentation" + ) + unet_export_config = export_config_constructor(unet.config, int_dtype=int_dtype, float_dtype=float_dtype) + models_for_export["unet"] = (models_for_export["unet"], unet_export_config) - # VAE Encoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L565 + # Transformer + if "transformer" in models_for_export: + transformer = models_for_export["transformer"] + export_config_constructor = TasksManager.get_exporter_config_constructor( + model=transformer, exporter=exporter, library_name="diffusers", task="semantic-segmentation" + ) + transformer_export_config = export_config_constructor( + transformer.config, int_dtype=int_dtype, float_dtype=float_dtype + ) + models_for_export["transformer"] = (models_for_export["transformer"], transformer_export_config) + + # VAE Encoder vae_encoder = models_for_export["vae_encoder"] vae_config_constructor = TasksManager.get_exporter_config_constructor( model=vae_encoder, @@ -329,10 +394,12 @@ def get_diffusion_models_for_export( task="semantic-segmentation", model_type="vae-encoder", ) - vae_export_config = vae_config_constructor(vae_encoder.config, 
int_dtype=int_dtype, float_dtype=float_dtype) - models_for_export["vae_encoder"] = (vae_encoder, vae_export_config) + vae_encoder_export_config = vae_config_constructor( + vae_encoder.config, int_dtype=int_dtype, float_dtype=float_dtype + ) + models_for_export["vae_encoder"] = (vae_encoder, vae_encoder_export_config) - # VAE Decoder https://github.com/huggingface/diffusers/blob/v0.11.1/src/diffusers/models/vae.py#L600 + # VAE Decoder vae_decoder = models_for_export["vae_decoder"] vae_config_constructor = TasksManager.get_exporter_config_constructor( model=vae_decoder, @@ -341,21 +408,10 @@ def get_diffusion_models_for_export( task="semantic-segmentation", model_type="vae-decoder", ) - vae_export_config = vae_config_constructor(vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype) - models_for_export["vae_decoder"] = (vae_decoder, vae_export_config) - - if "text_encoder_2" in models_for_export: - export_config_constructor = TasksManager.get_exporter_config_constructor( - model=pipeline.text_encoder_2, - exporter=exporter, - library_name="diffusers", - task="feature-extraction", - model_type="clip-text-with-projection", - ) - export_config = export_config_constructor( - pipeline.text_encoder_2.config, int_dtype=int_dtype, float_dtype=float_dtype - ) - models_for_export["text_encoder_2"] = (models_for_export["text_encoder_2"], export_config) + vae_decoder_export_config = vae_config_constructor( + vae_decoder.config, int_dtype=int_dtype, float_dtype=float_dtype + ) + models_for_export["vae_decoder"] = (vae_decoder, vae_decoder_export_config) return models_for_export diff --git a/optimum/onnxruntime/__init__.py b/optimum/onnxruntime/__init__.py index 4e25a436909..f3f1535fd45 100644 --- a/optimum/onnxruntime/__init__.py +++ b/optimum/onnxruntime/__init__.py @@ -74,33 +74,51 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: _import_structure[".utils.dummy_diffusers_objects"] = [ - "ORTStableDiffusionPipeline", + "ORTDiffusionPipeline", + "ORTPipelineForText2Image", + "ORTPipelineForImage2Image", + "ORTPipelineForInpainting", + # flux + "ORTFluxPipeline", + # lcm + "ORTLatentConsistencyModelImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", + # sd3 + "ORTStableDiffusion3Img2ImgPipeline", + "ORTStableDiffusion3InpaintPipeline", + "ORTStableDiffusion3Pipeline", + # sd "ORTStableDiffusionImg2ImgPipeline", "ORTStableDiffusionInpaintPipeline", - "ORTStableDiffusionXLPipeline", + "ORTStableDiffusionPipeline", + # xl "ORTStableDiffusionXLImg2ImgPipeline", "ORTStableDiffusionXLInpaintPipeline", - "ORTLatentConsistencyModelPipeline", - "ORTLatentConsistencyModelImg2ImgPipeline", - "ORTPipelineForImage2Image", - "ORTPipelineForInpainting", - "ORTPipelineForText2Image", - "ORTDiffusionPipeline", + "ORTStableDiffusionXLPipeline", ] else: _import_structure["modeling_diffusion"] = [ - "ORTStableDiffusionPipeline", + "ORTDiffusionPipeline", + "ORTPipelineForText2Image", + "ORTPipelineForImage2Image", + "ORTPipelineForInpainting", + # flux + "ORTFluxPipeline", + # lcm + "ORTLatentConsistencyModelImg2ImgPipeline", + "ORTLatentConsistencyModelPipeline", + # sd3 + "ORTStableDiffusion3Img2ImgPipeline", + "ORTStableDiffusion3InpaintPipeline", + "ORTStableDiffusion3Pipeline", + # sd "ORTStableDiffusionImg2ImgPipeline", "ORTStableDiffusionInpaintPipeline", - "ORTStableDiffusionXLPipeline", + "ORTStableDiffusionPipeline", + # xl "ORTStableDiffusionXLImg2ImgPipeline", "ORTStableDiffusionXLInpaintPipeline", - "ORTLatentConsistencyModelImg2ImgPipeline", - 
"ORTLatentConsistencyModelPipeline", - "ORTPipelineForImage2Image", - "ORTPipelineForInpainting", - "ORTPipelineForText2Image", - "ORTDiffusionPipeline", + "ORTStableDiffusionXLPipeline", ] @@ -151,30 +169,52 @@ raise OptionalDependencyNotAvailable() except OptionalDependencyNotAvailable: from ..utils.dummy_diffusers_objects import ( + # generic entrypoint ORTDiffusionPipeline, + # flux + ORTFluxPipeline, + # lcm ORTLatentConsistencyModelImg2ImgPipeline, ORTLatentConsistencyModelPipeline, + # task-specific entrypoints ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, + # sd3 + ORTStableDiffusion3Img2ImgPipeline, + ORTStableDiffusion3InpaintPipeline, + ORTStableDiffusion3Pipeline, + # sd ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, + # xl ORTStableDiffusionXLImg2ImgPipeline, ORTStableDiffusionXLInpaintPipeline, ORTStableDiffusionXLPipeline, ) else: from .modeling_diffusion import ( + # generic entrypoint ORTDiffusionPipeline, + # flux + ORTFluxPipeline, + # lcm ORTLatentConsistencyModelImg2ImgPipeline, ORTLatentConsistencyModelPipeline, + # task-specific entrypoints ORTPipelineForImage2Image, ORTPipelineForInpainting, ORTPipelineForText2Image, + # sd3 + ORTStableDiffusion3Img2ImgPipeline, + ORTStableDiffusion3InpaintPipeline, + ORTStableDiffusion3Pipeline, + # sd ORTStableDiffusionImg2ImgPipeline, ORTStableDiffusionInpaintPipeline, ORTStableDiffusionPipeline, + # xl ORTStableDiffusionXLImg2ImgPipeline, ORTStableDiffusionXLInpaintPipeline, ORTStableDiffusionXLPipeline, diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 3899a7b36b6..79d302be449 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -57,7 +57,9 @@ from ..onnx.utils import _get_model_external_data_paths from ..utils import ( DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, + DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, + DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER, DIFFUSION_MODEL_UNET_SUBFOLDER, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, @@ -76,7 +78,7 @@ if check_if_diffusers_greater("0.25.0"): from diffusers.models.autoencoders.vae import DiagonalGaussianDistribution else: - from diffusers.models.vae import DiagonalGaussianDistribution + from diffusers.models.vae import DiagonalGaussianDistribution # type: ignore logger = logging.getLogger(__name__) @@ -92,15 +94,18 @@ class ORTDiffusionPipeline(ORTModel, DiffusionPipeline): def __init__( self, scheduler: "SchedulerMixin", - unet_session: ort.InferenceSession, vae_decoder_session: ort.InferenceSession, # optional pipeline models + unet_session: Optional[ort.InferenceSession] = None, + transformer_session: Optional[ort.InferenceSession] = None, vae_encoder_session: Optional[ort.InferenceSession] = None, text_encoder_session: Optional[ort.InferenceSession] = None, text_encoder_2_session: Optional[ort.InferenceSession] = None, + text_encoder_3_session: Optional[ort.InferenceSession] = None, # optional pipeline submodels tokenizer: Optional["CLIPTokenizer"] = None, tokenizer_2: Optional["CLIPTokenizer"] = None, + tokenizer_3: Optional["CLIPTokenizer"] = None, feature_extractor: Optional["CLIPFeatureExtractor"] = None, # stable diffusion xl specific arguments force_zeros_for_empty_prompt: bool = True, @@ -111,16 +116,20 @@ def __init__( model_save_dir: Optional[Union[str, Path, TemporaryDirectory]] = None, **kwargs, ): - self.unet = 
ORTModelUnet(unet_session, self) - self.vae_decoder = ORTModelVaeDecoder(vae_decoder_session, self) - self.vae_encoder = ORTModelVaeEncoder(vae_encoder_session, self) if vae_encoder_session is not None else None + self.unet = ORTModelUnet(unet_session, self) if unet_session is not None else None + self.transformer = ORTModelTransformer(transformer_session, self) if transformer_session is not None else None self.text_encoder = ( ORTModelTextEncoder(text_encoder_session, self) if text_encoder_session is not None else None ) self.text_encoder_2 = ( ORTModelTextEncoder(text_encoder_2_session, self) if text_encoder_2_session is not None else None ) + self.text_encoder_3 = ( + ORTModelTextEncoder(text_encoder_3_session, self) if text_encoder_3_session is not None else None + ) # We wrap the VAE Decoder & Encoder in a single object to simulate diffusers API + self.vae_encoder = ORTModelVaeEncoder(vae_encoder_session, self) if vae_encoder_session is not None else None + self.vae_decoder = ORTModelVaeDecoder(vae_decoder_session, self) if vae_decoder_session is not None else None self.vae = ORTWrapperVae(self.vae_encoder, self.vae_decoder) # we allow passing these as torch models for now @@ -130,18 +139,22 @@ def __init__( self.scheduler = scheduler self.tokenizer = tokenizer self.tokenizer_2 = tokenizer_2 + self.tokenizer_3 = tokenizer_3 self.feature_extractor = feature_extractor all_pipeline_init_args = { "vae": self.vae, "unet": self.unet, + "transformer": self.transformer, "text_encoder": self.text_encoder, "text_encoder_2": self.text_encoder_2, + "text_encoder_3": self.text_encoder_3, "safety_checker": self.safety_checker, "image_encoder": self.image_encoder, "scheduler": self.scheduler, "tokenizer": self.tokenizer, "tokenizer_2": self.tokenizer_2, + "tokenizer_3": self.tokenizer_3, "feature_extractor": self.feature_extractor, "requires_aesthetics_score": requires_aesthetics_score, "force_zeros_for_empty_prompt": force_zeros_for_empty_prompt, @@ -157,7 +170,10 @@ def __init__( # inits ort specific attributes self.shared_attributes_init( - model=unet_session, use_io_binding=use_io_binding, model_save_dir=model_save_dir, **kwargs + model=unet_session if unet_session is not None else transformer_session, + use_io_binding=use_io_binding, + model_save_dir=model_save_dir, + **kwargs, ) def _save_pretrained(self, save_directory: Union[str, Path]): @@ -165,10 +181,12 @@ def _save_pretrained(self, save_directory: Union[str, Path]): models_to_save_paths = { (self.unet, save_directory / DIFFUSION_MODEL_UNET_SUBFOLDER), + (self.transformer, save_directory / DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER), (self.vae_decoder, save_directory / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER), (self.vae_encoder, save_directory / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER), (self.text_encoder, save_directory / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER), (self.text_encoder_2, save_directory / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER), + (self.text_encoder_3, save_directory / DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER), } for model, save_path in models_to_save_paths: if model is not None: @@ -192,6 +210,8 @@ def _save_pretrained(self, save_directory: Union[str, Path]): self.tokenizer.save_pretrained(save_directory / "tokenizer") if self.tokenizer_2 is not None: self.tokenizer_2.save_pretrained(save_directory / "tokenizer_2") + if self.tokenizer_3 is not None: + self.tokenizer_3.save_pretrained(save_directory / "tokenizer_3") if self.feature_extractor is not None: self.feature_extractor.save_pretrained(save_directory / "feature_extractor") @@ 
-208,10 +228,12 @@ def _from_pretrained( cache_dir: str = HUGGINGFACE_HUB_CACHE, token: Optional[Union[bool, str]] = None, unet_file_name: str = ONNX_WEIGHTS_NAME, + transformer_file_name: str = ONNX_WEIGHTS_NAME, vae_decoder_file_name: str = ONNX_WEIGHTS_NAME, vae_encoder_file_name: str = ONNX_WEIGHTS_NAME, text_encoder_file_name: str = ONNX_WEIGHTS_NAME, text_encoder_2_file_name: str = ONNX_WEIGHTS_NAME, + text_encoder_3_file_name: str = ONNX_WEIGHTS_NAME, use_io_binding: Optional[bool] = None, provider: str = "CPUExecutionProvider", provider_options: Optional[Dict[str, Any]] = None, @@ -230,10 +252,12 @@ def _from_pretrained( allow_patterns.update( { unet_file_name, + transformer_file_name, vae_decoder_file_name, vae_encoder_file_name, text_encoder_file_name, text_encoder_2_file_name, + text_encoder_3_file_name, SCHEDULER_CONFIG_NAME, cls.config_name, CONFIG_NAME, @@ -259,10 +283,12 @@ def _from_pretrained( model_paths = { "unet": model_save_path / DIFFUSION_MODEL_UNET_SUBFOLDER / unet_file_name, + "transformer": model_save_path / DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER / transformer_file_name, "vae_decoder": model_save_path / DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER / vae_decoder_file_name, "vae_encoder": model_save_path / DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER / vae_encoder_file_name, "text_encoder": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER / text_encoder_file_name, "text_encoder_2": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER / text_encoder_2_file_name, + "text_encoder_3": model_save_path / DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER / text_encoder_3_file_name, } sessions = {} @@ -276,7 +302,7 @@ def _from_pretrained( ) submodels = {} - for submodel in {"scheduler", "tokenizer", "tokenizer_2", "feature_extractor"}: + for submodel in {"scheduler", "tokenizer", "tokenizer_2", "tokenizer_3", "feature_extractor"}: if kwargs.get(submodel, None) is not None: submodels[submodel] = kwargs.pop(submodel) elif config.get(submodel, (None, None))[0] is not None: @@ -385,17 +411,24 @@ def to(self, device: Union[torch.device, str, int]): if device.type == "cuda" and self.providers[0] == "TensorrtExecutionProvider": return self - self.unet.session.set_providers([provider], provider_options=[provider_options]) self.vae_decoder.session.set_providers([provider], provider_options=[provider_options]) + if self.unet is not None: + self.unet.session.set_providers([provider], provider_options=[provider_options]) + if self.transformer is not None: + self.transformer.session.set_providers([provider], provider_options=[provider_options]) if self.vae_encoder is not None: self.vae_encoder.session.set_providers([provider], provider_options=[provider_options]) if self.text_encoder is not None: self.text_encoder.session.set_providers([provider], provider_options=[provider_options]) if self.text_encoder_2 is not None: self.text_encoder_2.session.set_providers([provider], provider_options=[provider_options]) + if self.text_encoder_3 is not None: + self.text_encoder_3.session.set_providers([provider], provider_options=[provider_options]) - self.providers = self.unet.session.get_providers() + self.providers = ( + self.unet.session.get_providers() if self.unet is not None else self.transformer.session.get_providers() + ) self._device = device return self @@ -412,8 +445,10 @@ def components(self) -> Dict[str, Any]: components = { "vae": self.vae, "unet": self.unet, + "transformer": self.transformer, "text_encoder": self.text_encoder, "text_encoder_2": self.text_encoder_2, + "text_encoder_3": 
self.text_encoder_3, "safety_checker": self.safety_checker, "image_encoder": self.image_encoder, } @@ -443,9 +478,13 @@ def __init__(self, session: ort.InferenceSession, parent_pipeline: ORTDiffusionP self.input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} self.output_names = {output_key.name: idx for idx, output_key in enumerate(self.session.get_outputs())} + self.input_dtypes = {input_key.name: input_key.type for input_key in self.session.get_inputs()} self.output_dtypes = {output_key.name: output_key.type for output_key in self.session.get_outputs()} + self.input_shapes = {input_key.name: input_key.shape for input_key in self.session.get_inputs()} + self.output_shapes = {output_key.name: output_key.shape for output_key in self.session.get_outputs()} + config_file_path = Path(session._model_path).parent / self.config_name if not config_file_path.is_file(): # config is mandatory for the model part to be used for inference @@ -543,13 +582,18 @@ def __init__(self, *args, **kwargs): ) self.register_to_config(time_cond_proj_dim=None) + if len(self.input_shapes["timestep"]) > 0: + logger.warning( + "The exported unet onnx model expects a non scalar timestep input. " + "We will have to unsqueeze the timestep input at each iteration which might be inefficient. " + "Please re-export the pipeline with newer version of optimum and diffusers to avoid this warning." + ) + def forward( self, sample: Union[np.ndarray, torch.Tensor], timestep: Union[np.ndarray, torch.Tensor], encoder_hidden_states: Union[np.ndarray, torch.Tensor], - text_embeds: Optional[Union[np.ndarray, torch.Tensor]] = None, - time_ids: Optional[Union[np.ndarray, torch.Tensor]] = None, timestep_cond: Optional[Union[np.ndarray, torch.Tensor]] = None, cross_attention_kwargs: Optional[Dict[str, Any]] = None, added_cond_kwargs: Optional[Dict[str, Any]] = None, @@ -557,15 +601,13 @@ def forward( ): use_torch = isinstance(sample, torch.Tensor) - if len(timestep.shape) == 0: + if len(self.input_shapes["timestep"]) > 0: timestep = timestep.unsqueeze(0) model_inputs = { "sample": sample, "timestep": timestep, "encoder_hidden_states": encoder_hidden_states, - "text_embeds": text_embeds, - "time_ids": time_ids, "timestep_cond": timestep_cond, **(cross_attention_kwargs or {}), **(added_cond_kwargs or {}), @@ -581,6 +623,42 @@ def forward( return ModelOutput(**model_outputs) +class ORTModelTransformer(ORTPipelinePart): + def forward( + self, + hidden_states: Union[np.ndarray, torch.Tensor], + encoder_hidden_states: Union[np.ndarray, torch.Tensor], + pooled_projections: Union[np.ndarray, torch.Tensor], + timestep: Union[np.ndarray, torch.Tensor], + guidance: Optional[Union[np.ndarray, torch.Tensor]] = None, + txt_ids: Optional[Union[np.ndarray, torch.Tensor]] = None, + img_ids: Optional[Union[np.ndarray, torch.Tensor]] = None, + joint_attention_kwargs: Optional[Dict[str, Any]] = None, + return_dict: bool = False, + ): + use_torch = isinstance(hidden_states, torch.Tensor) + + model_inputs = { + "hidden_states": hidden_states, + "encoder_hidden_states": encoder_hidden_states, + "pooled_projections": pooled_projections, + "timestep": timestep, + "guidance": guidance, + "txt_ids": txt_ids, + "img_ids": img_ids, + **(joint_attention_kwargs or {}), + } + + onnx_inputs = self.prepare_onnx_inputs(use_torch, **model_inputs) + onnx_outputs = self.session.run(None, onnx_inputs) + model_outputs = self.prepare_onnx_outputs(use_torch, *onnx_outputs) + + if return_dict: + return model_outputs + + return 
ModelOutput(**model_outputs) + + class ORTModelTextEncoder(ORTPipelinePart): def forward( self, @@ -599,11 +677,13 @@ def forward( if output_hidden_states: model_outputs["hidden_states"] = [] - for i in range(self.config.num_hidden_layers): + num_layers = self.num_hidden_layers if hasattr(self, "num_hidden_layers") else self.num_decoder_layers + for i in range(num_layers): model_outputs["hidden_states"].append(model_outputs.pop(f"hidden_states.{i}")) model_outputs["hidden_states"].append(model_outputs.get("last_hidden_state")) else: - for i in range(self.config.num_hidden_layers): + num_layers = self.num_hidden_layers if hasattr(self, "num_hidden_layers") else self.num_decoder_layers + for i in range(num_layers): model_outputs.pop(f"hidden_states.{i}", None) if return_dict: @@ -620,7 +700,7 @@ def __init__(self, *args, **kwargs): if not hasattr(self.config, "scaling_factor"): logger.warning( "The `scaling_factor` attribute is missing from the VAE encoder configuration. " - "Please re-export the model with newer version of optimum and diffusers." + "Please re-export the model with newer version of optimum and diffusers to avoid this warning." ) self.register_to_config(scaling_factor=2 ** (len(self.config.block_out_channels) - 1)) @@ -660,7 +740,7 @@ def __init__(self, *args, **kwargs): if not hasattr(self.config, "scaling_factor"): logger.warning( "The `scaling_factor` attribute is missing from the VAE decoder configuration. " - "Please re-export the model with newer version of optimum and diffusers." + "Please re-export the model with newer version of optimum and diffusers to avoid this warning." ) self.register_to_config(scaling_factor=2 ** (len(self.config.block_out_channels) - 1)) @@ -871,6 +951,80 @@ class ORTLatentConsistencyModelImg2ImgPipeline(ORTDiffusionPipeline, LatentConsi auto_model_class = LatentConsistencyModelImg2ImgPipeline +class ORTUnavailablePipeline: + MIN_VERSION = None + + def __init__(self, *args, **kwargs): + raise NotImplementedError( + f"The pipeline {self.__class__.__name__} is not available in the current version of `diffusers`. " + f"Please upgrade `diffusers` to {self.MIN_VERSION} or later." + ) + + +if check_if_diffusers_greater("0.29.0"): + from diffusers import StableDiffusion3Img2ImgPipeline, StableDiffusion3Pipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTStableDiffusion3Pipeline(ORTDiffusionPipeline, StableDiffusion3Pipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusion3Pipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/text2img#diffusers.StableDiffusion3Pipeline). + """ + + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = StableDiffusion3Pipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTStableDiffusion3Img2ImgPipeline(ORTDiffusionPipeline, StableDiffusion3Img2ImgPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusion3Img2ImgPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/img2img#diffusers.StableDiffusion3Img2ImgPipeline). 
+ """ + + main_input_name = "image" + export_feature = "image-to-image" + auto_model_class = StableDiffusion3Img2ImgPipeline + +else: + + class ORTStableDiffusion3Pipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.29.0" + + class ORTStableDiffusion3Img2ImgPipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.29.0" + + +if check_if_diffusers_greater("0.30.0"): + from diffusers import FluxPipeline, StableDiffusion3InpaintPipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTStableDiffusion3InpaintPipeline(ORTDiffusionPipeline, StableDiffusion3InpaintPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.StableDiffusion3InpaintPipeline](https://huggingface.co/docs/diffusers/api/pipelines/stable_diffusion/inpaint#diffusers.StableDiffusion3InpaintPipeline). + """ + + main_input_name = "prompt" + export_feature = "inpainting" + auto_model_class = StableDiffusion3InpaintPipeline + + @add_end_docstrings(ONNX_MODEL_END_DOCSTRING) + class ORTFluxPipeline(ORTDiffusionPipeline, FluxPipeline): + """ + ONNX Runtime-powered stable diffusion pipeline corresponding to [diffusers.FluxPipeline](https://huggingface.co/docs/diffusers/api/pipelines/flux/text2img#diffusers.FluxPipeline). + """ + + main_input_name = "prompt" + export_feature = "text-to-image" + auto_model_class = FluxPipeline + +else: + + class ORTStableDiffusion3InpaintPipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.30.0" + + class ORTFluxPipeline(ORTUnavailablePipeline): + MIN_VERSION = "0.30.0" + + SUPPORTED_ORT_PIPELINES = [ ORTStableDiffusionPipeline, ORTStableDiffusionImg2ImgPipeline, @@ -880,6 +1034,10 @@ class ORTLatentConsistencyModelImg2ImgPipeline(ORTDiffusionPipeline, LatentConsi ORTStableDiffusionXLInpaintPipeline, ORTLatentConsistencyModelPipeline, ORTLatentConsistencyModelImg2ImgPipeline, + ORTStableDiffusion3Pipeline, + ORTStableDiffusion3Img2ImgPipeline, + ORTStableDiffusion3InpaintPipeline, + ORTFluxPipeline, ] @@ -897,23 +1055,27 @@ def _get_ort_class(pipeline_class_name: str, throw_error_if_not_exist: bool = Tr ORT_TEXT2IMAGE_PIPELINES_MAPPING = OrderedDict( [ + ("flux", ORTFluxPipeline), + ("latent-consistency", ORTLatentConsistencyModelPipeline), ("stable-diffusion", ORTStableDiffusionPipeline), + ("stable-diffusion-3", ORTStableDiffusion3Pipeline), ("stable-diffusion-xl", ORTStableDiffusionXLPipeline), - ("latent-consistency", ORTLatentConsistencyModelPipeline), ] ) ORT_IMAGE2IMAGE_PIPELINES_MAPPING = OrderedDict( [ + ("latent-consistency", ORTLatentConsistencyModelImg2ImgPipeline), ("stable-diffusion", ORTStableDiffusionImg2ImgPipeline), + ("stable-diffusion-3", ORTStableDiffusion3Img2ImgPipeline), ("stable-diffusion-xl", ORTStableDiffusionXLImg2ImgPipeline), - ("latent-consistency", ORTLatentConsistencyModelImg2ImgPipeline), ] ) ORT_INPAINT_PIPELINES_MAPPING = OrderedDict( [ ("stable-diffusion", ORTStableDiffusionInpaintPipeline), + ("stable-diffusion-3", ORTStableDiffusion3InpaintPipeline), ("stable-diffusion-xl", ORTStableDiffusionXLInpaintPipeline), ] ) diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py index db7d1f6975d..40d93d298e4 100644 --- a/optimum/utils/__init__.py +++ b/optimum/utils/__init__.py @@ -16,7 +16,9 @@ from .constant import ( CONFIG_NAME, DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER, + DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER, DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER, + DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER, DIFFUSION_MODEL_UNET_SUBFOLDER, DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER, DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER, @@ -52,6 +54,8 
@@ DummyCodegenDecoderTextInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, + DummyFluxTransformerTextInputGenerator, + DummyFluxTransformerVisionInputGenerator, DummyInputGenerator, DummyIntGenerator, DummyLabelsGenerator, @@ -63,6 +67,9 @@ DummySpeechT5InputGenerator, DummyTextInputGenerator, DummyTimestepInputGenerator, + DummyTransformerTextInputGenerator, + DummyTransformerTimestepInputGenerator, + DummyTransformerVisionInputGenerator, DummyVisionEmbeddingsGenerator, DummyVisionEncoderDecoderPastKeyValuesGenerator, DummyVisionInputGenerator, diff --git a/optimum/utils/constant.py b/optimum/utils/constant.py index 4497b5246d4..eb7a67e9ece 100644 --- a/optimum/utils/constant.py +++ b/optimum/utils/constant.py @@ -15,8 +15,10 @@ CONFIG_NAME = "config.json" DIFFUSION_MODEL_UNET_SUBFOLDER = "unet" -DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" +DIFFUSION_MODEL_TRANSFORMER_SUBFOLDER = "transformer" DIFFUSION_MODEL_VAE_DECODER_SUBFOLDER = "vae_decoder" DIFFUSION_MODEL_VAE_ENCODER_SUBFOLDER = "vae_encoder" +DIFFUSION_MODEL_TEXT_ENCODER_SUBFOLDER = "text_encoder" DIFFUSION_MODEL_TEXT_ENCODER_2_SUBFOLDER = "text_encoder_2" +DIFFUSION_MODEL_TEXT_ENCODER_3_SUBFOLDER = "text_encoder_3" ONNX_WEIGHTS_NAME = "model.onnx" diff --git a/optimum/utils/dummy_diffusers_objects.py b/optimum/utils/dummy_diffusers_objects.py index 35d1ffe9fc7..ff8b587e19f 100644 --- a/optimum/utils/dummy_diffusers_objects.py +++ b/optimum/utils/dummy_diffusers_objects.py @@ -15,6 +15,50 @@ from .import_utils import DummyObject, requires_backends +class ORTDiffusionPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForText2Image(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForImage2Image(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + +class ORTPipelineForInpainting(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + class ORTStableDiffusionPipeline(metaclass=DummyObject): _backends = ["diffusers"] @@ -70,6 +114,17 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) +class ORTStableDiffusionXLInpaintPipeline(metaclass=DummyObject): + _backends = ["diffusers"] + + def __init__(self, *args, **kwargs): + requires_backends(self, ["diffusers"]) + + @classmethod + def from_pretrained(cls, *args, **kwargs): + requires_backends(cls, ["diffusers"]) + + class ORTLatentConsistencyModelPipeline(metaclass=DummyObject): _backends = ["diffusers"] @@ -81,7 +136,7 @@ def from_pretrained(cls, *args, **kwargs): requires_backends(cls, ["diffusers"]) -class ORTDiffusionPipeline(metaclass=DummyObject): +class ORTLatentConsistencyModelImg2ImgPipeline(metaclass=DummyObject): _backends = ["diffusers"] def __init__(self, *args, **kwargs): @@ -92,7 +147,7 @@ def from_pretrained(cls, *args, 
**kwargs):
         requires_backends(cls, ["diffusers"])
 
 
-class ORTPipelineForText2Image(metaclass=DummyObject):
+class ORTStableDiffusion3Pipeline(metaclass=DummyObject):
     _backends = ["diffusers"]
 
     def __init__(self, *args, **kwargs):
@@ -103,7 +158,7 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["diffusers"])
 
 
-class ORTPipelineForImage2Image(metaclass=DummyObject):
+class ORTStableDiffusion3Img2ImgPipeline(metaclass=DummyObject):
     _backends = ["diffusers"]
 
     def __init__(self, *args, **kwargs):
@@ -114,7 +169,18 @@ def from_pretrained(cls, *args, **kwargs):
         requires_backends(cls, ["diffusers"])
 
 
-class ORTPipelineForInpainting(metaclass=DummyObject):
+class ORTStableDiffusion3InpaintPipeline(metaclass=DummyObject):
+    _backends = ["diffusers"]
+
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["diffusers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["diffusers"])
+
+
+class ORTFluxPipeline(metaclass=DummyObject):
     _backends = ["diffusers"]
 
     def __init__(self, *args, **kwargs):
diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py
index dac14a38114..148072aa0b4 100644
--- a/optimum/utils/input_generators.py
+++ b/optimum/utils/input_generators.py
@@ -22,7 +22,7 @@
 import numpy as np
 from transformers.utils import is_tf_available, is_torch_available
 
-from ..utils import check_if_transformers_greater
+from ..utils import check_if_diffusers_greater, check_if_transformers_greater
 from .normalized_config import (
     NormalizedConfig,
     NormalizedEncoderDecoderConfig,
@@ -36,7 +36,7 @@
     import torch
 
 if is_tf_available():
-    import tensorflow as tf
+    import tensorflow as tf  # type: ignore
 
 
 def check_framework_is_available(func):
@@ -871,8 +871,8 @@ def __init__(
 
     def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
         if input_name == "timestep":
-            shape = [self.batch_size]
-            return self.random_int_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=int_dtype)
+            shape = []  # a scalar with no dimension (it can be int or float depending on the sd architecture)
+            return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype)
 
         if input_name == "text_embeds":
             dim = self.text_encoder_projection_dim
@@ -1411,3 +1411,76 @@ def generate(
         float_dtype: str = "fp32",
     ):
         return self.random_int_tensor(shape=(1,), min_value=20, max_value=22, framework=framework, dtype=int_dtype)
+
+
+class DummyTransformerTimestepInputGenerator(DummyTimestepInputGenerator):
+    SUPPORTED_INPUT_NAMES = ("timestep",)
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        if input_name == "timestep":
+            shape = [self.batch_size]  # With transformer diffusers, timestep is a 1D tensor
+            return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype)
+
+        return super().generate(input_name, framework, int_dtype, float_dtype)
+
+
+class DummyTransformerVisionInputGenerator(DummyVisionInputGenerator):
+    SUPPORTED_INPUT_NAMES = ("hidden_states",)
+
+
+class DummyTransformerTextInputGenerator(DummySeq2SeqDecoderTextInputGenerator):
+    SUPPORTED_INPUT_NAMES = (
+        "encoder_hidden_states",
+        "pooled_projections",
+    )
+
+    def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"):
+        if input_name == "encoder_hidden_states":
+            return super().generate(input_name, framework, int_dtype,
float_dtype)[0] + + elif input_name == "pooled_projections": + return self.random_float_tensor( + [self.batch_size, self.normalized_config.projection_size], framework=framework, dtype=float_dtype + ) + + return super().generate(input_name, framework, int_dtype, float_dtype) + + +class DummyFluxTransformerVisionInputGenerator(DummyTransformerVisionInputGenerator): + SUPPORTED_INPUT_NAMES = ( + "hidden_states", + "img_ids", + ) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "hidden_states": + shape = [self.batch_size, (self.height // 2) * (self.width // 2), self.num_channels] + return self.random_float_tensor(shape, framework=framework, dtype=float_dtype) + elif input_name == "img_ids": + shape = ( + [(self.height // 2) * (self.width // 2), 3] + if check_if_diffusers_greater("0.31.0") + else [self.batch_size, (self.height // 2) * (self.width // 2), 3] + ) + return self.random_int_tensor(shape, max_value=1, framework=framework, dtype=int_dtype) + + return super().generate(input_name, framework, int_dtype, float_dtype) + + +class DummyFluxTransformerTextInputGenerator(DummyTransformerTextInputGenerator): + SUPPORTED_INPUT_NAMES = ( + "encoder_hidden_states", + "pooled_projections", + "txt_ids", + ) + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "txt_ids": + shape = ( + [self.sequence_length, 3] + if check_if_diffusers_greater("0.31.0") + else [self.batch_size, self.sequence_length, 3] + ) + return self.random_int_tensor(shape, max_value=1, framework=framework, dtype=int_dtype) + + return super().generate(input_name, framework, int_dtype, float_dtype) diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index ccccb5510bf..31059c403de 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -297,9 +297,11 @@ } PYTORCH_DIFFUSION_MODEL = { + "flux": "optimum-internal-testing/tiny-random-flux", + "latent-consistency": "echarlaix/tiny-random-latent-consistency", "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", + "stable-diffusion-3": "yujiepan/stable-diffusion-3-tiny-random", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", - "latent-consistency": "echarlaix/tiny-random-latent-consistency", } PYTORCH_TIMM_MODEL = { diff --git a/tests/exporters/onnx/test_onnx_export.py b/tests/exporters/onnx/test_onnx_export.py index 7671d6cd2e6..88288547c95 100644 --- a/tests/exporters/onnx/test_onnx_export.py +++ b/tests/exporters/onnx/test_onnx_export.py @@ -299,7 +299,6 @@ def _onnx_export_diffusion_models(self, model_type: str, model_name: str, device with TemporaryDirectory() as tmpdirname: _, onnx_outputs = export_models( models_and_onnx_configs=models_and_onnx_configs, - opset=14, output_dir=Path(tmpdirname), device=device, ) @@ -307,7 +306,6 @@ def _onnx_export_diffusion_models(self, model_type: str, model_name: str, device models_and_onnx_configs=models_and_onnx_configs, onnx_named_outputs=onnx_outputs, output_dir=Path(tmpdirname), - atol=1e-4, use_subprocess=False, ) diff --git a/tests/onnxruntime/test_diffusion.py b/tests/onnxruntime/test_diffusion.py index 956566f0e1f..07f90e8984e 100644 --- a/tests/onnxruntime/test_diffusion.py +++ b/tests/onnxruntime/test_diffusion.py @@ -34,6 +34,7 @@ ORTPipelineForInpainting, ORTPipelineForText2Image, ) +from optimum.utils import check_if_transformers_greater from 
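
The `check_if_diffusers_greater("0.31.0")` gates above track a breaking change in diffusers 0.31, which dropped the batch dimension from the Flux position ids. A standalone sketch of the shape rule, assuming the same packing of latents into 2x2 patches:

```python
# Illustrative helper, not repo code: the shape of the `img_ids` dummy input
# fed to the Flux transformer, depending on the installed diffusers version.
from optimum.utils.import_utils import check_if_diffusers_greater

def flux_img_ids_shape(batch_size: int, height: int, width: int) -> list:
    packed_length = (height // 2) * (width // 2)  # latents packed into 2x2 patches
    if check_if_diffusers_greater("0.31.0"):
        return [packed_length, 3]  # batch dimension dropped in diffusers >= 0.31
    return [batch_size, packed_length, 3]
```
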
optimum.utils.testing_utils import grid_parameters, require_diffusers @@ -71,7 +72,29 @@ def _generate_images(height=128, width=128, batch_size=1, channel=3, input_type= class ORTPipelineForText2ImageTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = ["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] + SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES += ["stable-diffusion-3", "flux"] + + NEGATIVE_PROMPT_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + NEGATIVE_PROMPT_SUPPORTED_ARCHITECTURES += ["stable-diffusion-3"] + + CALLBACK_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + CALLBACK_SUPPORTED_ARCHITECTURES += ["flux"] ORTMODEL_CLASS = ORTPipelineForText2Image AUTOMODEL_CLASS = AutoPipelineForText2Image @@ -120,8 +143,8 @@ def test_num_images_per_prompt(self, model_arch: str): pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) for batch_size in [1, 3]: - for height in [64, 128]: - for width in [64, 128]: + for height in [16, 32]: + for width in [16, 32]: for num_images_per_prompt in [1, 3]: inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images @@ -142,12 +165,12 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): for output_type in ["latent", "np", "pt"]: inputs["output_type"] = output_type - ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4, rtol=1e-2) + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) - @parameterized.expand(SUPPORTED_ARCHITECTURES) + @parameterized.expand(CALLBACK_SUPPORTED_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -164,6 +187,7 @@ def __init__(self): def __call__(self, *args, **kwargs) -> None: self.has_been_called = True self.number_of_steps += 1 + return kwargs ort_callback = Callback() auto_callback = Callback() @@ -171,9 +195,8 @@ def __call__(self, *args, **kwargs) -> None: ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - # callback_steps=1 to trigger callback every step - ort_pipe(**inputs, callback=ort_callback, callback_steps=1) - auto_pipe(**inputs, callback=auto_callback, callback_steps=1) + ort_pipe(**inputs, callback_on_step_end=ort_callback) + auto_pipe(**inputs, callback_on_step_end=auto_callback) self.assertTrue(ort_callback.has_been_called) self.assertTrue(auto_callback.has_been_called) @@ -200,10 +223,20 @@ def test_shape(self, model_arch: str): elif output_type == "pt": self.assertEqual(outputs.shape, (batch_size, 3, height, width)) else: - self.assertEqual( - outputs.shape, - (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), - ) 
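
The test updates in this patch also follow the diffusers API migration from the deprecated `callback`/`callback_steps` pair to `callback_on_step_end`, which fires on every denoising step and must return its kwargs. A minimal conforming callback:

```python
# Minimal sketch of a `callback_on_step_end` callable: it receives the
# pipeline, the step index, the timestep and a kwargs dict, and must hand
# the (possibly modified) kwargs back to the pipeline.
def on_step_end(pipeline, step, timestep, callback_kwargs):
    print(f"finished denoising step {step} (timestep {timestep})")
    return callback_kwargs

# images = pipe(prompt="a photo of a cat", callback_on_step_end=on_step_end).images
```
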
+ expected_height = height // pipeline.vae_scale_factor + expected_width = width // pipeline.vae_scale_factor + + if model_arch == "flux": + channels = pipeline.transformer.config.in_channels + expected_shape = (batch_size, expected_height * expected_width, channels) + elif model_arch == "stable-diffusion-3": + out_channels = pipeline.transformer.config.out_channels + expected_shape = (batch_size, out_channels, expected_height, expected_width) + else: + out_channels = pipeline.unet.config.out_channels + expected_shape = (batch_size, out_channels, expected_height, expected_width) + + self.assertEqual(outputs.shape, expected_shape) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers @@ -224,45 +257,22 @@ def test_image_reproducibility(self, model_arch: str): self.assertFalse(np.array_equal(ort_outputs_1.images[0], ort_outputs_3.images[0])) np.testing.assert_allclose(ort_outputs_1.images[0], ort_outputs_2.images[0], atol=1e-4, rtol=1e-2) - @parameterized.expand(SUPPORTED_ARCHITECTURES) + @parameterized.expand(NEGATIVE_PROMPT_SUPPORTED_ARCHITECTURES) def test_negative_prompt(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} self._setup(model_args) height, width, batch_size = 64, 64, 1 inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) + inputs["negative_prompt"] = ["This is a negative prompt"] * batch_size - negative_prompt = ["This is a negative prompt"] - pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + ort_pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) + diffusers_pipeline = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) - images_1 = pipeline(**inputs, negative_prompt=negative_prompt, generator=get_generator("pt", SEED)).images - prompt = inputs.pop("prompt") - - if model_arch == "stable-diffusion-xl": - ( - inputs["prompt_embeds"], - inputs["negative_prompt_embeds"], - inputs["pooled_prompt_embeds"], - inputs["negative_pooled_prompt_embeds"], - ) = pipeline.encode_prompt( - prompt=prompt, - num_images_per_prompt=1, - device=torch.device("cpu"), - do_classifier_free_guidance=True, - negative_prompt=negative_prompt, - ) - else: - inputs["prompt_embeds"], inputs["negative_prompt_embeds"] = pipeline.encode_prompt( - prompt=prompt, - num_images_per_prompt=1, - device=torch.device("cpu"), - do_classifier_free_guidance=True, - negative_prompt=negative_prompt, - ) - - images_2 = pipeline(**inputs, generator=get_generator("pt", SEED)).images - - np.testing.assert_allclose(images_1, images_2, atol=1e-4, rtol=1e-2) + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) @parameterized.expand( grid_parameters( @@ -285,9 +295,9 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) + self.assertEqual(pipeline.device.type, "cuda") outputs = pipeline(**inputs).images - self.assertIsInstance(outputs, np.ndarray) self.assertEqual(outputs.shape, (batch_size, height, width, 3)) @@ -326,7 +336,19 @@ def test_safety_checker(self, model_arch: str): class ORTPipelineForImage2ImageTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = 
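
Condensed, the latent-shape expectations introduced above distinguish three layouts. An illustrative helper, not repo code, makes the rule explicit:

```python
# For output_type="latent": Flux returns packed, sequence-first latents,
# while SD3 and UNet pipelines return channel-first feature maps.
def expected_latent_shape(pipeline, model_arch, batch_size, height, width):
    h = height // pipeline.vae_scale_factor
    w = width // pipeline.vae_scale_factor
    if model_arch == "flux":
        return (batch_size, h * w, pipeline.transformer.config.in_channels)
    if model_arch == "stable-diffusion-3":
        return (batch_size, pipeline.transformer.config.out_channels, h, w)
    return (batch_size, pipeline.unet.config.out_channels, h, w)
```
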
["stable-diffusion", "stable-diffusion-xl", "latent-consistency"] + SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES += ["stable-diffusion-3"] + + CALLBACK_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + "latent-consistency", + ] AUTOMODEL_CLASS = AutoPipelineForImage2Image ORTMODEL_CLASS = ORTPipelineForImage2Image @@ -373,14 +395,14 @@ def test_num_images_per_prompt(self, model_arch: str): pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) for batch_size in [1, 3]: - for height in [64, 128]: - for width in [64, 128]: + for height in [16, 32]: + for width in [16, 32]: for num_images_per_prompt in [1, 3]: inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3)) - @parameterized.expand(SUPPORTED_ARCHITECTURES) + @parameterized.expand(CALLBACK_SUPPORTED_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -398,15 +420,16 @@ def __init__(self): def __call__(self, *args, **kwargs) -> None: self.has_been_called = True self.number_of_steps += 1 + return kwargs ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) ort_callback = Callback() auto_callback = Callback() - # callback_steps=1 to trigger callback every step - ort_pipe(**inputs, callback=ort_callback, callback_steps=1) - auto_pipe(**inputs, callback=auto_callback, callback_steps=1) + + ort_pipe(**inputs, callback_on_step_end=ort_callback) + auto_pipe(**inputs, callback_on_step_end=auto_callback) self.assertTrue(ort_callback.has_been_called) self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) @@ -434,9 +457,19 @@ def test_shape(self, model_arch: str): elif output_type == "pt": self.assertEqual(outputs.shape, (batch_size, 3, height, width)) else: + out_channels = ( + pipeline.unet.config.out_channels + if pipeline.unet is not None + else pipeline.transformer.config.out_channels + ) self.assertEqual( outputs.shape, - (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), + ( + batch_size, + out_channels, + height // pipeline.vae_scale_factor, + width // pipeline.vae_scale_factor, + ), ) @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -454,10 +487,10 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): for output_type in ["latent", "np", "pt"]: inputs["output_type"] = output_type - ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4, rtol=1e-2) + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers @@ -541,7 +574,17 @@ def test_safety_checker(self, model_arch: str): class ORTPipelineForInpaintingTest(ORTModelTestMixin): - SUPPORTED_ARCHITECTURES = 
["stable-diffusion", "stable-diffusion-xl"] + SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + ] + if check_if_transformers_greater("4.45"): + SUPPORTED_ARCHITECTURES += ["stable-diffusion-3"] + + CALLBACK_SUPPORTED_ARCHITECTURES = [ + "stable-diffusion", + "stable-diffusion-xl", + ] AUTOMODEL_CLASS = AutoPipelineForInpainting ORTMODEL_CLASS = ORTPipelineForInpainting @@ -593,14 +636,14 @@ def test_num_images_per_prompt(self, model_arch: str): pipeline = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) for batch_size in [1, 3]: - for height in [64, 128]: - for width in [64, 128]: + for height in [16, 32]: + for width in [16, 32]: for num_images_per_prompt in [1, 3]: inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) outputs = pipeline(**inputs, num_images_per_prompt=num_images_per_prompt).images self.assertEqual(outputs.shape, (batch_size * num_images_per_prompt, height, width, 3)) - @parameterized.expand(SUPPORTED_ARCHITECTURES) + @parameterized.expand(CALLBACK_SUPPORTED_ARCHITECTURES) @require_diffusers def test_callback(self, model_arch: str): model_args = {"test_name": model_arch, "model_arch": model_arch} @@ -618,15 +661,16 @@ def __init__(self): def __call__(self, *args, **kwargs) -> None: self.has_been_called = True self.number_of_steps += 1 + return kwargs ort_pipe = self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[model_arch]) auto_pipe = self.AUTOMODEL_CLASS.from_pretrained(MODEL_NAMES[model_arch]) ort_callback = Callback() auto_callback = Callback() - # callback_steps=1 to trigger callback every step - ort_pipe(**inputs, callback=ort_callback, callback_steps=1) - auto_pipe(**inputs, callback=auto_callback, callback_steps=1) + + ort_pipe(**inputs, callback_on_step_end=ort_callback) + auto_pipe(**inputs, callback_on_step_end=auto_callback) self.assertTrue(ort_callback.has_been_called) self.assertEqual(ort_callback.number_of_steps, auto_callback.number_of_steps) @@ -654,9 +698,19 @@ def test_shape(self, model_arch: str): elif output_type == "pt": self.assertEqual(outputs.shape, (batch_size, 3, height, width)) else: + out_channels = ( + pipeline.unet.config.out_channels + if pipeline.unet is not None + else pipeline.transformer.config.out_channels + ) self.assertEqual( outputs.shape, - (batch_size, 4, height // pipeline.vae_scale_factor, width // pipeline.vae_scale_factor), + ( + batch_size, + out_channels, + height // pipeline.vae_scale_factor, + width // pipeline.vae_scale_factor, + ), ) @parameterized.expand(SUPPORTED_ARCHITECTURES) @@ -674,10 +728,10 @@ def test_compare_to_diffusers_pipeline(self, model_arch: str): for output_type in ["latent", "np", "pt"]: inputs["output_type"] = output_type - ort_output = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images - diffusers_output = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images + ort_images = ort_pipeline(**inputs, generator=get_generator("pt", SEED)).images + diffusers_images = diffusers_pipeline(**inputs, generator=get_generator("pt", SEED)).images - np.testing.assert_allclose(ort_output, diffusers_output, atol=1e-4, rtol=1e-2) + np.testing.assert_allclose(ort_images, diffusers_images, atol=1e-4, rtol=1e-2) @parameterized.expand(SUPPORTED_ARCHITECTURES) @require_diffusers @@ -719,7 +773,7 @@ def test_pipeline_on_gpu(self, test_name: str, model_arch: str, provider: str): inputs = self.generate_inputs(height=height, width=width, batch_size=batch_size) pipeline = 
self.ORTMODEL_CLASS.from_pretrained(self.onnx_model_dirs[test_name], provider=provider) - self.assertEqual(pipeline.device, "cuda") + self.assertEqual(pipeline.device.type, "cuda") outputs = pipeline(**inputs).images self.assertIsInstance(outputs, np.ndarray) diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index 84ac27029f9..c4340dcd8b6 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -143,7 +143,7 @@ class ORTModelIntegrationTest(unittest.TestCase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.TEST_MODEL_ID = "sshleifer/tiny-distilbert-base-cased-distilled-squad" - self.LOCAL_MODEL_PATH = "assets/onnx" + self.LOCAL_MODEL_PATH = "tests/assets/onnx" self.ONNX_MODEL_ID = "philschmid/distilbert-onnx" self.TINY_ONNX_MODEL_ID = "fxmarty/resnet-tiny-beans" self.FAIL_ONNX_MODEL_ID = "sshleifer/tiny-distilbert-base-cased-distilled-squad" diff --git a/tests/onnxruntime/test_quantization.py b/tests/onnxruntime/test_quantization.py index 34a9504f95a..cf451590fbd 100644 --- a/tests/onnxruntime/test_quantization.py +++ b/tests/onnxruntime/test_quantization.py @@ -42,10 +42,10 @@ class ORTQuantizerTest(unittest.TestCase): LOAD_CONFIGURATION = { "local_asset": { - "model_or_path": "assets/onnx", + "model_or_path": "tests/assets/onnx", }, "local_asset_different_name": { - "model_or_path": "assets/onnx", + "model_or_path": "tests/assets/onnx", "file_name": "different_name.onnx", }, "ort_model_class": { diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index 9f200e69b3d..ba8f6cc4abc 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -98,6 +98,7 @@ }, "falcon": "fxmarty/really-tiny-falcon-testing", "flaubert": "hf-internal-testing/tiny-random-flaubert", + "flux": "optimum-internal-testing/tiny-random-flux", "gemma": "fxmarty/tiny-random-GemmaForCausalLM", "gpt2": "hf-internal-testing/tiny-random-gpt2", "gpt_bigcode": "hf-internal-testing/tiny-random-GPTBigCodeModel", @@ -108,10 +109,10 @@ "groupvit": "hf-internal-testing/tiny-random-groupvit", "hubert": "hf-internal-testing/tiny-random-HubertModel", "ibert": "hf-internal-testing/tiny-random-IBertModel", - "levit": "hf-internal-testing/tiny-random-LevitModel", "latent-consistency": "echarlaix/tiny-random-latent-consistency", "layoutlm": "hf-internal-testing/tiny-random-LayoutLMModel", "layoutlmv3": "hf-internal-testing/tiny-random-LayoutLMv3Model", + "levit": "hf-internal-testing/tiny-random-LevitModel", "longt5": "hf-internal-testing/tiny-random-LongT5Model", "llama": "optimum-internal-testing/tiny-random-llama", "m2m_100": "hf-internal-testing/tiny-random-m2m_100", @@ -143,6 +144,7 @@ "squeezebert": "hf-internal-testing/tiny-random-SqueezeBertModel", "speech_to_text": "hf-internal-testing/tiny-random-Speech2TextModel", "stable-diffusion": "hf-internal-testing/tiny-stable-diffusion-torch", + "stable-diffusion-3": "optimum-internal-testing/tiny-random-stable-diffusion-3", "stable-diffusion-xl": "echarlaix/tiny-random-stable-diffusion-xl", "swin": "hf-internal-testing/tiny-random-SwinModel", "swin-window": "yujiepan/tiny-random-swin-patch4-window7-224", From d2a5a6aa2adbe9561527a85c4a4947a6d7fcfa58 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:03:14 +0100 Subject: [PATCH 05/20] Remove datasets as required dependency (#2087) * remove datasets required dependency * 
install datasets when needed * add datasets installed when needed * style * add require dataset * divide datasets tests * import datasets only when needed --- .github/workflows/dev_test_benckmark.yml | 2 +- .github/workflows/test_benckmark.yml | 2 +- .github/workflows/test_utils.yml | 11 ++++++++++- optimum/gptq/data.py | 16 ++++++++++++++- optimum/gptq/quantizer.py | 2 +- optimum/onnxruntime/configuration.py | 15 +++++++++----- optimum/onnxruntime/model.py | 9 ++++++--- optimum/onnxruntime/quantization.py | 17 ++++++++++------ optimum/onnxruntime/runs/calibrator.py | 10 ++++++---- optimum/runs_base.py | 8 +++++--- optimum/utils/__init__.py | 1 + optimum/utils/import_utils.py | 12 ++++++++++++ optimum/utils/preprocessing/base.py | 19 +++++++++++++----- optimum/utils/testing_utils.py | 5 +++++ pyproject.toml | 1 + setup.py | 3 --- tests/utils/test_task_processors.py | 25 +++++++++++++++++++++++- 17 files changed, 123 insertions(+), 35 deletions(-) diff --git a/.github/workflows/dev_test_benckmark.yml b/.github/workflows/dev_test_benckmark.yml index a898d288625..381197b129a 100644 --- a/.github/workflows/dev_test_benckmark.yml +++ b/.github/workflows/dev_test_benckmark.yml @@ -23,7 +23,7 @@ jobs: - name: Install dependencies run: | pip install wheel - pip install .[tests,onnxruntime,benchmark] + pip install .[tests,onnxruntime,benchmark] datasets pip install -U git+https://github.com/huggingface/evaluate pip install -U git+https://github.com/huggingface/diffusers pip install -U git+https://github.com/huggingface/transformers diff --git a/.github/workflows/test_benckmark.yml b/.github/workflows/test_benckmark.yml index e859e845d64..fe7df1a20cc 100644 --- a/.github/workflows/test_benckmark.yml +++ b/.github/workflows/test_benckmark.yml @@ -30,7 +30,7 @@ jobs: - name: Install dependencies run: | pip install wheel - pip install .[tests,onnxruntime,benchmark] + pip install .[tests,onnxruntime,benchmark] datasets - name: Test with unittest run: | python -m unittest discover --start-directory tests/benchmark --pattern 'test_*.py' diff --git a/.github/workflows/test_utils.yml b/.github/workflows/test_utils.yml index 0126b023c60..bbe00e62841 100644 --- a/.github/workflows/test_utils.yml +++ b/.github/workflows/test_utils.yml @@ -37,4 +37,13 @@ jobs: - name: Test with pytest working-directory: tests run: | - python -m pytest -s -vvvv utils + pytest utils -s -n auto -m "not datasets_test" --durations=0 + + - name: Install datasets + run: | + pip install datasets + + - name: Tests needing datasets + working-directory: tests + run: | + pytest utils -s -n auto -m "datasets_test" --durations=0 \ No newline at end of file diff --git a/optimum/gptq/data.py b/optimum/gptq/data.py index b8734da478e..7e5fc0b43db 100644 --- a/optimum/gptq/data.py +++ b/optimum/gptq/data.py @@ -18,7 +18,12 @@ import numpy as np import torch -from datasets import load_dataset + +from optimum.utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available + + +if is_datasets_available(): + from datasets import load_dataset """ @@ -113,6 +118,9 @@ def pad_block(block, pads): def get_wikitext2(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): + if not is_datasets_available(): + raise ImportError(DATASETS_IMPORT_ERROR.format("get_wikitext2")) + if split == "train": data = load_dataset("wikitext", "wikitext-2-raw-v1", split="train") elif split == "validation": @@ -132,6 +140,9 @@ def get_wikitext2(tokenizer: Any, seqlen: int, nsamples: int, split: str = "trai def get_c4(tokenizer: Any, seqlen: int, nsamples: 
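
Each loader in `optimum/gptq/data.py` now follows the same guarded-import pattern: `datasets` is imported only when present, and a missing install fails with an actionable message. The pattern in isolation, with a hypothetical helper name:

```python
# The optional-dependency pattern used throughout this patch; the helper
# `load_my_calibration_split` is hypothetical, not repo code.
from optimum.utils.import_utils import DATASETS_IMPORT_ERROR, is_datasets_available

if is_datasets_available():
    from datasets import load_dataset

def load_my_calibration_split(nsamples: int = 128):
    if not is_datasets_available():
        raise ImportError(DATASETS_IMPORT_ERROR.format("load_my_calibration_split"))
    data = load_dataset("wikitext", "wikitext-2-raw-v1", split="train")
    return data.select(range(nsamples))
```
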
int, split: str = "train"): + if not is_datasets_available(): + raise ImportError(DATASETS_IMPORT_ERROR.format("get_c4")) + if split == "train": data = load_dataset("allenai/c4", split="train", data_files={"train": "en/c4-train.00000-of-01024.json.gz"}) elif split == "validation": @@ -157,6 +168,9 @@ def get_c4(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): def get_c4_new(tokenizer: Any, seqlen: int, nsamples: int, split: str = "train"): + if not is_datasets_available(): + raise ImportError(DATASETS_IMPORT_ERROR.format("get_c4_new")) + if split == "train": data = load_dataset("allenai/c4", split="train", data_files={"train": "en/c4-train.00000-of-01024.json.gz"}) elif split == "validation": diff --git a/optimum/gptq/quantizer.py b/optimum/gptq/quantizer.py index 949d4d260df..849d8821ebf 100644 --- a/optimum/gptq/quantizer.py +++ b/optimum/gptq/quantizer.py @@ -88,7 +88,7 @@ def __init__( dataset (`Union[List[str], str, Any]`, defaults to `None`): The dataset used for quantization. You can provide your own dataset in a list of string or in a list of tokenized data (e.g. [{ "input_ids": [ 1, 100, 15, ... ],"attention_mask": [ 1, 1, 1, ... ]},...]) - or just use the original datasets used in GPTQ paper ['wikitext2','c4','c4-new','ptb','ptb-new']. + or just use the original datasets used in GPTQ paper ['wikitext2','c4','c4-new']. group_size (int, defaults to 128): The group size to use for quantization. Recommended value is 128 and -1 uses per-column quantization. damp_percent (`float`, defaults to `0.1`): diff --git a/optimum/onnxruntime/configuration.py b/optimum/onnxruntime/configuration.py index 2e3d9f32d6a..adc1984795a 100644 --- a/optimum/onnxruntime/configuration.py +++ b/optimum/onnxruntime/configuration.py @@ -18,9 +18,8 @@ from dataclasses import asdict, dataclass, field from enum import Enum from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from datasets import Dataset from packaging.version import Version, parse from onnxruntime import __version__ as ort_version @@ -33,6 +32,10 @@ from ..utils import logging +if TYPE_CHECKING: + from datasets import Dataset + + logger = logging.get_logger(__name__) # This value is used to indicate ORT which axis it should use to quantize an operator "per-channel" @@ -117,7 +120,9 @@ def create_calibrator( class AutoCalibrationConfig: @staticmethod - def minmax(dataset: Dataset, moving_average: bool = False, averaging_constant: float = 0.01) -> CalibrationConfig: + def minmax( + dataset: "Dataset", moving_average: bool = False, averaging_constant: float = 0.01 + ) -> CalibrationConfig: """ Args: dataset (`Dataset`): @@ -151,7 +156,7 @@ def minmax(dataset: Dataset, moving_average: bool = False, averaging_constant: f @staticmethod def entropy( - dataset: Dataset, + dataset: "Dataset", num_bins: int = 128, num_quantized_bins: int = 128, ) -> CalibrationConfig: @@ -188,7 +193,7 @@ def entropy( ) @staticmethod - def percentiles(dataset: Dataset, num_bins: int = 2048, percentile: float = 99.999) -> CalibrationConfig: + def percentiles(dataset: "Dataset", num_bins: int = 2048, percentile: float = 99.999) -> CalibrationConfig: """ Args: dataset (`Dataset`): diff --git a/optimum/onnxruntime/model.py b/optimum/onnxruntime/model.py index caa662f3824..4182abc925f 100644 --- a/optimum/onnxruntime/model.py +++ b/optimum/onnxruntime/model.py @@ -14,10 +14,9 @@ import logging import os -from typing import Callable, Dict, List, Optional, 
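
Alongside the lazy imports, every annotated signature moves its `datasets` import under `TYPE_CHECKING` and quotes the annotation, so type checkers still resolve `Dataset` while the runtime never imports the package. The idiom in isolation:

```python
# Sketch of the TYPE_CHECKING idiom applied across these files.
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from datasets import Dataset  # evaluated by type checkers only, never at runtime

def evaluation_loop(dataset: "Dataset"):
    # The quoted annotation keeps `datasets` out of the runtime import graph.
    for example in dataset:
        ...
```
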
Union +from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Union import numpy as np -from datasets import Dataset from transformers import EvalPrediction from transformers.trainer_pt_utils import nested_concat from transformers.trainer_utils import EvalLoopOutput @@ -25,6 +24,10 @@ from onnxruntime import InferenceSession +if TYPE_CHECKING: + from datasets import Dataset + + logger = logging.getLogger(__name__) @@ -59,7 +62,7 @@ def __init__( self.session = InferenceSession(str(model_path), providers=[execution_provider]) self.onnx_input_names = {input_key.name: idx for idx, input_key in enumerate(self.session.get_inputs())} - def evaluation_loop(self, dataset: Dataset): + def evaluation_loop(self, dataset: "Dataset"): """ Run evaluation and returns metrics and predictions. diff --git a/optimum/onnxruntime/quantization.py b/optimum/onnxruntime/quantization.py index f637916dcd2..054a2310a6b 100644 --- a/optimum/onnxruntime/quantization.py +++ b/optimum/onnxruntime/quantization.py @@ -21,7 +21,6 @@ from typing import TYPE_CHECKING, Callable, Dict, List, Optional, Tuple, Union import onnx -from datasets import Dataset, load_dataset from packaging.version import Version, parse from transformers import AutoConfig @@ -29,6 +28,7 @@ from onnxruntime.quantization import CalibrationDataReader, QuantFormat, QuantizationMode, QuantType from onnxruntime.quantization.onnx_quantizer import ONNXQuantizer from onnxruntime.quantization.qdq_quantizer import QDQQuantizer +from optimum.utils.import_utils import requires_backends from ..quantization_base import OptimumQuantizer from ..utils.save_utils import maybe_save_preprocessors @@ -40,6 +40,7 @@ if TYPE_CHECKING: + from datasets import Dataset from transformers import PretrainedConfig LOGGER = logging.getLogger(__name__) @@ -48,7 +49,7 @@ class ORTCalibrationDataReader(CalibrationDataReader): __slots__ = ["batch_size", "dataset", "_dataset_iter"] - def __init__(self, dataset: Dataset, batch_size: int = 1): + def __init__(self, dataset: "Dataset", batch_size: int = 1): if dataset is None: raise ValueError("Provided dataset is None.") @@ -158,7 +159,7 @@ def from_pretrained( def fit( self, - dataset: Dataset, + dataset: "Dataset", calibration_config: CalibrationConfig, onnx_augmented_model_name: Union[str, Path] = "augmented_model.onnx", operators_to_quantize: Optional[List[str]] = None, @@ -212,7 +213,7 @@ def fit( def partial_fit( self, - dataset: Dataset, + dataset: "Dataset", calibration_config: CalibrationConfig, onnx_augmented_model_name: Union[str, Path] = "augmented_model.onnx", operators_to_quantize: Optional[List[str]] = None, @@ -428,7 +429,7 @@ def get_calibration_dataset( seed: int = 2016, use_auth_token: Optional[Union[bool, str]] = None, token: Optional[Union[bool, str]] = None, - ) -> Dataset: + ) -> "Dataset": """ Creates the calibration `datasets.Dataset` to use for the post-training static quantization calibration step. @@ -474,6 +475,10 @@ def get_calibration_dataset( "provided." 
) + requires_backends(self, ["datasets"]) + + from datasets import load_dataset + calib_dataset = load_dataset( dataset_name, name=dataset_config_name, @@ -492,7 +497,7 @@ def get_calibration_dataset( return self.clean_calibration_dataset(processed_calib_dataset) - def clean_calibration_dataset(self, dataset: Dataset) -> Dataset: + def clean_calibration_dataset(self, dataset: "Dataset") -> "Dataset": model = onnx.load(self.onnx_model_path) model_inputs = {input.name for input in model.graph.input} ignored_columns = list(set(dataset.column_names) - model_inputs) diff --git a/optimum/onnxruntime/runs/calibrator.py b/optimum/onnxruntime/runs/calibrator.py index c493a943747..bfdcd64d92e 100644 --- a/optimum/onnxruntime/runs/calibrator.py +++ b/optimum/onnxruntime/runs/calibrator.py @@ -1,6 +1,4 @@ -from typing import Dict, List - -from datasets import Dataset +from typing import TYPE_CHECKING, Dict, List from ...runs_base import Calibrator from .. import ORTQuantizer @@ -9,10 +7,14 @@ from ..preprocessors.passes import ExcludeGeLUNodes, ExcludeLayerNormNodes, ExcludeNodeAfter, ExcludeNodeFollowedBy +if TYPE_CHECKING: + from datasets import Dataset + + class OnnxRuntimeCalibrator(Calibrator): def __init__( self, - calibration_dataset: Dataset, + calibration_dataset: "Dataset", quantizer: ORTQuantizer, model_path: str, qconfig: QuantizationConfig, diff --git a/optimum/runs_base.py b/optimum/runs_base.py index 3a1d164c602..dadd445818f 100644 --- a/optimum/runs_base.py +++ b/optimum/runs_base.py @@ -2,13 +2,12 @@ import subprocess from contextlib import contextmanager from time import perf_counter_ns -from typing import Set +from typing import TYPE_CHECKING, Set import numpy as np import optuna import torch import transformers -from datasets import Dataset from tqdm import trange from . 
import version as optimum_version @@ -21,6 +20,9 @@ from .utils.runs import RunConfig, cpu_info_command +if TYPE_CHECKING: + from datasets import Dataset + os.environ["TOKENIZERS_PARALLELISM"] = "false" @@ -34,7 +36,7 @@ def get_autoclass_name(task): class Calibrator: def __init__( - self, calibration_dataset: Dataset, quantizer, model_path, qconfig, calibration_params, node_exclusion + self, calibration_dataset: "Dataset", quantizer, model_path, qconfig, calibration_params, node_exclusion ): self.calibration_dataset = calibration_dataset self.quantizer = quantizer diff --git a/optimum/utils/__init__.py b/optimum/utils/__init__.py index 40d93d298e4..fb1794af49c 100644 --- a/optimum/utils/__init__.py +++ b/optimum/utils/__init__.py @@ -35,6 +35,7 @@ check_if_transformers_greater, is_accelerate_available, is_auto_gptq_available, + is_datasets_available, is_diffusers_available, is_onnx_available, is_onnxruntime_available, diff --git a/optimum/utils/import_utils.py b/optimum/utils/import_utils.py index 35a6294ab52..405e3815b33 100644 --- a/optimum/utils/import_utils.py +++ b/optimum/utils/import_utils.py @@ -69,6 +69,7 @@ def _is_package_available(pkg_name: str, return_version: bool = False) -> Union[ _auto_gptq_available = _is_package_available("auto_gptq") _timm_available = _is_package_available("timm") _sentence_transformers_available = _is_package_available("sentence_transformers") +_datasets_available = _is_package_available("datasets") torch_version = None if is_torch_available(): @@ -131,6 +132,10 @@ def is_sentence_transformers_available(): return _sentence_transformers_available +def is_datasets_available(): + return _datasets_available + + def is_auto_gptq_available(): if _auto_gptq_available: version_autogptq = version.parse(importlib_metadata.version("auto_gptq")) @@ -230,6 +235,12 @@ def require_numpy_strictly_lower(package_version: str, message: str): -U transformers`. Please note that you may need to restart your runtime after installation. """ +DATASETS_IMPORT_ERROR = """ +{0} requires the datasets library but it was not found in your environment. You can install it with pip: +`pip install datasets`. Please note that you may need to restart your runtime after installation. +""" + + BACKENDS_MAPPING = OrderedDict( [ ("diffusers", (is_diffusers_available, DIFFUSERS_IMPORT_ERROR)), @@ -245,6 +256,7 @@ def require_numpy_strictly_lower(package_version: str, message: str): "transformers_434", (lambda: check_if_transformers_greater("4.34"), "{0} " + TRANSFORMERS_IMPORT_ERROR.format("4.34")), ), + ("datasets", (is_datasets_available, DATASETS_IMPORT_ERROR)), ] ) diff --git a/optimum/utils/preprocessing/base.py b/optimum/utils/preprocessing/base.py index dc995ccc50b..7cfda13ba7d 100644 --- a/optimum/utils/preprocessing/base.py +++ b/optimum/utils/preprocessing/base.py @@ -20,15 +20,16 @@ from abc import ABC, abstractmethod from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union -from datasets import Dataset, DatasetDict -from datasets import load_dataset as datasets_load_dataset from transformers import PreTrainedTokenizerBase from transformers.image_processing_utils import BaseImageProcessor +from optimum.utils.import_utils import requires_backends + from .. 
import logging if TYPE_CHECKING: + from datasets import Dataset, DatasetDict from transformers import PretrainedConfig @@ -102,11 +103,14 @@ def create_dataset_processing_func( def prepare_dataset( self, - dataset: Union[DatasetDict, Dataset], + dataset: Union["DatasetDict", "Dataset"], data_keys: Dict[str, str], ref_keys: Optional[List[str]] = None, split: Optional[str] = None, - ) -> Union[DatasetDict, Dataset]: + ) -> Union["DatasetDict", "Dataset"]: + requires_backends(self, ["datasets"]) + from datasets import Dataset + if isinstance(dataset, Dataset) and split is not None: raise ValueError("A Dataset and a split name were provided, but splits are for DatasetDict.") elif split is not None: @@ -131,7 +135,12 @@ def load_dataset( num_samples: Optional[int] = None, shuffle: bool = False, **load_dataset_kwargs, - ) -> Union[DatasetDict, Dataset]: + ) -> Union["DatasetDict", "Dataset"]: + requires_backends(self, ["datasets"]) + + from datasets import Dataset, DatasetDict + from datasets import load_dataset as datasets_load_dataset + dataset = datasets_load_dataset(path, **load_dataset_kwargs) if isinstance(dataset, DatasetDict) and load_smallest_split: diff --git a/optimum/utils/testing_utils.py b/optimum/utils/testing_utils.py index 76fe9a05b13..88b1acdb780 100644 --- a/optimum/utils/testing_utils.py +++ b/optimum/utils/testing_utils.py @@ -28,6 +28,7 @@ from . import ( is_accelerate_available, is_auto_gptq_available, + is_datasets_available, is_diffusers_available, is_sentence_transformers_available, is_timm_available, @@ -146,6 +147,10 @@ def require_sentence_transformers(test_case): return unittest.skipUnless(is_sentence_transformers_available(), "test requires sentence-transformers")(test_case) +def require_datasets(test_case): + return unittest.skipUnless(is_datasets_available(), "test requires datasets")(test_case) + + def grid_parameters( parameters: Dict[str, Iterable[Any]], yield_dict: bool = False, diff --git a/pyproject.toml b/pyproject.toml index 99a0f1c85fa..17bcd90e066 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ markers = [ "rocm_ep_test", "tensorflow_test", "timm_test", + "datasets_test", "run_in_series", "run_slow", "accelerate_test", diff --git a/setup.py b/setup.py index 29f97b604e0..6736085943a 100644 --- a/setup.py +++ b/setup.py @@ -13,14 +13,11 @@ REQUIRED_PKGS = [ - "coloredlogs", - "sympy", "transformers>=4.29", "torch>=1.11", "packaging", "numpy", "huggingface_hub>=0.8.0", - "datasets", ] # TODO: unpin pytest once https://github.com/huggingface/transformers/pull/29154 is merged & released diff --git a/tests/utils/test_task_processors.py b/tests/utils/test_task_processors.py index 16567048073..1a9f352a79f 100644 --- a/tests/utils/test_task_processors.py +++ b/tests/utils/test_task_processors.py @@ -19,16 +19,21 @@ from typing import TYPE_CHECKING, Any, Dict, Tuple, Union from unittest import TestCase -from datasets import DatasetDict +import pytest from transformers import AutoConfig, AutoFeatureExtractor, AutoTokenizer +from optimum.utils.import_utils import is_datasets_available from optimum.utils.preprocessing import TaskProcessorsManager +from optimum.utils.testing_utils import require_datasets if TYPE_CHECKING: from transformers import PretrainedConfig, PreTrainedTokenizerBase from transformers.image_processing_utils import BaseImageProcessor +if is_datasets_available(): + from datasets import DatasetDict + TEXT_MODEL_NAME = "bert-base-uncased" CONFIG = AutoConfig.from_pretrained(TEXT_MODEL_NAME) @@ -122,6 +127,8 @@ def 
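
The new `require_datasets` decorator and the `datasets_test` marker registered in `pyproject.toml` work as a pair: the decorator skips the test when the library is absent, and the marker lets CI deselect the whole group, as the workflow above does with `-m "not datasets_test"`. A sketch of a test wired this way:

```python
# Sketch of a test using both halves of the mechanism added in this patch.
import pytest

from optimum.utils.testing_utils import require_datasets

@require_datasets             # skipped when `datasets` is not installed
@pytest.mark.datasets_test    # deselected by `pytest -m "not datasets_test"`
def test_something_with_datasets():
    from datasets import load_dataset

    assert load_dataset is not None
```
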
test_create_defaults_and_kwargs_from_preprocessor_kwargs_does_not_mutate_pre
         )
         self.assertDictEqual(preprocessor_kwargs, clone)
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset_unallowed_data_keys(self):
         task_processor = TaskProcessorsManager.get_task_processor_class_for_task(self.TASK_NAME)(
             self.CONFIG, self.PREPROCESSOR
         )
@@ -188,15 +195,23 @@ def _test_load_dataset(
 
         return dataset
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset(self):
         return self._test_load_dataset(False, False, False)
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset_by_guessing_data_keys(self):
         return self._test_load_dataset(False, True, False)
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset_and_only_keep_necessary_columns(self):
         return self._test_load_dataset(False, False, True)
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_default_dataset(self):
         return self._test_load_dataset(True, False, False)
 
@@ -207,6 +222,8 @@ class TextClassificationProcessorTest(TestCase, TaskProcessorTestBase):
     PREPROCESSOR = TOKENIZER
     WRONG_PREPROCESSOR = IMAGE_PROCESSOR
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset_with_max_length(self):
         max_length = random.randint(4, 16)
         dataset = self._test_load_dataset(False, False, True, max_length=max_length)
@@ -223,6 +240,8 @@ class TokenClassificationProcessorTest(TestCase, TaskProcessorTestBase):
     PREPROCESSOR = TOKENIZER
     WRONG_PREPROCESSOR = IMAGE_PROCESSOR
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset_with_max_length(self):
         max_length = random.randint(4, 16)
         dataset = self._test_load_dataset(False, False, True, max_length=max_length)
@@ -232,6 +251,8 @@ def test_load_dataset_with_max_length(self):
         input_ids = dataset[0]["input_ids"]
         self.assertEqual(len(input_ids), max_length)
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_default_dataset(self):
         self.skipTest(
             "Skipping so as not to execute conll2003 remote code (test would require trust_remote_code=True)"
         )
@@ -244,6 +265,8 @@ class QuestionAnsweringProcessorTest(TestCase, TaskProcessorTestBase):
     PREPROCESSOR = TOKENIZER
     WRONG_PREPROCESSOR = IMAGE_PROCESSOR
 
+    @require_datasets
+    @pytest.mark.datasets_test
     def test_load_dataset_with_max_length(self):
         max_length = 384
         dataset = self._test_load_dataset(False, False, True, max_length=max_length)

From 65a8a94adaf136dd677d28cfc837c0acfe993031 Mon Sep 17 00:00:00 2001
From: Raghu Ramarao
Date: Mon, 25 Nov 2024 18:30:00 +0530
Subject: [PATCH 06/20] Add ONNX Support for Decision Transformer Model (#2038)

* Decision Transformer to ONNX V0.1

* Decision Transformer to ONNX V0.2

* Update optimum/exporters/onnx/model_configs.py

* Apply suggestions from code review

* Update optimum/exporters/onnx/base.py

* Update optimum/exporters/onnx/model_configs.py

* Update optimum/utils/input_generators.py

* Update optimum/exporters/onnx/model_configs.py

* Apply suggestions from code review

* Update optimum/exporters/tasks.py

* ONNXToDT: changes to order of OrderedDict elements

* make style changes

* test

* remove custom normalized config

* remove unnecessary dynamic axes

---------

Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com>
Co-authored-by: IlyasMoutawwakil
---
 docs/source/exporters/onnx/overview.mdx | 1 +
 optimum/exporters/onnx/model_configs.py | 25 +++++++++++++++++
 optimum/exporters/tasks.py | 9 ++++++
 optimum/utils/__init__.py | 1 +
 optimum/utils/input_generators.py | 37 +++++++++++++++++++++++++
tests/exporters/exporters_utils.py | 1 + 6 files changed, 74 insertions(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 747e1396fb4..2eaada7dadd 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -36,6 +36,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - Data2VecVision - Deberta - Deberta-v2 +- Decision Transformer - Deit - Detr - DistilBert diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 8984162ee8c..bca7cf24acf 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -27,6 +27,7 @@ BloomDummyPastKeyValuesGenerator, DummyAudioInputGenerator, DummyCodegenDecoderTextInputGenerator, + DummyDecisionTransformerInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, DummyFluxTransformerTextInputGenerator, @@ -263,6 +264,30 @@ class ImageGPTOnnxConfig(GPT2OnnxConfig): pass +class DecisionTransformerOnnxConfig(OnnxConfig): + DUMMY_INPUT_GENERATOR_CLASSES = (DummyDecisionTransformerInputGenerator,) + NORMALIZED_CONFIG_CLASS = NormalizedConfig + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + return { + "states": {0: "batch_size", 1: "sequence_length"}, + "actions": {0: "batch_size", 1: "sequence_length"}, + "timesteps": {0: "batch_size", 1: "sequence_length"}, + "returns_to_go": {0: "batch_size", 1: "sequence_length"}, + "attention_mask": {0: "batch_size", 1: "sequence_length"}, + } + + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "state_preds": {0: "batch_size", 1: "sequence_length"}, + "action_preds": {0: "batch_size", 1: "sequence_length"}, + "return_preds": {0: "batch_size", 1: "sequence_length"}, + "last_hidden_state": {0: "batch_size", 1: "sequence_length"}, + } + + class GPTNeoOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): DEFAULT_ONNX_OPSET = 14 NORMALIZED_CONFIG_CLASS = NormalizedTextConfig.with_args(num_attention_heads="num_heads") diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index b4bce4696f3..8f28ec42ce9 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -217,6 +217,7 @@ class TasksManager: "multiple-choice": "AutoModelForMultipleChoice", "object-detection": "AutoModelForObjectDetection", "question-answering": "AutoModelForQuestionAnswering", + "reinforcement-learning": "AutoModel", "semantic-segmentation": "AutoModelForSemanticSegmentation", "text-to-audio": ("AutoModelForTextToSpectrogram", "AutoModelForTextToWaveform"), "text-generation": "AutoModelForCausalLM", @@ -574,6 +575,11 @@ class TasksManager: onnx="DebertaV2OnnxConfig", tflite="DebertaV2TFLiteConfig", ), + "decision-transformer": supported_tasks_mapping( + "feature-extraction", + "reinforcement-learning", + onnx="DecisionTransformerOnnxConfig", + ), "deit": supported_tasks_mapping( "feature-extraction", "image-classification", @@ -2085,6 +2091,9 @@ def get_model_from_task( if original_task == "automatic-speech-recognition" or task == "automatic-speech-recognition": if original_task == "auto" and config.architectures is not None: model_class_name = config.architectures[0] + elif original_task == "reinforcement-learning" or task == "reinforcement-learning": + if config.architectures is not None: + model_class_name = config.architectures[0] if library_name == "diffusers": config = DiffusionPipeline.load_config(model_name_or_path, **kwargs) diff --git 
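
With the ONNX config and the `reinforcement-learning` task registered above, the model goes through the standard export entry points. A sketch using the hopper checkpoint referenced later in this patch:

```python
# Sketch of exporting the newly supported architecture, equivalent to:
#   optimum-cli export onnx --model edbeeching/decision-transformer-gym-hopper-medium \
#       --task reinforcement-learning dt_onnx/
from optimum.exporters.onnx import main_export

main_export(
    model_name_or_path="edbeeching/decision-transformer-gym-hopper-medium",
    task="reinforcement-learning",
    output="dt_onnx",
)
```
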
a/optimum/utils/__init__.py b/optimum/utils/__init__.py index fb1794af49c..2aa90253d08 100644 --- a/optimum/utils/__init__.py +++ b/optimum/utils/__init__.py @@ -53,6 +53,7 @@ DummyAudioInputGenerator, DummyBboxInputGenerator, DummyCodegenDecoderTextInputGenerator, + DummyDecisionTransformerInputGenerator, DummyDecoderTextInputGenerator, DummyEncodecInputGenerator, DummyFluxTransformerTextInputGenerator, diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 148072aa0b4..0ac1805f97d 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -507,6 +507,43 @@ class DummyDecoderTextInputGenerator(DummyTextInputGenerator): ) +class DummyDecisionTransformerInputGenerator(DummyTextInputGenerator): + """ + Generates dummy decision transformer inputs. + """ + + SUPPORTED_INPUT_NAMES = ( + "states", + "actions", + "timesteps", + "returns_to_go", + "attention_mask", + ) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.act_dim = self.normalized_config.config.act_dim + self.state_dim = self.normalized_config.config.state_dim + self.max_ep_len = self.normalized_config.config.max_ep_len + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + if input_name == "states": + shape = [self.batch_size, self.sequence_length, self.state_dim] + elif input_name == "actions": + shape = [self.batch_size, self.sequence_length, self.act_dim] + elif input_name == "rewards": + shape = [self.batch_size, self.sequence_length, 1] + elif input_name == "returns_to_go": + shape = [self.batch_size, self.sequence_length, 1] + elif input_name == "attention_mask": + shape = [self.batch_size, self.sequence_length] + elif input_name == "timesteps": + shape = [self.batch_size, self.sequence_length] + return self.random_int_tensor(shape=shape, max_value=self.max_ep_len, framework=framework, dtype=int_dtype) + + return self.random_float_tensor(shape, min_value=-2.0, max_value=2.0, framework=framework, dtype=float_dtype) + + class DummySeq2SeqDecoderTextInputGenerator(DummyDecoderTextInputGenerator): SUPPORTED_INPUT_NAMES = ( "decoder_input_ids", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 31059c403de..c56132c384c 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -67,6 +67,7 @@ "data2vec-audio": "hf-internal-testing/tiny-random-Data2VecAudioModel", "deberta": "hf-internal-testing/tiny-random-DebertaModel", "deberta-v2": "hf-internal-testing/tiny-random-DebertaV2Model", + "decision-transformer": "edbeeching/decision-transformer-gym-hopper-medium", "deit": "hf-internal-testing/tiny-random-DeiTModel", "donut": "fxmarty/tiny-doc-qa-vision-encoder-decoder", "donut-swin": "hf-internal-testing/tiny-random-DonutSwinModel", From a6c696c7de105e7691d432dd80102beec78d8fd4 Mon Sep 17 00:00:00 2001 From: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> Date: Tue, 26 Nov 2024 20:52:43 +0100 Subject: [PATCH 07/20] Generate guidance for flux (#2104) generate guidance --- optimum/onnxruntime/modeling_diffusion.py | 17 +++++++++++++++-- optimum/utils/input_generators.py | 4 ++++ 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/optimum/onnxruntime/modeling_diffusion.py b/optimum/onnxruntime/modeling_diffusion.py index 79d302be449..66b08e1ef66 100644 --- a/optimum/onnxruntime/modeling_diffusion.py +++ b/optimum/onnxruntime/modeling_diffusion.py @@ -437,8 +437,21 @@ def 
to(self, device: Union[torch.device, str, int]): def _load_config(cls, config_name_or_path: Union[str, os.PathLike], **kwargs): return cls.load_config(config_name_or_path, **kwargs) - def _save_config(self, save_directory): - self.save_config(save_directory) + def _save_config(self, save_directory: Union[str, Path]): + model_dir = ( + self.model_save_dir + if not isinstance(self.model_save_dir, TemporaryDirectory) + else self.model_save_dir.name + ) + save_dir = Path(save_directory) + original_config = Path(model_dir) / self.config_name + if original_config.exists(): + if not save_dir.exists(): + save_dir.mkdir(parents=True) + + shutil.copy(original_config, save_dir) + else: + self.save_config(save_directory) @property def components(self) -> Dict[str, Any]: diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index 0ac1805f97d..fbb77e6800a 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -1508,6 +1508,7 @@ class DummyFluxTransformerTextInputGenerator(DummyTransformerTextInputGenerator) SUPPORTED_INPUT_NAMES = ( "encoder_hidden_states", "pooled_projections", + "guidance", "txt_ids", ) @@ -1519,5 +1520,8 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int else [self.batch_size, self.sequence_length, 3] ) return self.random_int_tensor(shape, max_value=1, framework=framework, dtype=int_dtype) + elif input_name == "guidance": + shape = [self.batch_size] + return self.random_float_tensor(shape, min_value=0, max_value=1, framework=framework, dtype=float_dtype) return super().generate(input_name, framework, int_dtype, float_dtype) From bd08f12d2d4ebffdb2a25e32eabab759e4de88e5 Mon Sep 17 00:00:00 2001 From: Jingya HUANG <44135271+JingyaHuang@users.noreply.github.com> Date: Thu, 28 Nov 2024 15:13:11 +0100 Subject: [PATCH 08/20] Unbundle inputs generated by `DummyTimestepInputGenerator` (#2107) unbundle --- optimum/utils/input_generators.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/optimum/utils/input_generators.py b/optimum/utils/input_generators.py index fbb77e6800a..18a2a5a3fd1 100644 --- a/optimum/utils/input_generators.py +++ b/optimum/utils/input_generators.py @@ -897,14 +897,14 @@ def __init__( ): self.task = task self.vocab_size = normalized_config.vocab_size - self.text_encoder_projection_dim = normalized_config.text_encoder_projection_dim - self.time_ids = 5 if normalized_config.requires_aesthetics_score else 6 + self.text_encoder_projection_dim = getattr(normalized_config, "text_encoder_projection_dim", None) + self.time_ids = 5 if getattr(normalized_config, "requires_aesthetics_score", False) else 6 if random_batch_size_range: low, high = random_batch_size_range self.batch_size = random.randint(low, high) else: self.batch_size = batch_size - self.time_cond_proj_dim = normalized_config.config.time_cond_proj_dim + self.time_cond_proj_dim = getattr(normalized_config.config, "time_cond_proj_dim", None) def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): if input_name == "timestep": @@ -912,8 +912,16 @@ def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int return self.random_float_tensor(shape, max_value=self.vocab_size, framework=framework, dtype=float_dtype) if input_name == "text_embeds": + if self.text_encoder_projection_dim is None: + raise ValueError( + "Unable to infer the value of `text_encoder_projection_dim` for generating `text_embeds`, please double 
check the config of your model." + ) dim = self.text_encoder_projection_dim elif input_name == "timestep_cond": + if self.time_cond_proj_dim is None: + raise ValueError( + "Unable to infer the value of `time_cond_proj_dim` for generating `timestep_cond`, please double check the config of your model." + ) dim = self.time_cond_proj_dim else: dim = self.time_ids From 28bd0ad8fccfb6dd8019cd2882a88d69386a134c Mon Sep 17 00:00:00 2001 From: Brando Tovar <44623235+bndos@users.noreply.github.com> Date: Thu, 28 Nov 2024 10:13:05 -0500 Subject: [PATCH 09/20] Pass the revision to SentenceTransformer models (#2105) feat: pass revision to SentenceTransformers --- optimum/exporters/tasks.py | 1 + 1 file changed, 1 insertion(+) diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 8f28ec42ce9..c50fa5cdfa4 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -2128,6 +2128,7 @@ def get_model_from_task( device=device, cache_folder=cache_folder, token=token, + revision=revision, trust_remote_code=trust_remote_code, ) else: From f22655c036e4e61a7b09748e7aa7e146a16ae64d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Mlyn=C3=A1=C5=99?= <47664722+mlynatom@users.noreply.github.com> Date: Mon, 2 Dec 2024 14:54:08 +0100 Subject: [PATCH 10/20] Add RemBERT ONNX support (#2108) * ONNX config for RemBERT added * added RemBERT to TasksManager * rembert added to exporters_utils * RemBERT added to test modelling tasks * changed rembert model * added RemBERT to test utils * Added RemBERT to documentation * Apply suggestions from code review --------- Co-authored-by: Ilyas Moutawwakil <57442720+IlyasMoutawwakil@users.noreply.github.com> --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 4 ++++ optimum/exporters/tasks.py | 9 +++++++++ tests/exporters/exporters_utils.py | 3 ++- tests/onnxruntime/test_modeling.py | 5 +++++ tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 6 files changed, 22 insertions(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 2eaada7dadd..57005b85678 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -83,6 +83,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - PoolFormer - Qwen2(Qwen1.5) - RegNet +- RemBERT - ResNet - Roberta - Roformer diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index bca7cf24acf..b39d19ec782 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -162,6 +162,10 @@ class SplinterOnnxConfig(BertOnnxConfig): DEFAULT_ONNX_OPSET = 11 +class RemBertOnnxConfig(BertOnnxConfig): + DEFAULT_ONNX_OPSET = 11 + + class DistilBertOnnxConfig(BertOnnxConfig): DEFAULT_ONNX_OPSET = 14 # now uses F.scaled_dot_product_attention by default for transformers>=4.46.0 diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index c50fa5cdfa4..0a3758e97cf 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -431,6 +431,15 @@ class TasksManager: onnx="BertOnnxConfig", tflite="BertTFLiteConfig", ), + "rembert": supported_tasks_mapping( + "fill-mask", + "feature-extraction", + "text-classification", + "multiple-choice", + "token-classification", + "question-answering", + onnx="RemBertOnnxConfig", + ), # For big-bird and bigbird-pegasus being unsupported, refer to model_configs.py # "big-bird": supported_tasks_mapping( # 
"feature-extraction", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index c56132c384c..32156d9eebf 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -138,6 +138,7 @@ "phi3": "Xenova/tiny-random-Phi3ForCausalLM", "pix2struct": "fxmarty/pix2struct-tiny-random", # "rembert": "google/rembert", + "rembert": "hf-internal-testing/tiny-random-RemBertModel", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "qwen2": "fxmarty/tiny-dummy-qwen2", "regnet": "hf-internal-testing/tiny-random-RegNetModel", @@ -257,7 +258,7 @@ "owlv2": "google/owlv2-base-patch16", "owlvit": "google/owlvit-base-patch32", "perceiver": "hf-internal-testing/tiny-random-PerceiverModel", # Not using deepmind/language-perceiver because it takes too much time for testing. - # "rembert": "google/rembert", + "rembert": "google/rembert", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "regnet": "facebook/regnet-y-040", "resnet": "microsoft/resnet-50", diff --git a/tests/onnxruntime/test_modeling.py b/tests/onnxruntime/test_modeling.py index c4340dcd8b6..8f52ef45180 100644 --- a/tests/onnxruntime/test_modeling.py +++ b/tests/onnxruntime/test_modeling.py @@ -1312,6 +1312,7 @@ class ORTModelForQuestionAnsweringIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm_qa", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -1502,6 +1503,7 @@ class ORTModelForMaskedLMIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -1682,6 +1684,7 @@ class ORTModelForSequenceClassificationIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -1882,6 +1885,7 @@ class ORTModelForTokenClassificationIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} @@ -2227,6 +2231,7 @@ class ORTModelForMultipleChoiceIntegrationTest(ORTModelTestMixin): "squeezebert", "xlm", "xlm_roberta", + "rembert", ] FULL_GRID = {"model_arch": SUPPORTED_ARCHITECTURES} diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index ba8f6cc4abc..cccecd53817 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -135,6 +135,7 @@ "pix2struct": "fxmarty/pix2struct-tiny-random", "poolformer": "hf-internal-testing/tiny-random-PoolFormerModel", "qwen2": "fxmarty/tiny-dummy-qwen2", + "rembert": "hf-internal-testing/tiny-random-RemBertModel", "resnet": "hf-internal-testing/tiny-random-resnet", "roberta": "hf-internal-testing/tiny-random-RobertaModel", "roformer": "hf-internal-testing/tiny-random-RoFormerModel", From 3ba10576e755f8e0740251c891082ee96e722afa Mon Sep 17 00:00:00 2001 From: "Tang, Wenyi" Date: Mon, 2 Dec 2024 22:55:04 +0800 Subject: [PATCH 11/20] Fix `ModelPatcher` returns empty outputs (#2109) * fix bug `ModelPatcher` returns empty outputs When model's output is tuple or list, `filtered_outputs` doesn't get assigned and hence always a empty dict * typo --------- Co-authored-by: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> --- optimum/exporters/onnx/model_patcher.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/exporters/onnx/model_patcher.py b/optimum/exporters/onnx/model_patcher.py index fdfb0e280f5..2c0f9aeba67 100644 --- 
a/optimum/exporters/onnx/model_patcher.py +++ b/optimum/exporters/onnx/model_patcher.py @@ -168,7 +168,7 @@ def patched_forward(*args, **kwargs): filterd_outputs[name] = value elif isinstance(outputs, (list, tuple)): outputs_list = list(config.outputs.keys()) - dict(zip(outputs_list, outputs)) + filterd_outputs = dict(zip(outputs_list, outputs)) else: if len(config.outputs) > 1: num_outputs = len(config.outputs) From ff8c8fc95cb03b6ce72e0812bf0294bb2ae4463a Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:00:05 +0100 Subject: [PATCH 12/20] Fix workflow to mark issues as stale (#2110) * add permissions * update stale message --- .github/workflows/stale.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index a5e50a795b6..7b3eb5feb0c 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -6,9 +6,12 @@ on: jobs: stale: runs-on: ubuntu-latest + permissions: + issues: write steps: - - uses: actions/stale@v8 + - uses: actions/stale@v9 with: - stale-issue-message: 'This issue is stale because it has been open 30 days with no activity. Remove stale label or comment or this will be closed in 5 days.' + stale-issue-message: 'This issue has been marked as stale because it has been open for 30 days with no activity. This thread will be automatically closed in 5 days if no further activity occurs.' + exempt-issue-labels: 'bug,exporters,good first issue,onnx,onnxruntime,quantization' days-before-stale: 30 days-before-close: 5 From 01110adf076c94e395d1472a760eafac2c0a73aa Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 17:11:16 +0100 Subject: [PATCH 13/20] Remove doc-build (#2111) --- .github/workflows/build_main_documentation.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/build_main_documentation.yml b/.github/workflows/build_main_documentation.yml index c922f5097da..d38274f320a 100644 --- a/.github/workflows/build_main_documentation.yml +++ b/.github/workflows/build_main_documentation.yml @@ -18,12 +18,6 @@ jobs: repository: 'huggingface/doc-builder' path: doc-builder - - uses: actions/checkout@v2 - with: - repository: 'huggingface/doc-build' - path: doc-build - token: ${{ secrets.HUGGINGFACE_PUSH }} - - uses: actions/checkout@v2 with: repository: 'huggingface/optimum' From 7f2605ea94071f5495eac110ba240e2651ea8053 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 19:19:57 +0100 Subject: [PATCH 14/20] Downgrade stale bot to v8 and fix permissions (#2112) --- .github/workflows/stale.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 7b3eb5feb0c..28cf3ad9dc2 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -3,13 +3,14 @@ on: schedule: - cron: '30 1 * * *' +permissions: + issues: write + jobs: stale: runs-on: ubuntu-latest - permissions: - issues: write steps: - - uses: actions/stale@v9 + - uses: actions/stale@v8 with: stale-issue-message: 'This issue has been marked as stale because it has been open for 30 days with no activity. This thread will be automatically closed in 5 days if no further activity occurs.' 
exempt-issue-labels: 'bug,exporters,good first issue,onnx,onnxruntime,quantization' From d6de6762e0e4bf8136f0435211a0e777f5bf2f33 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Tue, 3 Dec 2024 19:20:09 +0100 Subject: [PATCH 15/20] Update documentation color from google tpu section (#2113) * Update documentation color from google tpu section * fix --- docs/source/index.mdx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/docs/source/index.mdx b/docs/source/index.mdx index 06133664ca8..1b54570ea80 100644 --- a/docs/source/index.mdx +++ b/docs/source/index.mdx @@ -43,7 +43,7 @@ The packages below enable you to get the best of the 🤗 Hugging Face ecosystem

      [card markup stripped during extraction: this hunk recolors the "Google TPUs"
      card ("Accelerate your training and inference workflows with Google TPUs"),
      which follows the AWS Trainium/Inferentia card in the packages grid of
      docs/source/index.mdx; only the styling of the card wrapper changes, not the
      card text]
-> [!TIP] -> Some packages provide hardware-agnostic features (e.g. INC interface in Optimum Intel). - - ## Open-source integrations 🤗 Optimum also supports a variety of open-source frameworks to make model optimization very easy. From 4a7cb298140ee9bed968d98a780a950d15bb2935 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Wed, 4 Dec 2024 17:04:37 +0100 Subject: [PATCH 16/20] Fix workflow to mark PRs as stale (#2116) --- .github/workflows/stale.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml index 28cf3ad9dc2..6dc3ff2bbd9 100644 --- a/.github/workflows/stale.yml +++ b/.github/workflows/stale.yml @@ -5,6 +5,7 @@ on: permissions: issues: write + pull-requests: write jobs: stale: @@ -13,6 +14,10 @@ jobs: - uses: actions/stale@v8 with: stale-issue-message: 'This issue has been marked as stale because it has been open for 30 days with no activity. This thread will be automatically closed in 5 days if no further activity occurs.' + stale-pr-message: 'This PR has been marked as stale because it has been open for 90 days with no activity. This thread will be automatically closed in 30 days if no further activity occurs.' exempt-issue-labels: 'bug,exporters,good first issue,onnx,onnxruntime,quantization' - days-before-stale: 30 - days-before-close: 5 + days-before-issue-stale: 30 + days-before-issue-close: 5 + days-before-pr-stale: 90 + days-before-pr-close: 30 + exempt-all-pr-assignees: true \ No newline at end of file From 12b3b35366bbc2282c45407eae642cdab4c1e894 Mon Sep 17 00:00:00 2001 From: Ella Charlaix <80481427+echarlaix@users.noreply.github.com> Date: Thu, 12 Dec 2024 13:35:56 +0100 Subject: [PATCH 17/20] Enable transformers v4.47 support (#2119) * enable latest transformers release * fix custom module test * adapt config push to hub tests --- setup.py | 8 +++---- tests/onnx/test_onnx_export_custom_module.py | 4 ++-- tests/test_configuration_utils.py | 24 +++++++------------- 3 files changed, 14 insertions(+), 22 deletions(-) diff --git a/setup.py b/setup.py index 6736085943a..28b6941ebe8 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ "datasets>=1.2.1", "evaluate", "protobuf>=3.20.1", - "transformers>=4.36,<4.47.0", + "transformers>=4.36,<4.48.0", ], "onnxruntime-gpu": [ "onnx", @@ -60,19 +60,19 @@ "evaluate", "protobuf>=3.20.1", "accelerate", # ORTTrainer requires it. 
- "transformers>=4.36,<4.47.0", + "transformers>=4.36,<4.48.0", ], "exporters": [ "onnx", "onnxruntime", "timm", - "transformers>=4.36,<4.47.0", + "transformers>=4.36,<4.48.0", ], "exporters-gpu": [ "onnx", "onnxruntime-gpu", "timm", - "transformers>=4.36,<4.47.0", + "transformers>=4.36,<4.48.0", ], "exporters-tf": [ "tensorflow>=2.4,<=2.12.1", diff --git a/tests/onnx/test_onnx_export_custom_module.py b/tests/onnx/test_onnx_export_custom_module.py index 4398c14f01d..9416093c841 100644 --- a/tests/onnx/test_onnx_export_custom_module.py +++ b/tests/onnx/test_onnx_export_custom_module.py @@ -22,7 +22,7 @@ if is_torch_available(): import torch - from transformers.models.deberta import modeling_deberta + from transformers.models.sew_d import modeling_sew_d from optimum.utils import check_if_torch_greater @@ -36,7 +36,7 @@ def test_training(self): """Tests export of StableDropout in training mode.""" devnull = open(os.devnull, "wb") # drop_prob must be > 0 for the test to be meaningful - sd = modeling_deberta.StableDropout(0.1) + sd = modeling_sew_d.StableDropout(0.1) # Avoid warnings in training mode do_constant_folding = False # Dropout is a no-op in inference mode diff --git a/tests/test_configuration_utils.py b/tests/test_configuration_utils.py index 4c721f089d7..d70b01fe7e1 100644 --- a/tests/test_configuration_utils.py +++ b/tests/test_configuration_utils.py @@ -12,13 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import os import tempfile import unittest from huggingface_hub import HfFolder, delete_repo from requests.exceptions import HTTPError -from transformers.testing_utils import TOKEN, USER, is_staging_test +from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test from optimum.configuration_utils import BaseConfig @@ -69,12 +68,11 @@ def tearDownClass(cls): def test_push_to_hub(self): config = FakeConfig(attribute=15) - with tempfile.TemporaryDirectory() as tmp_dir: - config.save_pretrained( - os.path.join(tmp_dir, "optimum-test-base-config"), push_to_hub=True, token=self._token - ) - new_config = FakeConfig.from_pretrained(f"{USER}/optimum-test-base-config") + with TemporaryHubRepo(token=self._token) as tmp_repo: + config.push_to_hub(tmp_repo.repo_id, token=self._token) + + new_config = FakeConfig.from_pretrained(tmp_repo.repo_id) for k, v in config.to_dict().items(): if k != "optimum_version" and k != "transformers_version": self.assertEqual(v, getattr(new_config, k)) @@ -82,15 +80,9 @@ def test_push_to_hub(self): def test_push_to_hub_in_organization(self): config = FakeConfig(attribute=15) - with tempfile.TemporaryDirectory() as tmp_dir: - config.save_pretrained( - os.path.join(tmp_dir, "optimum-test-base-config-org"), - push_to_hub=True, - token=self._token, - organization="valid_org", - ) - - new_config = FakeConfig.from_pretrained("valid_org/optimum-test-base-config-org") + with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo: + config.push_to_hub(tmp_repo.repo_id, token=self._token) + new_config = FakeConfig.from_pretrained(tmp_repo.repo_id) for k, v in config.to_dict().items(): if k != "optimum_version" and k != "transformers_version": self.assertEqual(v, getattr(new_config, k)) From 22d93e74ceffba796d8fb0dd47d99680be4b5608 Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Thu, 12 Dec 2024 15:17:18 +0200 Subject: [PATCH 18/20] Add ONNX export support for MGP-STR (#2099) * Enable mpg-str 
ONNX export * No longer needed * Improve model patcher * Formatting * `ruff` * Also support image-to-text task * Add unit tests * Add listed support for MGP-STR --- docs/source/exporters/onnx/overview.mdx | 1 + optimum/exporters/onnx/model_configs.py | 16 ++++++++++++ optimum/exporters/onnx/model_patcher.py | 26 ++++++++++++++++++++ optimum/exporters/tasks.py | 7 +++++- tests/exporters/exporters_utils.py | 2 ++ tests/onnxruntime/utils_onnxruntime_tests.py | 1 + 6 files changed, 52 insertions(+), 1 deletion(-) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 57005b85678..46ab3cb8a64 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -65,6 +65,7 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - Marian - MarkupLM - MBart +- MGP-STR - Mistral - MobileBert - MobileVit diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index b39d19ec782..85e235f9a96 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -82,6 +82,7 @@ from .model_patcher import ( CLIPModelPatcher, FalconModelPatcher, + MgpstrModelPatcher, MistralModelPatcher, MusicgenModelPatcher, SAMModelPatcher, @@ -933,6 +934,21 @@ def torch_to_onnx_input_map(self) -> Dict[str, str]: return {"x": "pixel_values"} +class MgpstrOnnxConfig(ViTOnnxConfig): + @property + def outputs(self) -> Dict[str, Dict[int, str]]: + return { + "char_logits": {0: "batch_size"}, + "bpe_logits": {0: "batch_size"}, + "wp_logits": {0: "batch_size"}, + } + + def patch_model_for_export( + self, model: Union["PreTrainedModel", "TFPreTrainedModel"], model_kwargs: Optional[Dict[str, Any]] = None + ) -> "ModelPatcher": + return MgpstrModelPatcher(self, model, model_kwargs=model_kwargs) + + class SentenceTransformersTransformerOnnxConfig(TextEncoderOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedTextConfig DEFAULT_ONNX_OPSET = 14 # Some bottleneck transformers models require a specific ONNX opset to be successfully exported. We put a rather high opset here for the export to work for all architectures. 
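
For context, a minimal sketch (not taken from this patch) of how the three unbundled
logit streams declared by `MgpstrOnnxConfig` can be consumed once the model is
exported, e.g. with `optimum-cli export onnx --model alibaba-damo/mgp-str-base
--task image-to-text mgp_str_onnx/`. The output path, batch size, and dummy input
are illustrative assumptions; the output names are the ones defined by the config
above and unpacked by the patcher below:

    # Run an exported MGP-STR model with ONNX Runtime and unpack its three outputs.
    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession("mgp_str_onnx/model.onnx")  # assumed export path

    # MGP-STR is ViT-based, so the only input is `pixel_values`;
    # mgp-str-base expects 32x128 text-line crops.
    pixel_values = np.random.rand(1, 3, 32, 128).astype(np.float32)

    char_logits, bpe_logits, wp_logits = session.run(
        ["char_logits", "bpe_logits", "wp_logits"],
        {"pixel_values": pixel_values},
    )
    print(char_logits.shape, bpe_logits.shape, wp_logits.shape)
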
diff --git a/optimum/exporters/onnx/model_patcher.py b/optimum/exporters/onnx/model_patcher.py index 2c0f9aeba67..083bc127999 100644 --- a/optimum/exporters/onnx/model_patcher.py +++ b/optimum/exporters/onnx/model_patcher.py @@ -509,6 +509,32 @@ def patched_forward(*args, **kwargs): self.patched_forward = patched_forward +class MgpstrModelPatcher(ModelPatcher): + def __init__( + self, + config: "OnnxConfig", + model: Union["PreTrainedModel", "TFPreTrainedModel"], + model_kwargs: Optional[Dict[str, Any]] = None, + ): + super().__init__(config, model, model_kwargs) + + @functools.wraps(self.orig_forward) + def patched_forward(*args, **kwargs): + signature = inspect.signature(self.orig_forward) + args, kwargs = override_arguments(args, kwargs, signature, model_kwargs=self.model_kwargs) + + # logits is a tuple, so we unpack it and return them as separate outputs + char_logits, bpe_logits, wp_logits = self.orig_forward(*args, **kwargs).logits + + return { + "char_logits": char_logits, + "bpe_logits": bpe_logits, + "wp_logits": wp_logits, + } + + self.patched_forward = patched_forward + + class SAMModelPatcher(ModelPatcher): def __init__( self, diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 0a3758e97cf..ba17730f9d9 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -211,7 +211,7 @@ class TasksManager: "image-classification": "AutoModelForImageClassification", "image-segmentation": ("AutoModelForImageSegmentation", "AutoModelForSemanticSegmentation"), "image-to-image": "AutoModelForImageToImage", - "image-to-text": "AutoModelForVision2Seq", + "image-to-text": ("AutoModelForVision2Seq", "AutoModel"), "mask-generation": "AutoModel", "masked-im": "AutoModelForMaskedImageModeling", "multiple-choice": "AutoModelForMultipleChoice", @@ -824,6 +824,11 @@ class TasksManager: "question-answering", onnx="MBartOnnxConfig", ), + "mgp-str": supported_tasks_mapping( + "feature-extraction", + "image-to-text", + onnx="MgpstrOnnxConfig", + ), "mistral": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 32156d9eebf..5f071e0f9eb 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -116,6 +116,7 @@ "marian": "sshleifer/tiny-marian-en-de", # hf-internal-testing ones are broken "markuplm": "hf-internal-testing/tiny-random-MarkupLMModel", "mbart": "hf-internal-testing/tiny-random-mbart", + "mgp-str": "hf-internal-testing/tiny-random-MgpstrForSceneTextRecognition", "mistral": "echarlaix/tiny-random-mistral", "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel", "mobilenet-v2": "hf-internal-testing/tiny-random-MobileNetV2Model", @@ -247,6 +248,7 @@ "marian": "Helsinki-NLP/opus-mt-en-de", "markuplm": "hf-internal-testing/tiny-random-MarkupLMModel", "mbart": "sshleifer/tiny-mbart", + "mgp-str": "alibaba-damo/mgp-str-base", "mobilebert": "google/mobilebert-uncased", # "mobilenet_v1": "google/mobilenet_v1_0.75_192", # "mobilenet_v2": "google/mobilenet_v2_0.35_96", diff --git a/tests/onnxruntime/utils_onnxruntime_tests.py b/tests/onnxruntime/utils_onnxruntime_tests.py index cccecd53817..c33c07fc7b1 100644 --- a/tests/onnxruntime/utils_onnxruntime_tests.py +++ b/tests/onnxruntime/utils_onnxruntime_tests.py @@ -118,6 +118,7 @@ "m2m_100": "hf-internal-testing/tiny-random-m2m_100", "marian": "echarlaix/tiny-random-marian", "mbart": "hf-internal-testing/tiny-random-mbart", + "mgp-str": 
"hf-internal-testing/tiny-random-MgpstrForSceneTextRecognition", "mistral": "echarlaix/tiny-random-mistral", "mobilebert": "hf-internal-testing/tiny-random-MobileBertModel", "mobilenet_v1": "google/mobilenet_v1_0.75_192", From 3f007661377956402439f2ea0567d28b930ca38c Mon Sep 17 00:00:00 2001 From: Joshua Lochner Date: Fri, 13 Dec 2024 17:58:40 +0200 Subject: [PATCH 19/20] Add ONNX export support for OLMo and OLMo2 (#2121) Add support for OLMo and OLMo2 Co-authored-by: Ella Charlaix --- docs/source/exporters/onnx/overview.mdx | 2 ++ optimum/exporters/onnx/model_configs.py | 9 +++++++++ optimum/exporters/tasks.py | 14 ++++++++++++++ tests/exporters/exporters_utils.py | 2 ++ 4 files changed, 27 insertions(+) diff --git a/docs/source/exporters/onnx/overview.mdx b/docs/source/exporters/onnx/overview.mdx index 46ab3cb8a64..fbe7b42c44f 100644 --- a/docs/source/exporters/onnx/overview.mdx +++ b/docs/source/exporters/onnx/overview.mdx @@ -75,6 +75,8 @@ Supported architectures from [🤗 Transformers](https://huggingface.co/docs/tra - MT5 - Musicgen (text-conditional only) - Nystromformer +- OLMo +- OLMo2 - OWL-ViT - Pegasus - Perceiver diff --git a/optimum/exporters/onnx/model_configs.py b/optimum/exporters/onnx/model_configs.py index 85e235f9a96..1c838408807 100644 --- a/optimum/exporters/onnx/model_configs.py +++ b/optimum/exporters/onnx/model_configs.py @@ -325,6 +325,15 @@ class LlamaOnnxConfig(TextDecoderWithPositionIdsOnnxConfig): NORMALIZED_CONFIG_CLASS = NormalizedTextConfig +class OlmoOnnxConfig(LlamaOnnxConfig): + ATOL_FOR_VALIDATION = 1e-4 + MIN_TRANSFORMERS_VERSION = version.parse("4.40.0") + + +class Olmo2OnnxConfig(OlmoOnnxConfig): + MIN_TRANSFORMERS_VERSION = version.parse("4.47.0") + + class Qwen2OnnxConfig(LlamaOnnxConfig): MIN_TRANSFORMERS_VERSION = version.parse("4.37.0") diff --git a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index ba17730f9d9..32e90c7da19 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -954,6 +954,20 @@ class TasksManager: "text-generation-with-past", onnx="GraniteOnnxConfig", ), + "olmo": supported_tasks_mapping( + "feature-extraction", + "feature-extraction-with-past", + "text-generation", + "text-generation-with-past", + onnx="OlmoOnnxConfig", + ), + "olmo2": supported_tasks_mapping( + "feature-extraction", + "feature-extraction-with-past", + "text-generation", + "text-generation-with-past", + onnx="Olmo2OnnxConfig", + ), "pegasus": supported_tasks_mapping( "feature-extraction", "feature-extraction-with-past", diff --git a/tests/exporters/exporters_utils.py b/tests/exporters/exporters_utils.py index 5f071e0f9eb..e04a850bc8c 100644 --- a/tests/exporters/exporters_utils.py +++ b/tests/exporters/exporters_utils.py @@ -127,6 +127,8 @@ "mt5": "lewtun/tiny-random-mt5", "musicgen": "hf-internal-testing/tiny-random-MusicgenForConditionalGeneration", "nystromformer": "hf-internal-testing/tiny-random-NystromformerModel", + "olmo": "hf-internal-testing/tiny-random-OlmoForCausalLM", + "olmo2": "hf-internal-testing/tiny-random-Olmo2ForCausalLM", "opt": "hf-internal-testing/tiny-random-OPTModel", "owlv2": "hf-internal-testing/tiny-random-Owlv2Model", "owlvit": "hf-tiny-model-private/tiny-random-OwlViTModel", From 4daa40896f693649e21696c509cd98c7e0c40e3c Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Fri, 13 Dec 2024 17:03:26 +0100 Subject: [PATCH 20/20] Pass on `model_kwargs` when loading a sentence-transformers model before export (#2126) --- optimum/exporters/tasks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/optimum/exporters/tasks.py b/optimum/exporters/tasks.py index 32e90c7da19..4db4130302d 100644 --- a/optimum/exporters/tasks.py +++ b/optimum/exporters/tasks.py @@ -2141,6 +2141,7 @@ def get_model_from_task( use_auth_token = model_kwargs.pop("use_auth_token", None) token = model_kwargs.pop("token", None) trust_remote_code = model_kwargs.pop("trust_remote_code", False) + model_kwargs["torch_dtype"] = torch_dtype if use_auth_token is not None: warnings.warn( @@ -2158,6 +2159,7 @@ def get_model_from_task( token=token, revision=revision, trust_remote_code=trust_remote_code, + model_kwargs=model_kwargs, ) else: try:
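
To illustrate the effect of this last change, a sketch is given below. Assumptions:
the checkpoint id is only an example, and the keyword names follow the
`get_model_from_task` signature visible in the hunks above; treat the exact call as
illustrative rather than a guaranteed API.

    # After this patch, torch_dtype is folded into model_kwargs and forwarded to
    # SentenceTransformer(..., model_kwargs=...), so the model loads in half precision.
    import torch
    from optimum.exporters.tasks import TasksManager

    model = TasksManager.get_model_from_task(
        task="feature-extraction",
        model_name_or_path="sentence-transformers/all-MiniLM-L6-v2",  # example id
        library_name="sentence_transformers",
        torch_dtype=torch.float16,
    )
    print(next(model.parameters()).dtype)  # expected: torch.float16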