From 2fa9dcd8a37844e6b78cd61d4ad042cad607798b Mon Sep 17 00:00:00 2001
From: IlyasMoutawwakil
Date: Mon, 16 Oct 2023 16:24:39 +0200
Subject: [PATCH] use optimum/gpt2

---
 docs/source/onnxruntime/usage_guides/gpu.mdx | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/docs/source/onnxruntime/usage_guides/gpu.mdx b/docs/source/onnxruntime/usage_guides/gpu.mdx
index d38f07250e0..aad8ec0d890 100644
--- a/docs/source/onnxruntime/usage_guides/gpu.mdx
+++ b/docs/source/onnxruntime/usage_guides/gpu.mdx
@@ -309,24 +309,20 @@ For example, for text generation, the engine can be built with:
 
 ```python
 >>> import os
->>> from transformers import AutoTokenizer
 >>> from optimum.onnxruntime import ORTModelForCausalLM
 
 >>> os.makedirs("tmp/trt_cache_gpt2_example", exist_ok=True)
 >>> provider_options = {
 ...     "trt_engine_cache_enable": True,
-...     "trt_engine_cache_path": "tmp/trt_cache_gpt2_example"
+...     "trt_engine_cache_path": "tmp/trt_cache_gpt2_example",
 ...     "trt_profile_min_shapes": "input_ids:1x1,attention_mask:1x1,position_ids:1x1",
 ...     "trt_profile_opt_shapes": "input_ids:1x1,attention_mask:1x1,position_ids:1x1",
 ...     "trt_profile_max_shapes": "input_ids:1x64,attention_mask:1x64,position_ids:1x64",
 ... }
 
 >>> ort_model = ORTModelForCausalLM.from_pretrained(
-...     "gpt2",
-...     export=True,
+...     "optimum/gpt2",
 ...     use_cache=False,
-...     use_merged=False,
-...     use_io_binding=False,
 ...     provider="TensorrtExecutionProvider",
 ...     provider_options=provider_options,
 ... )
```
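
For context, a minimal sketch of how the engine built by the snippet above might then be exercised for generation. This block is not part of the patch: it assumes the `optimum/gpt2` checkpoint also ships tokenizer files, and the prompt is purely illustrative.

```python
>>> from transformers import AutoTokenizer

>>> tokenizer = AutoTokenizer.from_pretrained("optimum/gpt2")
>>> inputs = tokenizer("Replace me by any text you'd like.", return_tensors="pt").to("cuda")

>>> # With use_cache=False the full sequence is re-fed at every decoding step,
>>> # so prompt + generated tokens must stay within the 1x64 max profile above.
>>> generated = ort_model.generate(**inputs, max_length=64)
>>> print(tokenizer.batch_decode(generated, skip_special_tokens=True))
```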