Commit

Merge branch 'main' into wangchang/normconfig
changwangss authored Oct 20, 2023
2 parents 4855a00 + 15b8d1e commit b83a770
Showing 69 changed files with 4,908 additions and 3,797 deletions.
33 changes: 33 additions & 0 deletions .github/workflows/test_onnxruntime_slow.yml
@@ -0,0 +1,33 @@
name: ONNX Runtime slow / Python - Test

on:
  workflow_dispatch:
  schedule:
    - cron: 0 7 * * * # every day at 7am

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  build:
    strategy:
      fail-fast: false
      matrix:
        python-version: [3.8, 3.9]
        os: [ubuntu-20.04]

    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@v2
      - name: Setup Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies for export
        run: |
          pip install .[tests,onnxruntime]
      - name: Test with pytest
        working-directory: tests
        run: |
          RUN_SLOW=1 pytest onnxruntime -s -m "run_slow" --durations=0
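The nightly job above selects tests by a custom pytest marker. As a minimal sketch of how a test opts in, assuming the suite registers a `run_slow` marker and gates on the `RUN_SLOW` environment variable (the file path and test body are hypothetical, not code from this repository):

```python
# tests/onnxruntime/test_slow_example.py  (hypothetical path)
import os

import pytest


# `run_slow` matches the `-m "run_slow"` filter used by the workflow above;
# custom markers should be registered (e.g. in setup.cfg or conftest.py).
@pytest.mark.run_slow
@pytest.mark.skipif(os.environ.get("RUN_SLOW", "0") != "1", reason="set RUN_SLOW=1 to enable slow tests")
def test_large_model_export():
    # Placeholder for an expensive export-and-inference check.
    assert True
```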
2 changes: 2 additions & 0 deletions docs/source/bettertransformer/overview.mdx
@@ -50,6 +50,7 @@ The list of supported models below:
- [DeiT](https://arxiv.org/abs/2012.12877)
- [Electra](https://arxiv.org/abs/2003.10555)
- [Ernie](https://arxiv.org/abs/1904.09223)
+- [Falcon](https://arxiv.org/abs/2306.01116)
- [FSMT](https://arxiv.org/abs/1907.06616)
- [GPT2](https://d4mucfpksywv.cloudfront.net/better-language-models/language_models_are_unsupervised_multitask_learners.pdf)
- [GPT-j](https://huggingface.co/EleutherAI/gpt-j-6B)
@@ -58,6 +59,7 @@ The list of supported models below:
- [GPT BigCode](https://arxiv.org/abs/2301.03988) (SantaCoder, StarCoder)
- [HuBERT](https://arxiv.org/pdf/2106.07447.pdf)
- [LayoutLM](https://arxiv.org/abs/1912.13318)
+- [Llama & Llama2](https://arxiv.org/abs/2302.13971)
- [MarkupLM](https://arxiv.org/abs/2110.08518)
- [Marian](https://arxiv.org/abs/1804.00344)
- [MBart](https://arxiv.org/abs/2001.08210)
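The integration these docs describe is a one-liner. A minimal sketch, assuming a checkpoint whose architecture appears in the supported list above (the model id is only an example):

```python
from transformers import AutoModel

from optimum.bettertransformer import BetterTransformer

model = AutoModel.from_pretrained("bert-base-uncased")
# Swap supported layers for PyTorch's fused fastpath kernels.
model = BetterTransformer.transform(model)
```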
3 changes: 3 additions & 0 deletions docs/source/exporters/onnx/overview.mdx
@@ -41,6 +41,7 @@ Supported architectures:
- Donut-Swin
- Electra
- Encoder Decoder
+- Falcon
- Flaubert
- GPT-2
- GPT-BigCode
@@ -60,6 +61,7 @@ Supported architectures:
- M2M100
- Marian
- MBart
+- Mistral
- MobileBert
- MobileVit
- MobileNet v1
@@ -81,6 +83,7 @@ Supported architectures:
- SEW
- SEW-D
- Speech2Text
+- SpeechT5
- Splinter
- SqueezeBert
- Stable Diffusion
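Any architecture in this list can be exported from Python (the `optimum-cli export onnx` command offers the same feature). A minimal sketch; the checkpoint and output directory are only examples:

```python
from optimum.exporters.onnx import main_export

# Downloads the checkpoint, infers the task, and writes the ONNX model
# plus its config/tokenizer files to the output directory.
main_export("gpt2", output="gpt2_onnx/")
```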
12 changes: 6 additions & 6 deletions docs/source/index.mdx
@@ -23,27 +23,27 @@ As such, Optimum enables developers to efficiently use any of these platforms wi
<div class="w-full flex flex-col space-y-4 md:space-y-0 md:grid md:grid-cols-3 md:gap-y-4 md:gap-x-5">
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./habana/index"
><div class="w-full text-center bg-gradient-to-br from-indigo-400 to-indigo-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Habana</div>
<p class="text-gray-700">Maximize training throughput and efficiency with <a href="https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html">Habana's Gaudi processor</a></p>
<p class="text-gray-700">Maximize training throughput and efficiency with <span class="underline" onclick="event.preventDefault(); window.open('https://docs.habana.ai/en/latest/Gaudi_Overview/Gaudi_Architecture.html', '_blank');">Habana's Gaudi processor</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./intel/index"
><div class="w-full text-center bg-gradient-to-br from-blue-400 to-blue-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">Intel</div>
<p class="text-gray-700">Optimize your model to speedup inference with <a href="https://docs.openvino.ai/latest/index.html">OpenVINO</a> and <a href="https://www.intel.com/content/www/us/en/developer/tools/oneapi/neural-compressor.html">Neural Compressor</a></p>
<p class="text-gray-700">Optimize your model to speedup inference with <span class="underline" onclick="event.preventDefault(); window.open('https://docs.openvino.ai/latest/index.html', '_blank');">OpenVINO</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://www.intel.com/content/www/us/en/developer/tools/oneapi/neural-compressor.html', '_blank');">Neural Compressor</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="https://huggingface.co/docs/optimum-neuron/index"
><div class="w-full text-center bg-gradient-to-br from-orange-400 to-orange-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">AWS Trainium/Inferentia</div>
<p class="text-gray-700">Accelerate your training and inference workflows with <a href="https://aws.amazon.com/machine-learning/trainium/">AWS Trainium</a> and <a href="https://aws.amazon.com/machine-learning/inferentia/">AWS Inferentia</a></p>
<p class="text-gray-700">Accelerate your training and inference workflows with <span class="underline" onclick="event.preventDefault(); window.open('https://aws.amazon.com/machine-learning/trainium/', '_blank');">AWS Trainium</span> and <span class="underline" onclick="event.preventDefault(); window.open('https://aws.amazon.com/machine-learning/inferentia/', '_blank');">AWS Inferentia</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./furiosa/index"
><div class="w-full text-center bg-gradient-to-br from-green-400 to-green-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">FuriosaAI</div>
<p class="text-gray-700">Fast and efficient inference on <a href="https://www.furiosa.ai/">FuriosaAI WARBOY</a></p>
<p class="text-gray-700">Fast and efficient inference on <span class="underline" onclick="event.preventDefault(); window.open('https://www.furiosa.ai/', '_blank');">FuriosaAI WARBOY</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./onnxruntime/overview"
><div class="w-full text-center bg-gradient-to-br from-pink-400 to-pink-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">ONNX Runtime</div>
<p class="text-gray-700">Apply quantization and graph optimization to accelerate Transformers models training and inference with <a href="https://onnxruntime.ai/">ONNX Runtime</a></p>
<p class="text-gray-700">Apply quantization and graph optimization to accelerate Transformers models training and inference with <span class="underline" onclick="event.preventDefault(); window.open('https://onnxruntime.ai/', '_blank');">ONNX Runtime</span></p>
</a>
<a class="!no-underline border dark:border-gray-700 p-5 rounded-lg shadow hover:shadow-lg" href="./bettertransformer/overview"
><div class="w-full text-center bg-gradient-to-br from-yellow-400 to-yellow-500 rounded-lg py-1.5 font-semibold mb-5 text-white text-lg leading-relaxed">BetterTransformer</div>
<p class="text-gray-700">A one-liner integration to use <a href="https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/">PyTorch's BetterTransformer</a> with Transformers models</p>
<p class="text-gray-700">A one-liner integration to use <span class="underline" onclick="event.preventDefault(); window.open('https://pytorch.org/blog/a-better-transformer-for-fast-transformer-encoder-inference/', '_blank');">PyTorch's BetterTransformer</span> with Transformers models</p>
</a>
</div>
</div>
6 changes: 2 additions & 4 deletions docs/source/onnxruntime/usage_guides/pipelines.mdx
@@ -55,11 +55,9 @@ There are tags on the Model Hub that allow you to filter for a model you'd like

<Tip>

-To be able to load the model with the ONNX Runtime backend, the export to ONNX needs
-to be supported for the considered architecture.
+To be able to load the model with the ONNX Runtime backend, the export to ONNX needs to be supported for the considered architecture.

-You can check the list of supported architectures
-[here](/exporters/onnx/package_reference/configuration#Supported-architectures).
+You can check the list of supported architectures [here](https://huggingface.co/docs/optimum/exporters/onnx/overview#overview).

</Tip>

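As a sketch of the pattern this guide describes, assuming an architecture whose ONNX export is supported (the checkpoint is only an example):

```python
from transformers import AutoTokenizer, pipeline

from optimum.onnxruntime import ORTModelForSequenceClassification

model_id = "distilbert-base-uncased-finetuned-sst-2-english"
# export=True converts the PyTorch checkpoint to ONNX on the fly.
model = ORTModelForSequenceClassification.from_pretrained(model_id, export=True)
tokenizer = AutoTokenizer.from_pretrained(model_id)

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("ONNX Runtime makes inference faster."))
```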
@@ -22,6 +22,7 @@ CMD nvidia-smi
ENV DEBIAN_FRONTEND noninteractive

# Versions
+# available options 3.8, 3.9, 3.10, 3.11
ARG PYTHON_VERSION=3.9
ARG TORCH_CUDA_VERSION=cu118
ARG TORCH_VERSION=2.0.0
@@ -34,7 +35,7 @@ SHELL ["/bin/bash", "-c"]
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
-bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
@@ -57,7 +58,7 @@ ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python
# (Optional) Install test dependencies
RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers
RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece
-RUN $PYTHON_EXE -m pip install fairscale deepspeed mpi4py
+RUN $PYTHON_EXE -m pip install deepspeed mpi4py
# RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb

# PyTorch
67 changes: 0 additions & 67 deletions examples/onnxruntime/training/docker/Dockerfile-ort1.13.1-cu116

This file was deleted.

@@ -33,7 +33,7 @@ ARG TORCHVISION_VERSION=0.14.1
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
-bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
@@ -48,7 +48,7 @@ RUN pip install pygit2 pgzip
# (Optional) Install test dependencies
RUN pip install git+https://github.com/huggingface/transformers
RUN pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece
-RUN pip install fairscale deepspeed mpi4py
+RUN pip install deepspeed mpi4py
# RUN pip install optuna ray sigopt wandb

# Install onnxruntime-training dependencies
@@ -34,7 +34,7 @@ ARG TORCHVISION_VERSION=0.15.1
# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
-bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev && \
+bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y
@@ -57,7 +57,7 @@ ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python
# (Optional) Install test dependencies
RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers
RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece
-RUN $PYTHON_EXE -m pip install fairscale deepspeed mpi4py
+RUN $PYTHON_EXE -m pip install deepspeed mpi4py
# RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb

# PyTorch
76 changes: 76 additions & 0 deletions examples/onnxruntime/training/docker/Dockerfile-ort1.16.1-cu118
@@ -0,0 +1,76 @@
# Copyright 2023 The HuggingFace Team All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Use nvidia/cuda image
FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04
CMD nvidia-smi

# Ignore interactive questions during `docker build`
ENV DEBIAN_FRONTEND noninteractive

# Versions
ARG PYTHON_VERSION=3.10
ARG TORCH_CUDA_VERSION=cu118
ARG TORCH_VERSION=2.0.0
ARG TORCHVISION_VERSION=0.15.1

# Bash shell
RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]

# Install and update tools to minimize security vulnerabilities
RUN apt-get update
RUN apt-get install -y software-properties-common wget apt-utils patchelf git libprotobuf-dev protobuf-compiler cmake \
bzip2 ca-certificates libglib2.0-0 libxext6 libsm6 libxrender1 mercurial subversion libopenmpi-dev ffmpeg && \
apt-get clean
RUN unattended-upgrade
RUN apt-get autoremove -y

# Install miniconda (the pinned installer below ships Python 3.7; Python ${PYTHON_VERSION} is installed afterwards)
ARG BUILD_USER=onnxruntimedev
ARG MINICONDA_PREFIX=/home/$BUILD_USER/miniconda3
RUN apt-get install -y curl

ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-py37_4.9.2-Linux-x86_64.sh
RUN curl -fSsL --insecure ${CONDA_URL} -o install-conda.sh && \
/bin/bash ./install-conda.sh -b -p $MINICONDA_PREFIX && \
$MINICONDA_PREFIX/bin/conda clean -ya && \
$MINICONDA_PREFIX/bin/conda install -y python=${PYTHON_VERSION}

ENV PATH=$MINICONDA_PREFIX/bin:${PATH}

ARG PYTHON_EXE=$MINICONDA_PREFIX/bin/python

# (Optional) Install test dependencies
RUN $PYTHON_EXE -m pip install git+https://github.com/huggingface/transformers
RUN $PYTHON_EXE -m pip install datasets accelerate evaluate coloredlogs absl-py rouge_score seqeval scipy sacrebleu nltk scikit-learn parameterized sentencepiece
RUN $PYTHON_EXE -m pip install deepspeed mpi4py
# RUN $PYTHON_EXE -m pip install optuna ray sigopt wandb

# PyTorch
RUN $PYTHON_EXE -m pip install onnx ninja
RUN $PYTHON_EXE -m pip install torch==${TORCH_VERSION} torchvision==${TORCHVISION_VERSION} -f https://download.pytorch.org/whl/${TORCH_CUDA_VERSION}

# ORT Module
RUN $PYTHON_EXE -m pip install onnxruntime-training==1.16.1 -f https://download.onnxruntime.ai/onnxruntime_stable_cu118.html
RUN $PYTHON_EXE -m pip install torch-ort
ENV TORCH_CUDA_ARCH_LIST="5.2 6.0 6.1 7.0 7.5 8.0 8.6+PTX"
RUN $PYTHON_EXE -m pip install --upgrade protobuf==3.20.2
RUN $PYTHON_EXE -m torch_ort.configure

WORKDIR .

CMD ["/bin/bash"]