-
Notifications
You must be signed in to change notification settings - Fork 128
environments foundation model inference
github-actions[bot] edited this page Aug 14, 2024
·
77 revisions
Environment used for deploying model to use DS-MII or vLLM for inference
Version: 49
Preview
DS-MII
VLLM
View in Studio: https://ml.azure.com/registries/azureml/environments/foundation-model-inference/version/49
Docker image: mcr.microsoft.com/azureml/curated/foundation-model-inference:49
# Base image: CUDA 12.1.1 development image on Ubuntu 20.04.
FROM nvidia/cuda:12.1.1-devel-ubuntu20.04

# Environment shared by every later build step and by the running container:
# unbuffered Python output, no .pyc files, a fixed timezone and
# non-interactive apt so package installs never wait for input.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    TZ=Etc/UTC \
    DEBIAN_FRONTEND=noninteractive

# Upgrade the base packages, register the deadsnakes PPA and install git
# (git is required later by the `pip install git+https://...` step).
# Everything runs in one layer with `apt-get` (stable scripting interface),
# and the package lists are removed in the same layer so they do not bloat
# the image. Later apt steps must run `apt-get update` themselves.
RUN apt-get update && \
    apt-get upgrade -y && \
    apt-get install -y software-properties-common && \
    add-apt-repository -y ppa:deadsnakes/ppa && \
    apt-get install -y git && \
    rm -rf /var/lib/apt/lists/*
# Miniconda installer release to download, and the directory that puts the
# base conda binaries on PATH.
ENV MINICONDA_VERSION=py310_23.10.0-1
ENV PATH=/opt/miniconda/bin:$PATH

# wget downloads the Miniconda installer; runit supplies `runsvdir`, which is
# the container's CMD. Package lists are refreshed and removed in this same
# layer so the step is self-contained and leaves no apt cache behind.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        runit \
        wget && \
    rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/miniconda in batch mode (-b), reusing the prefix
# if it already exists (-f), update the base packages from conda-forge, then
# remove everything that is only needed at install time: conda caches, the
# package directory, the installer script and any __pycache__ directories.
# The find call stays on the root filesystem (-xdev, so /proc and /sys are not
# walked), does not descend into directories it is about to delete (-prune),
# and hands paths straight to rm so names containing whitespace are safe.
RUN wget -qO /tmp/miniconda.sh "https://repo.anaconda.com/miniconda/Miniconda3-${MINICONDA_VERSION}-Linux-x86_64.sh" && \
    bash /tmp/miniconda.sh -bf -p /opt/miniconda && \
    conda update --all -c conda-forge -y && \
    conda clean -ay && \
    rm -rf /opt/miniconda/pkgs && \
    rm /tmp/miniconda.sh && \
    find / -xdev -type d -name __pycache__ -prune -exec rm -rf {} +
# Prefix of the conda environment that all later pip installs target.
ENV AZUREML_CONDA_ENVIRONMENT_PATH=/azureml-envs/default

# Create conda environment with py310.
# `-y` answers the confirmation prompt explicitly instead of relying on how
# conda treats an empty stdin during a build; `conda clean -ay` drops the
# package cache that the create step repopulates, in the same layer.
RUN conda create -y -p "$AZUREML_CONDA_ENVIRONMENT_PATH" -c conda-forge \
        python=3.10 && \
    conda clean -ay

# Make the environment the default one: its bin directory goes first on PATH
# and the CONDA_* variables point at its prefix, without needing
# `conda activate` in later steps or at runtime.
ENV PATH=$AZUREML_CONDA_ENVIRONMENT_PATH/bin:$PATH \
    CONDA_DEFAULT_ENV=$AZUREML_CONDA_ENVIRONMENT_PATH \
    CONDA_PREFIX=$AZUREML_CONDA_ENVIRONMENT_PATH

WORKDIR /
# Needed for megablocks: install the CUDA 12.1 build of torch 2.3.x first.
# --no-cache-dir keeps pip's download cache out of this layer; deleting the
# cache in a later RUN would not shrink the image.
RUN pip install --no-cache-dir "torch~=2.3.0" --index-url https://download.pytorch.org/whl/cu121

# mixtral specific
# Build argument listing the CUDA architectures to target; it is exported as
# TORCH_CUDA_ARCH_LIST so it is set while megablocks is installed below.
# NOTE(review): presumably consumed by the megablocks CUDA extension build - confirm.
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.6 8.9 9.0+PTX'
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}

# megablocks is pinned to commit 5897cd6 and installed straight from git.
RUN pip install --no-cache-dir git+https://github.com/stanford-futuredata/megablocks.git@5897cd6
# For local testing
# Need to copy src code and install in editable mode
# COPY . .
# RUN pip install -e ./ --no-cache-dir
# When copied to assets repo, change to install from public pypi

# Install the inference server from PyPI, then swap whatever vllm is present
# after that install for the patched vllm wheel. All three pip steps and the
# cache cleanup run in one layer: files removed by `pip uninstall` or
# `rm -rf` in a later layer would still be stored in the earlier layer.
RUN pip install --no-cache-dir llm-optimized-inference==0.2.7 && \
    pip uninstall -y vllm && \
    pip install --no-cache-dir https://automlsamplenotebookdata.blob.core.windows.net/vllm/vllm-0.5.3.post1-cp310-cp310-linux_x86_64.whl && \
    rm -rf ~/.cache/pip
# Copy the runit service definition for the API server into the service
# directory. COPY is used because this is a plain local directory; none of
# ADD's extra behaviours (URL fetch, archive extraction) are wanted here.
COPY runit_folder/api_server /var/runit/api_server

# Strip trailing carriage returns from the run script (in case it was checked
# out with CRLF line endings) and make it executable, in a single layer.
RUN sed -i 's/\r$//g' /var/runit/api_server/run && \
    chmod +x /var/runit/api_server/run

# Runtime configuration:
#   SVDIR - runit service directory (the same path is passed to runsvdir below)
#   WORKER_TIMEOUT, VLLM_WORKER_MULTIPROC_METHOD - passed through to the
#   container environment. NOTE(review): presumably read by the API server
#   and vLLM respectively - confirm.
ENV SVDIR=/var/runit \
    WORKER_TIMEOUT=3600 \
    VLLM_WORKER_MULTIPROC_METHOD=spawn

# Documents the port exposed by the container.
# NOTE(review): presumably the API server's listen port - confirm.
EXPOSE 5001

# Start runit's service supervisor over /var/runit as the container command.
CMD [ "runsvdir", "/var/runit" ]