
Phi 3.5 Moe FMI fix (#3703)
* Phi 3.5 MoE instruct - upgrade the FMI base image to 62 with a custom vllm package fix; includes support for A100 SKUs

---------

Co-authored-by: Sarthak Singhal <[email protected]>
sarthaks95 and Sarthak Singhal authored Dec 24, 2024
1 parent ed4e91e commit 008bbf8
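
The commit points the model at tag 62 of the curated foundation-model-inference image. A minimal local check, assuming the tag is publicly pullable from MCR:

# Pull the upgraded FMI base image referenced by this commit (illustrative verification step, not part of the change)
docker pull mcr.microsoft.com/azureml/curated/foundation-model-inference:62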
Showing 3 changed files with 6 additions and 6 deletions.
2 changes: 1 addition & 1 deletion assets/models/system/phi-3.5-moe-128k-instruct/MLmodel
@@ -20,7 +20,7 @@ flavors:
loader_module: azureml.evaluate.mlflow.hftransformers
python_version: 3.9.19
metadata:
- azureml.base_image: mcr.microsoft.com/azureml/curated/foundation-model-inference:49
+ azureml.base_image: mcr.microsoft.com/azureml/curated/foundation-model-inference:62
base_model_name: Phi-3.5-MoE-128k-Instruct
base_model_task: chat-completion
model_provider_name: microsoft
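
Once merged, the updated pointer can be confirmed straight from the file; a quick check, using the repository path shown in the file header above:

# Should print the single metadata line referencing foundation-model-inference:62
grep -n "azureml.base_image" assets/models/system/phi-3.5-moe-128k-instruct/MLmodel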
2 changes: 1 addition & 1 deletion assets/models/system/phi-3.5-moe-128k-instruct/spec.yaml
@@ -68,4 +68,4 @@ tags:
logging_steps: 10
save_total_limit: 1
benchmark: "quality"
- version: 4
+ version: 5
8 changes: 4 additions & 4 deletions [third changed file: the environment Dockerfile; exact path not shown in this view]
@@ -48,12 +48,12 @@ RUN pip install git+https://github.com/stanford-futuredata/megablocks.git@5897cd
# RUN pip install -e ./ --no-cache-dir

# When copied to assets repo, change to install from public pypi
RUN pip install llm-optimized-inference==0.2.16 --no-cache-dir
RUN pip install llm-optimized-inference==0.2.7 --no-cache-dir

RUN pip uninstall transformers -y
RUN pip uninstall -y vllm
RUN pip install vllm==0.6.1.post2
RUN pip install transformers==4.46.2

# Install patched vllm wheel
RUN pip install https://automlsamplenotebookdata.blob.core.windows.net/vllm/vllm-0.5.3.post1-cp310-cp310-linux_x86_64.whl

# clean conda and pip caches
RUN rm -rf ~/.cache/pip
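
The hunk above ends with the patched vllm wheel installed from blob storage. A minimal post-build sanity check, run inside the built image and assuming the patched wheel keeps the stock version string:

# Confirm which vllm build is actually importable (expected to print 0.5.3.post1 if the patched wheel took precedence)
python -c "import vllm; print(vllm.__version__)"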
