Commit 1465170: Merge branch 'master' into ray-doc-1

Note: this repository was archived by the owner on May 28, 2024, and is now read-only.

Author: YQ-Wang
Authored: Oct 4, 2023
Signed-off-by: Yiqing Wang <[email protected]>
Parents: 139a900 + fa6f017

Showing 8 changed files with 16 additions and 14 deletions.
README.md (6 additions, 5 deletions)

@@ -76,7 +76,7 @@ ray up deploy/ray/aviary-cluster.yaml
 ray attach deploy/ray/aviary-cluster.yaml
 
 # Deploy the LightGPT model.
-serve run serve/amazon--LightGPT.yaml
+serve run serve_configs/amazon--LightGPT.yaml
 ```
 
 You can deploy any model in the `models` directory of this repo,

@@ -274,7 +274,7 @@ RayLLM uses the Ray Serve CLI that allows you to interact with deployed models.
 
 ```shell
 # Start a new model in Ray Serve from provided configuration
-serve run serve/<model_config_path>
+serve run serve_configs/<model_config_path>
 
 # Get the status of the running deployments
 serve status

@@ -303,7 +303,8 @@ The easiest way is to copy the configuration of the existing model's YAML file a
 Run multiple models at once by aggregating the Serve configs for different models into a single, unified config. For example, use this config to run the `LightGPT` and `Llama-2-7b-chat` model in a single Serve application:
 
 ```yaml
-# File name: serve/config.yaml
+# File name: serve_configs/config.yaml
+
 applications:
 - name: router
   import_path: aviary.backend:router_application

@@ -314,12 +315,12 @@ applications:
       - ./models/continuous_batching/meta-llama--Llama-2-7b-chat-hf.yaml
 ```
-The config includes both models in the `model` argument for the `router`. Additionally, the Serve configs for both model applications are included. Save this unified config file to the `serve/` folder.
+The config includes both models in the `model` argument for the `router`. Additionally, the Serve configs for both model applications are included. Save this unified config file to the `serve_configs/` folder.
 
 Run the config to deploy the models:
 
 ```shell
-serve run serve/<config.yaml>
+serve run serve_configs/<config.yaml>
 ```
 
 ## How do I deploy a model to multiple nodes?
aviary/backend/server/models.py (1 addition, 1 deletion)

@@ -508,7 +508,7 @@ class AppArgs(BaseModel):
 
 
 class RouterArgs(BaseModel):
-    models: Dict[str, Union[str, LLMApp]]
+    models: Union[str, LLMApp, List[Union[LLMApp, str]]]
 
 
 class PlacementConfig(BaseModel):
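The practical effect of the `RouterArgs` change: `models` no longer has to be a dict keyed by model id; a single config path, a single `LLMApp`, or a list of either now validates. A minimal sketch of the new behaviour, with `LLMApp` stubbed out since its definition is not part of this diff:

```python
# Minimal sketch of the new RouterArgs contract. LLMApp here is a stand-in
# for aviary.backend.server.models.LLMApp, whose fields this diff doesn't show.
from typing import List, Union

from pydantic import BaseModel


class LLMApp(BaseModel):  # hypothetical stub, not the real class
    model_id: str = "stub"


class RouterArgs(BaseModel):
    models: Union[str, LLMApp, List[Union[LLMApp, str]]]


# A single config path and a list of paths both validate now:
RouterArgs.parse_obj({"models": "./models/continuous_batching/amazon--LightGPT.yaml"})
RouterArgs.parse_obj({"models": [
    "./models/continuous_batching/amazon--LightGPT.yaml",
    "./models/continuous_batching/meta-llama--Llama-2-7b-chat-hf.yaml",
]})
```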
aviary/backend/server/run.py (4 additions, 3 deletions)

@@ -7,7 +7,7 @@
 from aviary.backend.llm.vllm.vllm_engine import VLLMEngine
 from aviary.backend.llm.vllm.vllm_models import VLLMApp
 from aviary.backend.server.app import RouterDeployment
-from aviary.backend.server.models import LLMApp, ScalingConfig
+from aviary.backend.server.models import LLMApp, RouterArgs, ScalingConfig
 from aviary.backend.server.plugins.deployment_base_client import DeploymentBaseClient
 from aviary.backend.server.plugins.execution_hooks import (
     ExecutionHooks,

@@ -111,7 +111,8 @@ def router_deployment(
 
 
 def router_application(args):
-    llm_apps = parse_args(args, llm_app_cls=VLLMApp)
+    router_args = RouterArgs.parse_obj(args)
+    llm_apps = parse_args(router_args.models, llm_app_cls=VLLMApp)
     return router_deployment(llm_apps, enable_duplicate_models=False)

@@ -125,7 +126,7 @@ def run(
     """
     ray._private.usage.usage_lib.record_library_usage("aviary")
-    router_app = router_application(vllm_base_args)
+    router_app = router_application({"models": vllm_base_args})
 
     host = "0.0.0.0"
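For context on why both call sites change in the same way: Ray Serve invokes the `import_path` target (`aviary.backend:router_application`) with the application's `args:` mapping from the YAML, so the serve configs below and `run()` must hand over the same `{"models": ...}` shape. A hedged illustration, with the config path chosen only as an example:

```python
# Illustrative only: mimic what `serve run` does with a config's `args:` block.
# router_application (shown in the diff above) validates the dict through
# RouterArgs.parse_obj before parsing each model config.
from aviary.backend.server.run import router_application

yaml_args = {"models": ["./models/continuous_batching/amazon--LightGPT.yaml"]}
router_app = router_application(yaml_args)  # same shape run() now builds by hand
```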
serve_configs/amazon--LightGPT.yaml (1 addition, 1 deletion)

@@ -3,5 +3,5 @@ applications:
   route_prefix: /amazon--LightGPT
   import_path: aviary.backend:router_application
-    args:
+  args:
     models:
     - "./models/continuous_batching/amazon--LightGPT.yaml"
serve_configs/codellama--CodeLlama-34b-Instruct-hf.yaml (1 addition, 1 deletion)

@@ -3,5 +3,5 @@ applications:
   route_prefix: /codellama--CodeLlama-34b-Instruct-hf
   import_path: aviary.backend:router_application
-    args:
+  args:
     models:
     - "./models/continuous_batching/codellama--CodeLlama-34b-Instruct-hf.yaml"
serve_configs/meta-llama--Llama-2-13b-chat-hf.yaml (1 addition, 1 deletion)

@@ -3,5 +3,5 @@ applications:
   route_prefix: /meta-llama--Llama-2-13b-chat-hf
   import_path: aviary.backend:router_application
-    args:
+  args:
     models:
     - "./models/continuous_batching/meta-llama--Llama-2-13b-chat-hf.yaml"
serve_configs/meta-llama--Llama-2-70b-chat-hf.yaml (1 addition, 1 deletion)

@@ -3,5 +3,5 @@ applications:
   route_prefix: /meta-llama--Llama-2-70b-chat-hf
   import_path: aviary.backend:router_application
-    args:
+  args:
     models:
     - "./models/continuous_batching/meta-llama--Llama-2-70b-chat-hf.yaml"
serve_configs/meta-llama--Llama-2-7b-chat-hf.yaml (1 addition, 1 deletion)

@@ -3,5 +3,5 @@ applications:
   route_prefix: /meta-llama--Llama-2-7b-chat-hf
   import_path: aviary.backend:router_application
-    args:
+  args:
     models:
     - "./models/continuous_batching/meta-llama--Llama-2-7b-chat-hf.yaml"
