From 04c72cb3b063b88a6d99707f7a7312776ad7a3da Mon Sep 17 00:00:00 2001 From: PeterYang12 Date: Sun, 11 Aug 2024 22:47:04 -0700 Subject: [PATCH 1/2] GMC: Add GPU support for GMC. Enable NVIDIA GPU support for GMC, including sequence and switch mode. Note that switch mode may fail due to insufficient GPU memory. Signed-off-by: PeterYang12 --- .../config/samples/chatQnA_nv.yaml | 68 ++++++++++ .../config/samples/chatQnA_switch_nv.yaml | 124 ++++++++++++++++++ .../controller/gmconnector_controller.go | 3 + microservices-connector/usage_guide.md | 6 +- 4 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 microservices-connector/config/samples/chatQnA_nv.yaml create mode 100644 microservices-connector/config/samples/chatQnA_switch_nv.yaml diff --git a/microservices-connector/config/samples/chatQnA_nv.yaml b/microservices-connector/config/samples/chatQnA_nv.yaml new file mode 100644 index 000000000..ae8c0362c --- /dev/null +++ b/microservices-connector/config/samples/chatQnA_nv.yaml @@ -0,0 +1,68 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: gmc.opea.io/v1alpha3 +kind: GMConnector +metadata: + labels: + app.kubernetes.io/name: gmconnector + app.kubernetes.io/managed-by: kustomize + gmc/platform: nvidia + name: chatqa + namespace: chatqa +spec: + routerConfig: + name: router + serviceName: router-service + nodes: + root: + routerType: Sequence + steps: + - name: Embedding + internalService: + serviceName: embedding-svc + config: + endpoint: /v1/embeddings + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc + - name: TeiEmbedding + internalService: + serviceName: tei-embedding-svc + isDownstreamService: true + - name: Retriever + data: $response + internalService: + serviceName: retriever-svc + config: + endpoint: /v1/retrieval + REDIS_URL: redis-vector-db + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc + - name: VectorDB + internalService: + serviceName: redis-vector-db + isDownstreamService: true + - name: Reranking + 
data: $response + internalService: + serviceName: reranking-svc + config: + endpoint: /v1/reranking + TEI_RERANKING_ENDPOINT: tei-reranking-svc + - name: TeiReranking + internalService: + serviceName: tei-reranking-svc + config: + endpoint: /rerank + isDownstreamService: true + - name: Llm + data: $response + internalService: + serviceName: llm-svc + config: + endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-service-m + - name: TgiNvidia + internalService: + serviceName: tgi-service-m + config: + endpoint: /generate + isDownstreamService: true diff --git a/microservices-connector/config/samples/chatQnA_switch_nv.yaml b/microservices-connector/config/samples/chatQnA_switch_nv.yaml new file mode 100644 index 000000000..dc2021b16 --- /dev/null +++ b/microservices-connector/config/samples/chatQnA_switch_nv.yaml @@ -0,0 +1,124 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: gmc.opea.io/v1alpha3 +kind: GMConnector +metadata: + labels: + app.kubernetes.io/name: gmconnector + app.kubernetes.io/managed-by: kustomize + gmc/platform: nvidia + name: switch + namespace: switch +spec: + routerConfig: + name: router + serviceName: router-service + nodes: + root: + routerType: Sequence + steps: + - name: Embedding + nodeName: node1 + - name: Reranking + data: $response + internalService: + serviceName: reranking-svc + config: + endpoint: /v1/reranking + TEI_RERANKING_ENDPOINT: tei-reranking-svc + - name: TeiReranking + internalService: + serviceName: tei-reranking-svc + config: + endpoint: /rerank + isDownstreamService: true + - name: Llm + data: $response + nodeName: node2 + node1: + routerType: Switch + steps: + - name: Embedding + condition: embedding-model-id==large + internalService: + serviceName: embedding-svc-large + config: + endpoint: /v1/embeddings + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15 + - name: Embedding + condition: embedding-model-id==small + internalService: + serviceName: embedding-svc-small + 
config: + endpoint: /v1/embeddings + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small + - name: TeiEmbedding + internalService: + serviceName: tei-embedding-svc-bge15 + config: + MODEL_ID: BAAI/bge-base-en-v1.5 + isDownstreamService: true + - name: TeiEmbedding + internalService: + serviceName: tei-embedding-svc-bge-small + config: + MODEL_ID: BAAI/bge-base-en-v1.5 + isDownstreamService: true + - name: Retriever + condition: embedding-model-id==large + data: $response + internalService: + serviceName: retriever-svc-large + config: + endpoint: /v1/retrieval + REDIS_URL: redis-vector-db-large + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge15 + - name: Retriever + condition: embedding-model-id==small + data: $response + internalService: + serviceName: retriever-svc-small + config: + endpoint: /v1/retrieval + REDIS_URL: redis-vector-db-small + TEI_EMBEDDING_ENDPOINT: tei-embedding-svc-bge-small + - name: VectorDB + internalService: + serviceName: redis-vector-db-large + isDownstreamService: true + - name: VectorDB + internalService: + serviceName: redis-vector-db-small + isDownstreamService: true + node2: + routerType: Switch + steps: + - name: Llm + condition: model-id==intel + internalService: + serviceName: llm-svc-intel + config: + endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-service-intel + - name: Llm + condition: model-id==llama + internalService: + serviceName: llm-svc-llama + config: + endpoint: /v1/chat/completions + TGI_LLM_ENDPOINT: tgi-service-llama + - name: TgiNvidia + internalService: + serviceName: tgi-service-intel + config: + endpoint: /generate + MODEL_ID: Intel/neural-chat-7b-v3-3 + isDownstreamService: true + - name: TgiNvidia + internalService: + serviceName: tgi-service-llama + config: + endpoint: /generate + MODEL_ID: bigscience/bloom-560m + isDownstreamService: true diff --git a/microservices-connector/internal/controller/gmconnector_controller.go b/microservices-connector/internal/controller/gmconnector_controller.go index 
b74839e62..4537baf0c 100644 --- a/microservices-connector/internal/controller/gmconnector_controller.go +++ b/microservices-connector/internal/controller/gmconnector_controller.go @@ -45,12 +45,14 @@ const ( TeiReranking = "TeiReranking" Tgi = "Tgi" TgiGaudi = "TgiGaudi" + TgiNvidia = "TgiNvidia" Llm = "Llm" DocSum = "DocSum" Router = "router" DataPrep = "DataPrep" xeon = "xeon" gaudi = "gaudi" + nvidia = "nvidia" WebRetriever = "WebRetriever" yaml_dir = "/tmp/microservices/yamls/" Service = "Service" @@ -76,6 +78,7 @@ var yamlDict = map[string]string{ TeiReranking: yaml_dir + "teirerank.yaml", Tgi: yaml_dir + "tgi.yaml", TgiGaudi: yaml_dir + "tgi_gaudi.yaml", + TgiNvidia: yaml_dir + "tgi_nv.yaml", Llm: yaml_dir + "llm-uservice.yaml", DocSum: yaml_dir + "docsum-llm-uservice.yaml", Router: yaml_dir + "gmc-router.yaml", diff --git a/microservices-connector/usage_guide.md b/microservices-connector/usage_guide.md index 0b57cbf10..452b95682 100644 --- a/microservices-connector/usage_guide.md +++ b/microservices-connector/usage_guide.md @@ -14,6 +14,10 @@ A sample for chatQnA can be found at config/samples/chatQnA_xeon.yaml ```sh kubectl create ns chatqa kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml +# To use Gaudi devive +#kubectl apply -f $(pwd)/config/samples/chatQnA_gaudi.yaml +# To use Nvidia GPU +#kubectl apply -f $(pwd)/config/samples/chatQnA_nv.yaml ``` **GMC will reconcile chatQnA custom resource and get all related components/services ready** @@ -39,7 +43,7 @@ kubectl create deployment client-test -n chatqa --image=python:3.8.13 -- sleep i **Access the pipeline using the above URL from the client pod** ```bash -export CLIENT_POD=$(kubectl get pod -l app=client-test -o jsonpath={.items..metadata.name}) +export CLIENT_POD=$(kubectl get pod -n chatqa -l app=client-test -o jsonpath={.items..metadata.name}) export accessUrl=$(kubectl get gmc -n chatqa -o jsonpath="{.items[?(@.metadata.name=='chatqa')].status.accessUrl}") kubectl exec "$CLIENT_POD" -n 
chatqa -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?","parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' ``` From 7e0476a283b5b3285ed00342a31b8dc75a7b7e66 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 13 Aug 2024 01:12:29 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- microservices-connector/usage_guide.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/microservices-connector/usage_guide.md b/microservices-connector/usage_guide.md index 452b95682..710faf2cd 100644 --- a/microservices-connector/usage_guide.md +++ b/microservices-connector/usage_guide.md @@ -14,7 +14,7 @@ A sample for chatQnA can be found at config/samples/chatQnA_xeon.yaml ```sh kubectl create ns chatqa kubectl apply -f $(pwd)/config/samples/chatQnA_xeon.yaml -# To use Gaudi devive +# To use Gaudi device #kubectl apply -f $(pwd)/config/samples/chatQnA_gaudi.yaml # To use Nvidia GPU #kubectl apply -f $(pwd)/config/samples/chatQnA_nv.yaml