diff --git a/.github/workflows/image-build-on-manual.yaml b/.github/workflows/image-build-on-manual.yaml index d495e3371..7823c5782 100644 --- a/.github/workflows/image-build-on-manual.yaml +++ b/.github/workflows/image-build-on-manual.yaml @@ -41,6 +41,7 @@ jobs: echo "nodes=$nodes" >> $GITHUB_OUTPUT image-build: + needs: get-build-matrix strategy: matrix: node: ${{ fromJSON(needs.get-build-matrix.outputs.nodes) }} diff --git a/.github/workflows/scripts/e2e/gmc_gaudi_test.sh b/.github/workflows/scripts/e2e/gmc_gaudi_test.sh index 7cf9352b0..454c924cb 100755 --- a/.github/workflows/scripts/e2e/gmc_gaudi_test.sh +++ b/.github/workflows/scripts/e2e/gmc_gaudi_test.sh @@ -11,6 +11,7 @@ USER_ID=$(whoami) LOG_PATH=/home/$(whoami)/logs CHATQNA_NAMESPACE="${APP_NAMESPACE}-chatqna" CHATQNA_DATAPREP_NAMESPACE="${APP_NAMESPACE}-chatqna-dataprep" +CHATQNA_SWITCH_NAMESPACE="${APP_NAMESPACE}-chatqna-switch" CODEGEN_NAMESPACE="${APP_NAMESPACE}-codegen" CODETRANS_NAMESPACE="${APP_NAMESPACE}-codetrans" DOCSUM_NAMESPACE="${APP_NAMESPACE}-docsum" @@ -22,21 +23,25 @@ function validate_gmc() { echo "validate chat-qna with dataprep" validate_chatqna_with_dataprep - echo "validate codegen" - validate_codegen + echo "validate chat-qna in switch mode" + validate_chatqna_in_switch - echo "validate codetrans" - validate_codetrans + # echo "validate codegen" + # validate_codegen - echo "validate docsum" - validate_docsum + # echo "validate codetrans" + # validate_codetrans + + # echo "validate docsum" + # validate_docsum get_gmc_controller_logs } function cleanup_apps() { echo "clean up microservice-connector" - namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + # namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE") for ns in "${namespaces[@]}"; do if kubectl get namespace $ns > /dev/null 2>&1; then echo "Deleting namespace: $ns" @@ -101,7 +106,7 @@ function validate_chatqna() { } function validate_chatqna_with_dataprep() { - kubectl create ns $CHATQNA_DATAPREP_NAMESPACE + kubectl create ns $CHATQNA_DATAPREP_NAMESPACE sed -i "s|namespace: chatqa|namespace: $CHATQNA_DATAPREP_NAMESPACE|g" $(pwd)/config/samples/chatQnA_dataprep_gaudi.yaml # workaround for issue #268 yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_dataprep_gaudi.yaml @@ -178,6 +183,87 @@ function validate_chatqna_with_dataprep() { fi } +function validate_chatqna_in_switch() { + kubectl create ns $CHATQNA_SWITCH_NAMESPACE + sed -i "s|namespace: switch|namespace: $CHATQNA_SWITCH_NAMESPACE|g" $(pwd)/config/samples/chatQnA_switch_gaudi.yaml + # workaround for issue #268 + yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_switch_gaudi.yaml + kubectl apply -f $(pwd)/config/samples/chatQnA_switch_gaudi.yaml + + # Wait until the router service is ready + echo "Waiting for the chatqa router service to be ready..." 
+ wait_until_pod_ready "chatqna router" $CHATQNA_SWITCH_NAMESPACE "router-service" + output=$(kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE) + echo $output + + # deploy client pod for testing + kubectl create deployment client-test -n $CHATQNA_SWITCH_NAMESPACE --image=python:3.8.13 -- sleep infinity + + # Wait until all pods are ready + wait_until_all_pod_ready $CHATQNA_SWITCH_NAMESPACE 300s + if [ $? -ne 0 ]; then + echo "Error Some pods are not ready!" + exit 1 + fi + + # giving time to populating data + sleep 90 + + kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE + # send request to chatqnA + export CLIENT_POD=$(kubectl get pod -n $CHATQNA_SWITCH_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + echo "$CLIENT_POD" + accessUrl=$(kubectl get gmc -n $CHATQNA_SWITCH_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}") + + # test the chatqna with model condition: "model-id":"intel" and "embedding-model-id":"small" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"intel", "embedding-model-id":"small", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_intel.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_intel.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_intel.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" + fi + + # test the chatqna with model condition: "model-id":"llama" and "embedding-model-id":"large" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"llama", "embedding-model-id":"large", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_llama.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_llama.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_llama.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" 
+ fi +} + function validate_codegen() { kubectl create ns $CODEGEN_NAMESPACE sed -i "s|namespace: codegen|namespace: $CODEGEN_NAMESPACE|g" $(pwd)/config/samples/codegen_gaudi.yaml diff --git a/.github/workflows/scripts/e2e/gmc_install.sh b/.github/workflows/scripts/e2e/gmc_install.sh index b94ce39c1..f07004063 100755 --- a/.github/workflows/scripts/e2e/gmc_install.sh +++ b/.github/workflows/scripts/e2e/gmc_install.sh @@ -64,7 +64,7 @@ function cleanup_gmc() { if kubectl get namespace $SYSTEM_NAMESPACE > /dev/null 2>&1; then echo "Deleting namespace: $SYSTEM_NAMESPACE" kubectl delete namespace "$SYSTEM_NAMESPACE" - kubectl delete crd gmconnectors.gmc.opea.io + kubectl delete crd gmconnectors.gmc.opea.io || true else echo "Namespace $SYSTEM_NAMESPACE does not exist" fi diff --git a/.github/workflows/scripts/e2e/gmc_xeon_test.sh b/.github/workflows/scripts/e2e/gmc_xeon_test.sh index 8ee1867b9..16a5fff39 100755 --- a/.github/workflows/scripts/e2e/gmc_xeon_test.sh +++ b/.github/workflows/scripts/e2e/gmc_xeon_test.sh @@ -11,6 +11,7 @@ USER_ID=$(whoami) LOG_PATH=/home/$(whoami)/logs CHATQNA_NAMESPACE="${APP_NAMESPACE}-chatqna" CHATQNA_DATAPREP_NAMESPACE="${APP_NAMESPACE}-chatqna-dataprep" +CHATQNA_SWITCH_NAMESPACE="${APP_NAMESPACE}-chatqna-switch" CODEGEN_NAMESPACE="${APP_NAMESPACE}-codegen" CODETRANS_NAMESPACE="${APP_NAMESPACE}-codetrans" DOCSUM_NAMESPACE="${APP_NAMESPACE}-docsum" @@ -22,21 +23,25 @@ function validate_gmc() { echo "validate chat-qna with dataprep" validate_chatqna_with_dataprep - echo "validate codegen" - validate_codegen + echo "validate chat-qna in switch mode" + validate_chatqna_in_switch - echo "validate codetrans" - validate_codetrans + # echo "validate codegen" + # validate_codegen - echo "validate docsum" - validate_docsum + # echo "validate codetrans" + # validate_codetrans + + # echo "validate docsum" + # validate_docsum get_gmc_controller_logs } function cleanup_apps() { echo "clean up microservice-connector" - namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + # namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE") for ns in "${namespaces[@]}"; do if kubectl get namespace $ns > /dev/null 2>&1; then echo "Deleting namespace: $ns" @@ -103,7 +108,7 @@ function validate_chatqna() { } function validate_chatqna_with_dataprep() { - kubectl create ns $CHATQNA_DATAPREP_NAMESPACE + kubectl create ns $CHATQNA_DATAPREP_NAMESPACE sed -i "s|namespace: chatqa|namespace: $CHATQNA_DATAPREP_NAMESPACE|g" $(pwd)/config/samples/chatQnA_dataprep_xeon.yaml # workaround for issue #268 yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_dataprep_xeon.yaml @@ -180,6 +185,87 @@ function validate_chatqna_with_dataprep() { fi } +function validate_chatqna_in_switch() { + kubectl create ns $CHATQNA_SWITCH_NAMESPACE + sed -i "s|namespace: switch|namespace: $CHATQNA_SWITCH_NAMESPACE|g" $(pwd)/config/samples/chatQnA_switch_xeon.yaml + # workaround for issue #268 + yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_switch_xeon.yaml + kubectl apply -f $(pwd)/config/samples/chatQnA_switch_xeon.yaml + + # Wait until the router service is 
ready + echo "Waiting for the chatqa router service to be ready..." + wait_until_pod_ready "chatqna router" $CHATQNA_SWITCH_NAMESPACE "router-service" + output=$(kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE) + echo $output + + # deploy client pod for testing + kubectl create deployment client-test -n $CHATQNA_SWITCH_NAMESPACE --image=python:3.8.13 -- sleep infinity + + # Wait until all pods are ready + wait_until_all_pod_ready $CHATQNA_SWITCH_NAMESPACE 300s + if [ $? -ne 0 ]; then + echo "Error Some pods are not ready!" + exit 1 + fi + + # giving time to populating data + sleep 90 + + kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE + # send request to chatqnA + export CLIENT_POD=$(kubectl get pod -n $CHATQNA_SWITCH_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + echo "$CLIENT_POD" + accessUrl=$(kubectl get gmc -n $CHATQNA_SWITCH_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}") + + # test the chatqna with model condition: "model-id":"intel" and "embedding-model-id":"small" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"intel", "embedding-model-id":"small", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_intel.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_intel.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_intel.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" + fi + + # test the chatqna with model condition: "model-id":"llama" and "embedding-model-id":"large" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"llama", "embedding-model-id":"large", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_llama.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_llama.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_llama.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" 
+ fi +} + function validate_codegen() { kubectl create ns $CODEGEN_NAMESPACE sed -i "s|namespace: codegen|namespace: $CODEGEN_NAMESPACE|g" $(pwd)/config/samples/codegen_xeon.yaml diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml index 14385014d..f33cd29c4 100644 --- a/helm-charts/chatqna/gaudi-values.yaml +++ b/helm-charts/chatqna/gaudi-values.yaml @@ -8,6 +8,8 @@ tei: resources: limits: habana.ai/gaudi: 1 + securityContext: + readOnlyRootFilesystem: false # To override values in subchart tgi tgi: @@ -17,8 +19,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml index 20a959edd..66dd2d36e 100644 --- a/helm-charts/codegen/gaudi-values.yaml +++ b/helm-charts/codegen/gaudi-values.yaml @@ -8,8 +8,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/codetrans/gaudi-values.yaml b/helm-charts/codetrans/gaudi-values.yaml index 20a959edd..66dd2d36e 100644 --- a/helm-charts/codetrans/gaudi-values.yaml +++ b/helm-charts/codetrans/gaudi-values.yaml @@ -8,8 +8,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/common/data-prep/templates/deployment.yaml b/helm-charts/common/data-prep/templates/deployment.yaml index bc0103ab5..607330a44 100644 --- a/helm-charts/common/data-prep/templates/deployment.yaml +++ b/helm-charts/common/data-prep/templates/deployment.yaml @@ -45,12 +45,6 @@ spec: - name: data-prep containerPort: 6007 protocol: TCP - # The following need to be modified after GenAIComps bug #282 is resolved. - # https://github.com/opea-project/GenAIComps/issues/282 - - containerPort: 6008 - protocol: TCP - - containerPort: 6009 - protocol: TCP volumeMounts: - mountPath: /tmp name: tmp diff --git a/helm-charts/common/data-prep/templates/service.yaml b/helm-charts/common/data-prep/templates/service.yaml index 072127dfc..7b8ab7dfc 100644 --- a/helm-charts/common/data-prep/templates/service.yaml +++ b/helm-charts/common/data-prep/templates/service.yaml @@ -10,11 +10,9 @@ metadata: spec: type: {{ .Values.service.type }} ports: - {{- range .Values.service.ports }} - - port: {{ .port }} - targetPort: {{ .targetPort }} + - port: {{ .Values.service.port }} + targetPort: 6007 protocol: TCP - name: {{ .name }} - {{- end }} + name: data-prep selector: {{- include "data-prep.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/common/data-prep/templates/tests/test-pod.yaml b/helm-charts/common/data-prep/templates/tests/test-pod.yaml index f1217cd2a..63fb55886 100644 --- a/helm-charts/common/data-prep/templates/tests/test-pod.yaml +++ b/helm-charts/common/data-prep/templates/tests/test-pod.yaml @@ -18,12 +18,9 @@ spec: args: - | echo "test file" > /tmp/file1.txt; - {{- with index .Values.service.ports 0 }} - export port={{.port}}; - {{- end }} max_retry=20; for ((i=1; i<=max_retry; i++)); do - curl http://{{ include "data-prep.fullname" . }}:$port/v1/dataprep -sS --fail-with-body \ + curl http://{{ include "data-prep.fullname" . 
}}:{{ .Values.service.port }}/v1/dataprep -sS --fail-with-body \ -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@/tmp/file1.txt" && break; diff --git a/helm-charts/common/data-prep/values.yaml b/helm-charts/common/data-prep/values.yaml index efe9dc4b7..458d7c0d5 100644 --- a/helm-charts/common/data-prep/values.yaml +++ b/helm-charts/common/data-prep/values.yaml @@ -38,19 +38,7 @@ securityContext: service: type: ClusterIP - # The following need to be modified after GenAIComps bug #282 is resolved. - # https://github.com/opea-project/GenAIComps/issues/282 - ports: - # The default port for data prep service is 6007 - - port: 6007 - targetPort: 6007 - name: data-prep - - port: 6008 - targetPort: 6008 - name: data-prep-get - - port: 6009 - targetPort: 6009 - name: data-prep-delete + port: 6007 resources: {} # We usually recommend not to specify default resources and to leave this as a conscious diff --git a/helm-charts/common/speecht5/templates/deployment.yaml b/helm-charts/common/speecht5/templates/deployment.yaml index f29013eaf..475a99b89 100644 --- a/helm-charts/common/speecht5/templates/deployment.yaml +++ b/helm-charts/common/speecht5/templates/deployment.yaml @@ -74,6 +74,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/speecht5/values.yaml b/helm-charts/common/speecht5/values.yaml index 2d217ddb6..9e614f737 100644 --- a/helm-charts/common/speecht5/values.yaml +++ b/helm-charts/common/speecht5/values.yaml @@ -84,4 +84,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. 
modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/tei/gaudi-values.yaml b/helm-charts/common/tei/gaudi-values.yaml index b5dbbeede..9d1b2690a 100644 --- a/helm-charts/common/tei/gaudi-values.yaml +++ b/helm-charts/common/tei/gaudi-values.yaml @@ -9,6 +9,9 @@ image: repository: ghcr.io/huggingface/tei-gaudi tag: synapse_1.16 +securityContext: + readOnlyRootFilesystem: false + resources: limits: habana.ai/gaudi: 1 diff --git a/helm-charts/common/tei/templates/deployment.yaml b/helm-charts/common/tei/templates/deployment.yaml index 381226323..cbb2e3cad 100644 --- a/helm-charts/common/tei/templates/deployment.yaml +++ b/helm-charts/common/tei/templates/deployment.yaml @@ -78,6 +78,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml index 54545d809..9ce013fdb 100644 --- a/helm-charts/common/tei/values.yaml +++ b/helm-charts/common/tei/values.yaml @@ -82,6 +82,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/teirerank/templates/deployment.yaml b/helm-charts/common/teirerank/templates/deployment.yaml index ff2c84a8e..3a75f8772 100644 --- a/helm-charts/common/teirerank/templates/deployment.yaml +++ b/helm-charts/common/teirerank/templates/deployment.yaml @@ -78,6 +78,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml index b0062f1b8..2b8e90fc8 100644 --- a/helm-charts/common/teirerank/values.yaml +++ b/helm-charts/common/teirerank/values.yaml @@ -82,6 +82,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. 
modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/tgi/gaudi-values.yaml b/helm-charts/common/tgi/gaudi-values.yaml index 988d1109d..07b740e55 100644 --- a/helm-charts/common/tgi/gaudi-values.yaml +++ b/helm-charts/common/tgi/gaudi-values.yaml @@ -9,11 +9,8 @@ image: repository: ghcr.io/huggingface/tgi-gaudi tag: "2.0.1" -extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" +MAX_INPUT_LENGTH: "1024" +MAX_TOTAL_TOKENS: "2048" resources: limits: diff --git a/helm-charts/common/tgi/templates/configmap.yaml b/helm-charts/common/tgi/templates/configmap.yaml index f11acef19..f5f0ff1a0 100644 --- a/helm-charts/common/tgi/templates/configmap.yaml +++ b/helm-charts/common/tgi/templates/configmap.yaml @@ -18,3 +18,9 @@ data: NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.MAX_INPUT_LENGTH }} + MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }} + {{- end }} + {{- if .Values.MAX_TOTAL_TOKENS }} + MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }} + {{- end }} diff --git a/helm-charts/common/tgi/templates/deployment.yaml b/helm-charts/common/tgi/templates/deployment.yaml index 967ef832a..9dea3dc15 100644 --- a/helm-charts/common/tgi/templates/deployment.yaml +++ b/helm-charts/common/tgi/templates/deployment.yaml @@ -45,10 +45,6 @@ spec: {{- end }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- if .Values.extraArgs }} - args: - {{- toYaml .Values.extraArgs | nindent 12}} - {{- end }} volumeMounts: - mountPath: /data name: model-volume @@ -78,6 +74,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index 7f7d78e92..26893ecd5 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -98,11 +98,18 @@ affinity: {} LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 +MAX_INPUT_LENGTH: "" +MAX_TOTAL_TOKENS: "" + global: http_proxy: "" https_proxy: "" no_proxy: "" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. 
modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/whisper/templates/deployment.yaml b/helm-charts/common/whisper/templates/deployment.yaml index d766c62dd..8a9c1a1f9 100644 --- a/helm-charts/common/whisper/templates/deployment.yaml +++ b/helm-charts/common/whisper/templates/deployment.yaml @@ -74,6 +74,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/whisper/values.yaml b/helm-charts/common/whisper/values.yaml index 9a7bb6fbe..8c207fb8a 100644 --- a/helm-charts/common/whisper/values.yaml +++ b/helm-charts/common/whisper/values.yaml @@ -83,4 +83,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml index 20a959edd..66dd2d36e 100644 --- a/helm-charts/docsum/gaudi-values.yaml +++ b/helm-charts/docsum/gaudi-values.yaml @@ -8,8 +8,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index 99bf2cf5f..65e458ae9 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -36,6 +36,11 @@ tolerations: [] affinity: {} +# To override values in subchart llm-uservice +llm-uservice: + image: + repository: opea/llm-docsum-tgi + # To override values in subchart tgi tgi: LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 diff --git a/microservices-connector/cmd/router/main.go b/microservices-connector/cmd/router/main.go index b5508007b..73e8aecd0 100644 --- a/microservices-connector/cmd/router/main.go +++ b/microservices-connector/cmd/router/main.go @@ -11,7 +11,7 @@ package main import ( - "bufio" + // "bufio" "bytes" "context" "encoding/json" @@ -43,14 +43,19 @@ var ( log = logf.Log.WithName("GMCGraphRouter") mcGraph *mcv1alpha3.GMConnector defaultNodeName = "root" + Prefix = []byte("data: b'") + Suffix = []byte("'\n\n") + DONE = []byte("[DONE]") + Newline = []byte("\n") ) const ( - ChunkSize = 1024 + BufferSize = 1024 ServiceURL = "serviceUrl" ServiceNode = "node" DataPrep = "DataPrep" Parameters = "parameters" + Llm = "Llm" ) type EnsembleStepOutput struct { @@ -63,6 +68,19 @@ type GMCGraphRoutingError struct { Cause string `json:"cause"` } +type ReadCloser struct { + *bytes.Reader +} + +func (ReadCloser) Close() error { + // Typically, you would release resources here, but for bytes.Reader, there's nothing to do. + return nil +} + +func NewReadCloser(b []byte) io.ReadCloser { + return ReadCloser{bytes.NewReader(b)} +} + func (e *GMCGraphRoutingError) Error() string { return fmt.Sprintf("%s. 
%s", e.ErrorMessage, e.Cause) } @@ -127,7 +145,12 @@ func prepareErrorResponse(err error, errorMessage string) []byte { return errorResponseBytes } -func callService(step *mcv1alpha3.Step, serviceUrl string, input []byte, headers http.Header) ([]byte, int, error) { +func callService( + step *mcv1alpha3.Step, + serviceUrl string, + input []byte, + headers http.Header, +) (io.ReadCloser, int, error) { defer timeTrack(time.Now(), "step", serviceUrl) log.Info("Entering callService", "url", serviceUrl) @@ -157,21 +180,7 @@ func callService(step *mcv1alpha3.Step, serviceUrl string, input []byte, headers return nil, 500, err } - defer func() { - if resp.Body != nil { - err := resp.Body.Close() - if err != nil { - log.Error(err, "An error has occurred while closing the response body") - } - } - }() - - body, err := io.ReadAll(resp.Body) - if err != nil { - log.Error(err, "Error while reading the response") - } - - return body, resp.StatusCode, err + return resp.Body, resp.StatusCode, nil } // Use step service name to create a K8s service if serviceURL is empty @@ -190,7 +199,7 @@ func executeStep( initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { if step.NodeName != "" { // when nodeName is specified make a recursive call for routing to next step return routeStep(step.NodeName, graph, initInput, input, headers) @@ -231,16 +240,16 @@ func handleSwitchNode( initInput []byte, request []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { var statusCode int - var responseBytes []byte + var responseBody io.ReadCloser var err error stepType := ServiceURL if route.NodeName != "" { stepType = ServiceNode } log.Info("Starting execution of step", "Node Name", route.NodeName, "type", stepType, "stepName", route.StepName) - if responseBytes, statusCode, err = executeStep(route, graph, initInput, request, headers); err != nil { + if responseBody, statusCode, err = executeStep(route, graph, initInput, request, headers); err != nil { return nil, 500, err } @@ -253,7 +262,7 @@ func handleSwitchNode( statusCode, ) } - return responseBytes, statusCode, nil + return responseBody, statusCode, nil } func handleSwitchPipeline(nodeName string, @@ -261,9 +270,10 @@ func handleSwitchPipeline(nodeName string, initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { currentNode := graph.Spec.Nodes[nodeName] var statusCode int + var responseBody io.ReadCloser var responseBytes []byte var err error @@ -284,29 +294,41 @@ func handleSwitchPipeline(nodeName string, ) continue } + + // make sure that the process goes to the correct step + if route.Condition != "" { + if !pickupRouteByCondition(initInput, route.Condition) { + continue + } + } + log.Info("Current Step Information", "Node Name", nodeName, "Step Index", index) request := input + if responseBody != nil { + responseBytes, err = io.ReadAll(responseBody) + if err != nil { + log.Error(err, "Error while reading the response body") + return nil, 500, err + } + log.Info("Print Previous Response Bytes", "Previous Response Bytes", + responseBytes, "Previous Status Code", statusCode) + err = responseBody.Close() + if err != nil { + log.Error(err, "Error while trying to close the responseBody in handleSwitchPipeline") + } + } + log.Info("Print Original Request Bytes", "Request Bytes", request) if route.Data == "$response" && index > 0 { request = mergeRequests(responseBytes, initReqData) } log.Info("Print New Request Bytes", 
"Request Bytes", request) - if route.Condition == "" { - responseBytes, statusCode, err = handleSwitchNode(&route, graph, initInput, request, headers) - if err != nil { - return responseBytes, statusCode, err - } - } else { - if pickupRouteByCondition(initInput, route.Condition) { - responseBytes, statusCode, err = handleSwitchNode(&route, graph, initInput, request, headers) - if err != nil { - return responseBytes, statusCode, err - } - } + responseBody, statusCode, err = handleSwitchNode(&route, graph, initInput, request, headers) + if err != nil { + return nil, statusCode, err } - log.Info("Print Response Bytes", "Response Bytes", responseBytes, "Status Code", statusCode) } - return responseBytes, statusCode, err + return responseBody, statusCode, err } func handleEnsemblePipeline(nodeName string, @@ -314,7 +336,7 @@ func handleEnsemblePipeline(nodeName string, initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { currentNode := graph.Spec.Nodes[nodeName] ensembleRes := make([]chan EnsembleStepOutput, len(currentNode.Steps)) errChan := make(chan error) @@ -328,8 +350,12 @@ func handleEnsemblePipeline(nodeName string, resultChan := make(chan EnsembleStepOutput) ensembleRes[i] = resultChan go func() { - output, statusCode, err := executeStep(step, graph, initInput, input, headers) + responseBody, statusCode, err := executeStep(step, graph, initInput, input, headers) if err == nil { + output, rerr := io.ReadAll(responseBody) + if rerr != nil { + log.Error(rerr, "Error while reading the response body") + } var res map[string]interface{} if err = json.Unmarshal(output, &res); err == nil { resultChan <- EnsembleStepOutput{ @@ -339,6 +365,10 @@ func handleEnsemblePipeline(nodeName string, return } } + rerr := responseBody.Close() + if rerr != nil { + log.Error(rerr, "Error while trying to close the responseBody in handleEnsemblePipeline") + } errChan <- err }() } @@ -361,7 +391,8 @@ func handleEnsemblePipeline(nodeName string, ensembleStepOutput.StepStatusCode, ) stepResponse, _ := json.Marshal(ensembleStepOutput.StepResponse) - return stepResponse, ensembleStepOutput.StepStatusCode, nil + stepIOReader := NewReadCloser(stepResponse) + return stepIOReader, ensembleStepOutput.StepStatusCode, nil } else { response[key] = ensembleStepOutput.StepResponse } @@ -371,7 +402,8 @@ func handleEnsemblePipeline(nodeName string, } // return json.Marshal(response) combinedResponse, _ := json.Marshal(response) // TODO check if you need err handling for Marshalling - return combinedResponse, 200, nil + combinedIOReader := NewReadCloser(combinedResponse) + return combinedIOReader, 200, nil } func handleSequencePipeline(nodeName string, @@ -379,9 +411,10 @@ func handleSequencePipeline(nodeName string, initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { currentNode := graph.Spec.Nodes[nodeName] var statusCode int + var responseBody io.ReadCloser var responseBytes []byte var err error @@ -409,6 +442,20 @@ func handleSequencePipeline(nodeName string, log.Info("Starting execution of step", "type", stepType, "stepName", step.StepName) request := input log.Info("Print Original Request Bytes", "Request Bytes", request) + if responseBody != nil { + responseBytes, err = io.ReadAll(responseBody) + if err != nil { + log.Error(err, "Error while reading the response body") + return nil, 500, err + } + log.Info("Print Previous Response Bytes", "Previous Response Bytes", + responseBytes, "Previous Status Code", 
statusCode) + err := responseBody.Close() + if err != nil { + log.Error(err, "Error while trying to close the responseBody in handleSequencePipeline") + } + } + if step.Data == "$response" && i > 0 { request = mergeRequests(responseBytes, initReqData) } @@ -419,13 +466,12 @@ func handleSequencePipeline(nodeName string, } // if the condition does not match for the step in the sequence we stop and return the response if !gjson.GetBytes(responseBytes, step.Condition).Exists() { - return responseBytes, 500, nil + return responseBody, 500, nil } } - if responseBytes, statusCode, err = executeStep(step, graph, initInput, request, headers); err != nil { + if responseBody, statusCode, err = executeStep(step, graph, initInput, request, headers); err != nil { return nil, 500, err } - log.Info("Print Response Bytes", "Response Bytes", responseBytes, "Status Code", statusCode) /* Only if a step is a hard dependency, we will check for its success. */ @@ -439,18 +485,18 @@ func handleSequencePipeline(nodeName string, statusCode, ) // Stop the execution of sequence right away if step is a hard dependency and is unsuccessful - return responseBytes, statusCode, nil + return responseBody, statusCode, nil } } } - return responseBytes, statusCode, nil + return responseBody, statusCode, nil } func routeStep(nodeName string, graph mcv1alpha3.GMConnector, initInput, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { defer timeTrack(time.Now(), "node", nodeName) currentNode := graph.Spec.Nodes[nodeName] log.Info("Current Node", "Node Name", nodeName) @@ -478,9 +524,14 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) { go func() { defer close(done) - inputBytes, _ := io.ReadAll(req.Body) - response, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header) + inputBytes, err := io.ReadAll(req.Body) + if err != nil { + log.Error(err, "failed to read request body") + http.Error(w, "failed to read request body", http.StatusBadRequest) + return + } + responseBody, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header) if err != nil { log.Error(err, "failed to process request") w.Header().Set("Content-Type", "application/json") @@ -490,37 +541,53 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) { } return } - if json.Valid(response) { - w.Header().Set("Content-Type", "application/json") - } - w.WriteHeader(statusCode) - - writer := bufio.NewWriter(w) defer func() { - if err := writer.Flush(); err != nil { - log.Error(err, "error flushing writer when processing response") + err := responseBody.Close() + if err != nil { + log.Error(err, "Error while trying to close the responseBody in mcGraphHandler") } }() - for start := 0; start < len(response); start += ChunkSize { - end := start + ChunkSize - if end > len(response) { - end = len(response) - } - if _, err := writer.Write(response[start:end]); err != nil { - log.Error(err, "failed to write mcGraphHandler response") + w.Header().Set("Content-Type", "application/json") + buffer := make([]byte, BufferSize) + for { + n, err := responseBody.Read(buffer) + if err != nil && err != io.EOF { + log.Error(err, "failed to read from response body") + http.Error(w, "failed to read from response body", http.StatusInternalServerError) return } + if n == 0 { + break + } - if err := writer.Flush(); err != nil { - log.Error(err, "error flushing writer when processing response") + /*sliceBF := buffer[:n] + if !bytes.HasPrefix(sliceBF, DONE) { + 
sliceBF = bytes.TrimPrefix(sliceBF, Prefix) + sliceBF = bytes.TrimSuffix(sliceBF, Suffix) + } else { + sliceBF = bytes.Join([][]byte{Newline, sliceBF}, nil) + } + log.Info("[llm - chat_stream] chunk:", "Buffer", string(sliceBF))*/ + + // Write the chunk to the ResponseWriter + if _, err := w.Write(buffer[:n]); err != nil { + log.Error(err, "failed to write to ResponseWriter") + return + } + // Flush the data to the client immediately + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + } else { + log.Error(errors.New("unable to flush data"), "ResponseWriter does not support flushing") + return } } }() select { case <-ctx.Done(): - log.Error(errors.New("failed to process request"), "request timed out") + log.Error(errors.New("request timed out"), "failed to process request") http.Error(w, "request timed out", http.StatusGatewayTimeout) case <-done: log.Info("mcGraphHandler is done") diff --git a/microservices-connector/cmd/router/main_test.go b/microservices-connector/cmd/router/main_test.go index 25afa9129..6f2d7e54b 100644 --- a/microservices-connector/cmd/router/main_test.go +++ b/microservices-connector/cmd/router/main_test.go @@ -116,7 +116,12 @@ func TestSimpleModelChainer(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body: %v", rerr) + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } @@ -217,7 +222,12 @@ func TestSimpleServiceEnsemble(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body") + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } @@ -452,7 +462,12 @@ func TestMCWithCondition(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body") + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } @@ -536,7 +551,12 @@ func TestCallServiceWhenNoneHeadersToPropagateIsEmpty(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body") + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } diff --git a/microservices-connector/config/manifests/data-prep.yaml b/microservices-connector/config/manifests/data-prep.yaml index 7212b6751..f94954922 100644 --- a/microservices-connector/config/manifests/data-prep.yaml +++ b/microservices-connector/config/manifests/data-prep.yaml @@ -48,14 +48,6 @@ spec: targetPort: 6007 protocol: TCP name: data-prep - - port: 6008 - targetPort: 6008 - protocol: TCP - name: data-prep-get - - port: 6009 - targetPort: 6009 - protocol: TCP - name: data-prep-delete selector: app.kubernetes.io/name: data-prep app.kubernetes.io/instance: data-prep @@ -112,12 +104,6 @@ spec: - name: data-prep containerPort: 6007 protocol: TCP - # The following need to be modified after GenAIComps bug #282 is resolved. 
- # https://github.com/opea-project/GenAIComps/issues/282 - - containerPort: 6008 - protocol: TCP - - containerPort: 6009 - protocol: TCP volumeMounts: - mountPath: /tmp name: tmp diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml index 9b1f381ae..00ea67c90 100644 --- a/microservices-connector/config/manifests/tgi_gaudi.yaml +++ b/microservices-connector/config/manifests/tgi_gaudi.yaml @@ -24,6 +24,8 @@ data: NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" --- # Source: tgi/templates/service.yaml # Copyright (C) 2024 Intel Corporation @@ -90,11 +92,6 @@ spec: {} image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" imagePullPolicy: IfNotPresent - args: - - --max-input-length - - "1024" - - --max-total-tokens - - "2048" volumeMounts: - mountPath: /data name: model-volume
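
The router change in microservices-connector/cmd/router/main.go replaces the buffered read-everything-then-write path with chunked streaming of the upstream response body: callService now returns the body as an io.ReadCloser, and mcGraphHandler copies it to the client in BufferSize chunks, flushing after each write. Below is a minimal, self-contained sketch of that streaming pattern; the function and handler names are illustrative only and are not the router's actual API.

    package main

    import (
        "io"
        "log"
        "net/http"
        "strings"
    )

    // streamBody copies an upstream response body to the client in fixed-size
    // chunks and flushes after each write, so partial output reaches the client
    // as soon as it is read. This mirrors the pattern mcGraphHandler adopts in
    // this change; names here are illustrative, not the router's API.
    func streamBody(w http.ResponseWriter, body io.ReadCloser) {
        defer body.Close()

        buffer := make([]byte, 1024) // analogous to BufferSize in the router
        for {
            n, err := body.Read(buffer)
            if n > 0 {
                if _, werr := w.Write(buffer[:n]); werr != nil {
                    log.Printf("failed to write to ResponseWriter: %v", werr)
                    return
                }
                if flusher, ok := w.(http.Flusher); ok {
                    flusher.Flush() // push the chunk to the client immediately
                }
            }
            if err == io.EOF {
                return
            }
            if err != nil {
                log.Printf("failed to read from response body: %v", err)
                return
            }
        }
    }

    func main() {
        // Stand-in upstream body; in the router this comes from the downstream
        // microservice response.
        http.HandleFunc("/stream", func(w http.ResponseWriter, r *http.Request) {
            upstream := io.NopCloser(strings.NewReader("data: hello\n\ndata: [DONE]\n\n"))
            streamBody(w, upstream)
        })
        log.Fatal(http.ListenAndServe(":8080", nil))
    }

Flushing per chunk is what lets token-by-token output from TGI reach the caller without waiting for the full response, which the previous bufio-based writer could not guarantee for long-running generations.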