diff --git a/.github/workflows/image-build-on-manual.yaml b/.github/workflows/image-build-on-manual.yaml index d495e3371..7823c5782 100644 --- a/.github/workflows/image-build-on-manual.yaml +++ b/.github/workflows/image-build-on-manual.yaml @@ -41,6 +41,7 @@ jobs: echo "nodes=$nodes" >> $GITHUB_OUTPUT image-build: + needs: get-build-matrix strategy: matrix: node: ${{ fromJSON(needs.get-build-matrix.outputs.nodes) }} diff --git a/.github/workflows/scripts/e2e/gmc_gaudi_test.sh b/.github/workflows/scripts/e2e/gmc_gaudi_test.sh index 7cf9352b0..454c924cb 100755 --- a/.github/workflows/scripts/e2e/gmc_gaudi_test.sh +++ b/.github/workflows/scripts/e2e/gmc_gaudi_test.sh @@ -11,6 +11,7 @@ USER_ID=$(whoami) LOG_PATH=/home/$(whoami)/logs CHATQNA_NAMESPACE="${APP_NAMESPACE}-chatqna" CHATQNA_DATAPREP_NAMESPACE="${APP_NAMESPACE}-chatqna-dataprep" +CHATQNA_SWITCH_NAMESPACE="${APP_NAMESPACE}-chatqna-switch" CODEGEN_NAMESPACE="${APP_NAMESPACE}-codegen" CODETRANS_NAMESPACE="${APP_NAMESPACE}-codetrans" DOCSUM_NAMESPACE="${APP_NAMESPACE}-docsum" @@ -22,21 +23,25 @@ function validate_gmc() { echo "validate chat-qna with dataprep" validate_chatqna_with_dataprep - echo "validate codegen" - validate_codegen + echo "validate chat-qna in switch mode" + validate_chatqna_in_switch - echo "validate codetrans" - validate_codetrans + # echo "validate codegen" + # validate_codegen - echo "validate docsum" - validate_docsum + # echo "validate codetrans" + # validate_codetrans + + # echo "validate docsum" + # validate_docsum get_gmc_controller_logs } function cleanup_apps() { echo "clean up microservice-connector" - namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + # namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE") for ns in "${namespaces[@]}"; do if kubectl get namespace $ns > /dev/null 2>&1; then echo "Deleting namespace: $ns" @@ -101,7 +106,7 @@ function validate_chatqna() { } function validate_chatqna_with_dataprep() { - kubectl create ns $CHATQNA_DATAPREP_NAMESPACE + kubectl create ns $CHATQNA_DATAPREP_NAMESPACE sed -i "s|namespace: chatqa|namespace: $CHATQNA_DATAPREP_NAMESPACE|g" $(pwd)/config/samples/chatQnA_dataprep_gaudi.yaml # workaround for issue #268 yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_dataprep_gaudi.yaml @@ -178,6 +183,87 @@ function validate_chatqna_with_dataprep() { fi } +function validate_chatqna_in_switch() { + kubectl create ns $CHATQNA_SWITCH_NAMESPACE + sed -i "s|namespace: switch|namespace: $CHATQNA_SWITCH_NAMESPACE|g" $(pwd)/config/samples/chatQnA_switch_gaudi.yaml + # workaround for issue #268 + yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_switch_gaudi.yaml + kubectl apply -f $(pwd)/config/samples/chatQnA_switch_gaudi.yaml + + # Wait until the router service is ready + echo "Waiting for the chatqa router service to be ready..." 
+ wait_until_pod_ready "chatqna router" $CHATQNA_SWITCH_NAMESPACE "router-service" + output=$(kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE) + echo $output + + # deploy client pod for testing + kubectl create deployment client-test -n $CHATQNA_SWITCH_NAMESPACE --image=python:3.8.13 -- sleep infinity + + # Wait until all pods are ready + wait_until_all_pod_ready $CHATQNA_SWITCH_NAMESPACE 300s + if [ $? -ne 0 ]; then + echo "Error Some pods are not ready!" + exit 1 + fi + + # giving time to populating data + sleep 90 + + kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE + # send request to chatqnA + export CLIENT_POD=$(kubectl get pod -n $CHATQNA_SWITCH_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + echo "$CLIENT_POD" + accessUrl=$(kubectl get gmc -n $CHATQNA_SWITCH_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}") + + # test the chatqna with model condition: "model-id":"intel" and "embedding-model-id":"small" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"intel", "embedding-model-id":"small", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_intel.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_intel.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_intel.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" + fi + + # test the chatqna with model condition: "model-id":"llama" and "embedding-model-id":"large" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"llama", "embedding-model-id":"large", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_llama.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_llama.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_llama.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" 
+ fi +} + function validate_codegen() { kubectl create ns $CODEGEN_NAMESPACE sed -i "s|namespace: codegen|namespace: $CODEGEN_NAMESPACE|g" $(pwd)/config/samples/codegen_gaudi.yaml diff --git a/.github/workflows/scripts/e2e/gmc_install.sh b/.github/workflows/scripts/e2e/gmc_install.sh index b94ce39c1..f07004063 100755 --- a/.github/workflows/scripts/e2e/gmc_install.sh +++ b/.github/workflows/scripts/e2e/gmc_install.sh @@ -64,7 +64,7 @@ function cleanup_gmc() { if kubectl get namespace $SYSTEM_NAMESPACE > /dev/null 2>&1; then echo "Deleting namespace: $SYSTEM_NAMESPACE" kubectl delete namespace "$SYSTEM_NAMESPACE" - kubectl delete crd gmconnectors.gmc.opea.io + kubectl delete crd gmconnectors.gmc.opea.io || true else echo "Namespace $SYSTEM_NAMESPACE does not exist" fi diff --git a/.github/workflows/scripts/e2e/gmc_xeon_test.sh b/.github/workflows/scripts/e2e/gmc_xeon_test.sh index 8ee1867b9..16a5fff39 100755 --- a/.github/workflows/scripts/e2e/gmc_xeon_test.sh +++ b/.github/workflows/scripts/e2e/gmc_xeon_test.sh @@ -11,6 +11,7 @@ USER_ID=$(whoami) LOG_PATH=/home/$(whoami)/logs CHATQNA_NAMESPACE="${APP_NAMESPACE}-chatqna" CHATQNA_DATAPREP_NAMESPACE="${APP_NAMESPACE}-chatqna-dataprep" +CHATQNA_SWITCH_NAMESPACE="${APP_NAMESPACE}-chatqna-switch" CODEGEN_NAMESPACE="${APP_NAMESPACE}-codegen" CODETRANS_NAMESPACE="${APP_NAMESPACE}-codetrans" DOCSUM_NAMESPACE="${APP_NAMESPACE}-docsum" @@ -22,21 +23,25 @@ function validate_gmc() { echo "validate chat-qna with dataprep" validate_chatqna_with_dataprep - echo "validate codegen" - validate_codegen + echo "validate chat-qna in switch mode" + validate_chatqna_in_switch - echo "validate codetrans" - validate_codetrans + # echo "validate codegen" + # validate_codegen - echo "validate docsum" - validate_docsum + # echo "validate codetrans" + # validate_codetrans + + # echo "validate docsum" + # validate_docsum get_gmc_controller_logs } function cleanup_apps() { echo "clean up microservice-connector" - namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + # namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE" "$CODEGEN_NAMESPACE" "$CODETRANS_NAMESPACE" "$DOCSUM_NAMESPACE") + namespaces=("$CHATQNA_NAMESPACE" "$CHATQNA_DATAPREP_NAMESPACE" "$CHATQNA_SWITCH_NAMESPACE") for ns in "${namespaces[@]}"; do if kubectl get namespace $ns > /dev/null 2>&1; then echo "Deleting namespace: $ns" @@ -103,7 +108,7 @@ function validate_chatqna() { } function validate_chatqna_with_dataprep() { - kubectl create ns $CHATQNA_DATAPREP_NAMESPACE + kubectl create ns $CHATQNA_DATAPREP_NAMESPACE sed -i "s|namespace: chatqa|namespace: $CHATQNA_DATAPREP_NAMESPACE|g" $(pwd)/config/samples/chatQnA_dataprep_xeon.yaml # workaround for issue #268 yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_dataprep_xeon.yaml @@ -180,6 +185,87 @@ function validate_chatqna_with_dataprep() { fi } +function validate_chatqna_in_switch() { + kubectl create ns $CHATQNA_SWITCH_NAMESPACE + sed -i "s|namespace: switch|namespace: $CHATQNA_SWITCH_NAMESPACE|g" $(pwd)/config/samples/chatQnA_switch_xeon.yaml + # workaround for issue #268 + yq -i '(.spec.nodes.root.steps[] | select ( .name == "Tgi")).internalService.config.MODEL_ID = "bigscience/bloom-560m"' $(pwd)/config/samples/chatQnA_switch_xeon.yaml + kubectl apply -f $(pwd)/config/samples/chatQnA_switch_xeon.yaml + + # Wait until the router service is 
ready + echo "Waiting for the chatqa router service to be ready..." + wait_until_pod_ready "chatqna router" $CHATQNA_SWITCH_NAMESPACE "router-service" + output=$(kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE) + echo $output + + # deploy client pod for testing + kubectl create deployment client-test -n $CHATQNA_SWITCH_NAMESPACE --image=python:3.8.13 -- sleep infinity + + # Wait until all pods are ready + wait_until_all_pod_ready $CHATQNA_SWITCH_NAMESPACE 300s + if [ $? -ne 0 ]; then + echo "Error Some pods are not ready!" + exit 1 + fi + + # giving time to populating data + sleep 90 + + kubectl get pods -n $CHATQNA_SWITCH_NAMESPACE + # send request to chatqnA + export CLIENT_POD=$(kubectl get pod -n $CHATQNA_SWITCH_NAMESPACE -l app=client-test -o jsonpath={.items..metadata.name}) + echo "$CLIENT_POD" + accessUrl=$(kubectl get gmc -n $CHATQNA_SWITCH_NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='switch')].status.accessUrl}") + + # test the chatqna with model condition: "model-id":"intel" and "embedding-model-id":"small" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"intel", "embedding-model-id":"small", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_intel.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_intel.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_intel.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_intel.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" + fi + + # test the chatqna with model condition: "model-id":"llama" and "embedding-model-id":"large" + kubectl exec "$CLIENT_POD" -n $CHATQNA_SWITCH_NAMESPACE -- curl $accessUrl -X POST -d '{"text":"What is the revenue of Nike in 2023?", "model-id":"llama", "embedding-model-id":"large", "parameters":{"max_new_tokens":17, "do_sample": true}}' -H 'Content-Type: application/json' > $LOG_PATH/curl_chatqna_switch_llama.log + exit_code=$? + if [ $exit_code -ne 0 ]; then + echo "chatqna failed, please check the logs in ${LOG_PATH}!" + exit 1 + fi + + echo "Checking response results, make sure the output is reasonable. " + local status=false + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]] && \ + [[ $(grep -c "[DONE]" $LOG_PATH/curl_chatqna_switch_llama.log) != 0 ]]; then + status=true + fi + if [ $status == false ]; then + if [[ -f $LOG_PATH/curl_chatqna_switch_llama.log ]]; then + cat $LOG_PATH/curl_chatqna_switch_llama.log + fi + echo "Response check failed, please check the logs in artifacts!" + exit 1 + else + echo "Response check succeed!" 
+ fi +} + function validate_codegen() { kubectl create ns $CODEGEN_NAMESPACE sed -i "s|namespace: codegen|namespace: $CODEGEN_NAMESPACE|g" $(pwd)/config/samples/codegen_xeon.yaml diff --git a/helm-charts/chatqna/gaudi-values.yaml b/helm-charts/chatqna/gaudi-values.yaml index 14385014d..f33cd29c4 100644 --- a/helm-charts/chatqna/gaudi-values.yaml +++ b/helm-charts/chatqna/gaudi-values.yaml @@ -8,6 +8,8 @@ tei: resources: limits: habana.ai/gaudi: 1 + securityContext: + readOnlyRootFilesystem: false # To override values in subchart tgi tgi: @@ -17,8 +19,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/codegen/gaudi-values.yaml b/helm-charts/codegen/gaudi-values.yaml index 20a959edd..66dd2d36e 100644 --- a/helm-charts/codegen/gaudi-values.yaml +++ b/helm-charts/codegen/gaudi-values.yaml @@ -8,8 +8,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/codetrans/gaudi-values.yaml b/helm-charts/codetrans/gaudi-values.yaml index 20a959edd..66dd2d36e 100644 --- a/helm-charts/codetrans/gaudi-values.yaml +++ b/helm-charts/codetrans/gaudi-values.yaml @@ -8,8 +8,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/common/data-prep/templates/deployment.yaml b/helm-charts/common/data-prep/templates/deployment.yaml index bc0103ab5..607330a44 100644 --- a/helm-charts/common/data-prep/templates/deployment.yaml +++ b/helm-charts/common/data-prep/templates/deployment.yaml @@ -45,12 +45,6 @@ spec: - name: data-prep containerPort: 6007 protocol: TCP - # The following need to be modified after GenAIComps bug #282 is resolved. - # https://github.com/opea-project/GenAIComps/issues/282 - - containerPort: 6008 - protocol: TCP - - containerPort: 6009 - protocol: TCP volumeMounts: - mountPath: /tmp name: tmp diff --git a/helm-charts/common/data-prep/templates/service.yaml b/helm-charts/common/data-prep/templates/service.yaml index 072127dfc..7b8ab7dfc 100644 --- a/helm-charts/common/data-prep/templates/service.yaml +++ b/helm-charts/common/data-prep/templates/service.yaml @@ -10,11 +10,9 @@ metadata: spec: type: {{ .Values.service.type }} ports: - {{- range .Values.service.ports }} - - port: {{ .port }} - targetPort: {{ .targetPort }} + - port: {{ .Values.service.port }} + targetPort: 6007 protocol: TCP - name: {{ .name }} - {{- end }} + name: data-prep selector: {{- include "data-prep.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/common/data-prep/templates/tests/test-pod.yaml b/helm-charts/common/data-prep/templates/tests/test-pod.yaml index f1217cd2a..63fb55886 100644 --- a/helm-charts/common/data-prep/templates/tests/test-pod.yaml +++ b/helm-charts/common/data-prep/templates/tests/test-pod.yaml @@ -18,12 +18,9 @@ spec: args: - | echo "test file" > /tmp/file1.txt; - {{- with index .Values.service.ports 0 }} - export port={{.port}}; - {{- end }} max_retry=20; for ((i=1; i<=max_retry; i++)); do - curl http://{{ include "data-prep.fullname" . }}:$port/v1/dataprep -sS --fail-with-body \ + curl http://{{ include "data-prep.fullname" . 
}}:{{ .Values.service.port }}/v1/dataprep -sS --fail-with-body \ -X POST \ -H "Content-Type: multipart/form-data" \ -F "files=@/tmp/file1.txt" && break; diff --git a/helm-charts/common/data-prep/values.yaml b/helm-charts/common/data-prep/values.yaml index efe9dc4b7..458d7c0d5 100644 --- a/helm-charts/common/data-prep/values.yaml +++ b/helm-charts/common/data-prep/values.yaml @@ -38,19 +38,7 @@ securityContext: service: type: ClusterIP - # The following need to be modified after GenAIComps bug #282 is resolved. - # https://github.com/opea-project/GenAIComps/issues/282 - ports: - # The default port for data prep service is 6007 - - port: 6007 - targetPort: 6007 - name: data-prep - - port: 6008 - targetPort: 6008 - name: data-prep-get - - port: 6009 - targetPort: 6009 - name: data-prep-delete + port: 6007 resources: {} # We usually recommend not to specify default resources and to leave this as a conscious diff --git a/helm-charts/common/speecht5/templates/deployment.yaml b/helm-charts/common/speecht5/templates/deployment.yaml index f29013eaf..475a99b89 100644 --- a/helm-charts/common/speecht5/templates/deployment.yaml +++ b/helm-charts/common/speecht5/templates/deployment.yaml @@ -74,6 +74,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/speecht5/values.yaml b/helm-charts/common/speecht5/values.yaml index 2d217ddb6..9e614f737 100644 --- a/helm-charts/common/speecht5/values.yaml +++ b/helm-charts/common/speecht5/values.yaml @@ -84,4 +84,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. 
modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/tei/gaudi-values.yaml b/helm-charts/common/tei/gaudi-values.yaml index b5dbbeede..9d1b2690a 100644 --- a/helm-charts/common/tei/gaudi-values.yaml +++ b/helm-charts/common/tei/gaudi-values.yaml @@ -9,6 +9,9 @@ image: repository: ghcr.io/huggingface/tei-gaudi tag: synapse_1.16 +securityContext: + readOnlyRootFilesystem: false + resources: limits: habana.ai/gaudi: 1 diff --git a/helm-charts/common/tei/templates/deployment.yaml b/helm-charts/common/tei/templates/deployment.yaml index 381226323..cbb2e3cad 100644 --- a/helm-charts/common/tei/templates/deployment.yaml +++ b/helm-charts/common/tei/templates/deployment.yaml @@ -78,6 +78,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/tei/values.yaml b/helm-charts/common/tei/values.yaml index 54545d809..9ce013fdb 100644 --- a/helm-charts/common/tei/values.yaml +++ b/helm-charts/common/tei/values.yaml @@ -82,6 +82,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/teirerank/templates/deployment.yaml b/helm-charts/common/teirerank/templates/deployment.yaml index ff2c84a8e..3a75f8772 100644 --- a/helm-charts/common/teirerank/templates/deployment.yaml +++ b/helm-charts/common/teirerank/templates/deployment.yaml @@ -78,6 +78,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/teirerank/values.yaml b/helm-charts/common/teirerank/values.yaml index b0062f1b8..2b8e90fc8 100644 --- a/helm-charts/common/teirerank/values.yaml +++ b/helm-charts/common/teirerank/values.yaml @@ -82,6 +82,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. 
modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/tgi/gaudi-values.yaml b/helm-charts/common/tgi/gaudi-values.yaml index 988d1109d..07b740e55 100644 --- a/helm-charts/common/tgi/gaudi-values.yaml +++ b/helm-charts/common/tgi/gaudi-values.yaml @@ -9,11 +9,8 @@ image: repository: ghcr.io/huggingface/tgi-gaudi tag: "2.0.1" -extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" +MAX_INPUT_LENGTH: "1024" +MAX_TOTAL_TOKENS: "2048" resources: limits: diff --git a/helm-charts/common/tgi/templates/configmap.yaml b/helm-charts/common/tgi/templates/configmap.yaml index f11acef19..f5f0ff1a0 100644 --- a/helm-charts/common/tgi/templates/configmap.yaml +++ b/helm-charts/common/tgi/templates/configmap.yaml @@ -18,3 +18,9 @@ data: NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" + {{- if .Values.MAX_INPUT_LENGTH }} + MAX_INPUT_LENGTH: {{ .Values.MAX_INPUT_LENGTH | quote }} + {{- end }} + {{- if .Values.MAX_TOTAL_TOKENS }} + MAX_TOTAL_TOKENS: {{ .Values.MAX_TOTAL_TOKENS | quote }} + {{- end }} diff --git a/helm-charts/common/tgi/templates/deployment.yaml b/helm-charts/common/tgi/templates/deployment.yaml index 967ef832a..9dea3dc15 100644 --- a/helm-charts/common/tgi/templates/deployment.yaml +++ b/helm-charts/common/tgi/templates/deployment.yaml @@ -45,10 +45,6 @@ spec: {{- end }} image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" imagePullPolicy: {{ .Values.image.pullPolicy }} - {{- if .Values.extraArgs }} - args: - {{- toYaml .Values.extraArgs | nindent 12}} - {{- end }} volumeMounts: - mountPath: /data name: model-volume @@ -78,6 +74,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/tgi/values.yaml b/helm-charts/common/tgi/values.yaml index 7f7d78e92..26893ecd5 100644 --- a/helm-charts/common/tgi/values.yaml +++ b/helm-charts/common/tgi/values.yaml @@ -98,11 +98,18 @@ affinity: {} LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 +MAX_INPUT_LENGTH: "" +MAX_TOTAL_TOKENS: "" + global: http_proxy: "" https_proxy: "" no_proxy: "" HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" - # set modelUseHostPath to host directory if you want to use hostPath volume for model storage - # comment out modeluseHostPath if you want to download the model from huggingface + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. 
modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/common/whisper/templates/deployment.yaml b/helm-charts/common/whisper/templates/deployment.yaml index d766c62dd..8a9c1a1f9 100644 --- a/helm-charts/common/whisper/templates/deployment.yaml +++ b/helm-charts/common/whisper/templates/deployment.yaml @@ -74,6 +74,9 @@ spec: hostPath: path: {{ .Values.global.modelUseHostPath }} type: Directory + {{- else if .Values.global.modelUsePV }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePV }} {{- else }} emptyDir: {} {{- end }} diff --git a/helm-charts/common/whisper/values.yaml b/helm-charts/common/whisper/values.yaml index 9a7bb6fbe..8c207fb8a 100644 --- a/helm-charts/common/whisper/values.yaml +++ b/helm-charts/common/whisper/values.yaml @@ -83,4 +83,10 @@ global: http_proxy: "" https_proxy: "" no_proxy: "" + + # Choose where to save your downloaded models + # modelUseHostPath: Host directory path, this is good for one node test. + # modelUsePV: PersistentVolumeClaim(PVC) name, which is suitable for multinode deployment + # comment out both will not have model cache directory and download the model from huggingface. modelUseHostPath: /mnt/opea-models + # modelUsePV: model-volume diff --git a/helm-charts/docsum/gaudi-values.yaml b/helm-charts/docsum/gaudi-values.yaml index 20a959edd..66dd2d36e 100644 --- a/helm-charts/docsum/gaudi-values.yaml +++ b/helm-charts/docsum/gaudi-values.yaml @@ -8,8 +8,5 @@ tgi: resources: limits: habana.ai/gaudi: 1 - extraArgs: - - "--max-input-length" - - "1024" - - "--max-total-tokens" - - "2048" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" diff --git a/helm-charts/docsum/values.yaml b/helm-charts/docsum/values.yaml index 99bf2cf5f..65e458ae9 100644 --- a/helm-charts/docsum/values.yaml +++ b/helm-charts/docsum/values.yaml @@ -36,6 +36,11 @@ tolerations: [] affinity: {} +# To override values in subchart llm-uservice +llm-uservice: + image: + repository: opea/llm-docsum-tgi + # To override values in subchart tgi tgi: LLM_MODEL_ID: Intel/neural-chat-7b-v3-3 diff --git a/microservices-connector/cmd/router/main.go b/microservices-connector/cmd/router/main.go index b5508007b..73e8aecd0 100644 --- a/microservices-connector/cmd/router/main.go +++ b/microservices-connector/cmd/router/main.go @@ -11,7 +11,7 @@ package main import ( - "bufio" + // "bufio" "bytes" "context" "encoding/json" @@ -43,14 +43,19 @@ var ( log = logf.Log.WithName("GMCGraphRouter") mcGraph *mcv1alpha3.GMConnector defaultNodeName = "root" + Prefix = []byte("data: b'") + Suffix = []byte("'\n\n") + DONE = []byte("[DONE]") + Newline = []byte("\n") ) const ( - ChunkSize = 1024 + BufferSize = 1024 ServiceURL = "serviceUrl" ServiceNode = "node" DataPrep = "DataPrep" Parameters = "parameters" + Llm = "Llm" ) type EnsembleStepOutput struct { @@ -63,6 +68,19 @@ type GMCGraphRoutingError struct { Cause string `json:"cause"` } +type ReadCloser struct { + *bytes.Reader +} + +func (ReadCloser) Close() error { + // Typically, you would release resources here, but for bytes.Reader, there's nothing to do. + return nil +} + +func NewReadCloser(b []byte) io.ReadCloser { + return ReadCloser{bytes.NewReader(b)} +} + func (e *GMCGraphRoutingError) Error() string { return fmt.Sprintf("%s. 
%s", e.ErrorMessage, e.Cause) } @@ -127,7 +145,12 @@ func prepareErrorResponse(err error, errorMessage string) []byte { return errorResponseBytes } -func callService(step *mcv1alpha3.Step, serviceUrl string, input []byte, headers http.Header) ([]byte, int, error) { +func callService( + step *mcv1alpha3.Step, + serviceUrl string, + input []byte, + headers http.Header, +) (io.ReadCloser, int, error) { defer timeTrack(time.Now(), "step", serviceUrl) log.Info("Entering callService", "url", serviceUrl) @@ -157,21 +180,7 @@ func callService(step *mcv1alpha3.Step, serviceUrl string, input []byte, headers return nil, 500, err } - defer func() { - if resp.Body != nil { - err := resp.Body.Close() - if err != nil { - log.Error(err, "An error has occurred while closing the response body") - } - } - }() - - body, err := io.ReadAll(resp.Body) - if err != nil { - log.Error(err, "Error while reading the response") - } - - return body, resp.StatusCode, err + return resp.Body, resp.StatusCode, nil } // Use step service name to create a K8s service if serviceURL is empty @@ -190,7 +199,7 @@ func executeStep( initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { if step.NodeName != "" { // when nodeName is specified make a recursive call for routing to next step return routeStep(step.NodeName, graph, initInput, input, headers) @@ -231,16 +240,16 @@ func handleSwitchNode( initInput []byte, request []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { var statusCode int - var responseBytes []byte + var responseBody io.ReadCloser var err error stepType := ServiceURL if route.NodeName != "" { stepType = ServiceNode } log.Info("Starting execution of step", "Node Name", route.NodeName, "type", stepType, "stepName", route.StepName) - if responseBytes, statusCode, err = executeStep(route, graph, initInput, request, headers); err != nil { + if responseBody, statusCode, err = executeStep(route, graph, initInput, request, headers); err != nil { return nil, 500, err } @@ -253,7 +262,7 @@ func handleSwitchNode( statusCode, ) } - return responseBytes, statusCode, nil + return responseBody, statusCode, nil } func handleSwitchPipeline(nodeName string, @@ -261,9 +270,10 @@ func handleSwitchPipeline(nodeName string, initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { currentNode := graph.Spec.Nodes[nodeName] var statusCode int + var responseBody io.ReadCloser var responseBytes []byte var err error @@ -284,29 +294,41 @@ func handleSwitchPipeline(nodeName string, ) continue } + + // make sure that the process goes to the correct step + if route.Condition != "" { + if !pickupRouteByCondition(initInput, route.Condition) { + continue + } + } + log.Info("Current Step Information", "Node Name", nodeName, "Step Index", index) request := input + if responseBody != nil { + responseBytes, err = io.ReadAll(responseBody) + if err != nil { + log.Error(err, "Error while reading the response body") + return nil, 500, err + } + log.Info("Print Previous Response Bytes", "Previous Response Bytes", + responseBytes, "Previous Status Code", statusCode) + err = responseBody.Close() + if err != nil { + log.Error(err, "Error while trying to close the responseBody in handleSwitchPipeline") + } + } + log.Info("Print Original Request Bytes", "Request Bytes", request) if route.Data == "$response" && index > 0 { request = mergeRequests(responseBytes, initReqData) } log.Info("Print New Request Bytes", 
"Request Bytes", request) - if route.Condition == "" { - responseBytes, statusCode, err = handleSwitchNode(&route, graph, initInput, request, headers) - if err != nil { - return responseBytes, statusCode, err - } - } else { - if pickupRouteByCondition(initInput, route.Condition) { - responseBytes, statusCode, err = handleSwitchNode(&route, graph, initInput, request, headers) - if err != nil { - return responseBytes, statusCode, err - } - } + responseBody, statusCode, err = handleSwitchNode(&route, graph, initInput, request, headers) + if err != nil { + return nil, statusCode, err } - log.Info("Print Response Bytes", "Response Bytes", responseBytes, "Status Code", statusCode) } - return responseBytes, statusCode, err + return responseBody, statusCode, err } func handleEnsemblePipeline(nodeName string, @@ -314,7 +336,7 @@ func handleEnsemblePipeline(nodeName string, initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { currentNode := graph.Spec.Nodes[nodeName] ensembleRes := make([]chan EnsembleStepOutput, len(currentNode.Steps)) errChan := make(chan error) @@ -328,8 +350,12 @@ func handleEnsemblePipeline(nodeName string, resultChan := make(chan EnsembleStepOutput) ensembleRes[i] = resultChan go func() { - output, statusCode, err := executeStep(step, graph, initInput, input, headers) + responseBody, statusCode, err := executeStep(step, graph, initInput, input, headers) if err == nil { + output, rerr := io.ReadAll(responseBody) + if rerr != nil { + log.Error(rerr, "Error while reading the response body") + } var res map[string]interface{} if err = json.Unmarshal(output, &res); err == nil { resultChan <- EnsembleStepOutput{ @@ -339,6 +365,10 @@ func handleEnsemblePipeline(nodeName string, return } } + rerr := responseBody.Close() + if rerr != nil { + log.Error(rerr, "Error while trying to close the responseBody in handleEnsemblePipeline") + } errChan <- err }() } @@ -361,7 +391,8 @@ func handleEnsemblePipeline(nodeName string, ensembleStepOutput.StepStatusCode, ) stepResponse, _ := json.Marshal(ensembleStepOutput.StepResponse) - return stepResponse, ensembleStepOutput.StepStatusCode, nil + stepIOReader := NewReadCloser(stepResponse) + return stepIOReader, ensembleStepOutput.StepStatusCode, nil } else { response[key] = ensembleStepOutput.StepResponse } @@ -371,7 +402,8 @@ func handleEnsemblePipeline(nodeName string, } // return json.Marshal(response) combinedResponse, _ := json.Marshal(response) // TODO check if you need err handling for Marshalling - return combinedResponse, 200, nil + combinedIOReader := NewReadCloser(combinedResponse) + return combinedIOReader, 200, nil } func handleSequencePipeline(nodeName string, @@ -379,9 +411,10 @@ func handleSequencePipeline(nodeName string, initInput []byte, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { currentNode := graph.Spec.Nodes[nodeName] var statusCode int + var responseBody io.ReadCloser var responseBytes []byte var err error @@ -409,6 +442,20 @@ func handleSequencePipeline(nodeName string, log.Info("Starting execution of step", "type", stepType, "stepName", step.StepName) request := input log.Info("Print Original Request Bytes", "Request Bytes", request) + if responseBody != nil { + responseBytes, err = io.ReadAll(responseBody) + if err != nil { + log.Error(err, "Error while reading the response body") + return nil, 500, err + } + log.Info("Print Previous Response Bytes", "Previous Response Bytes", + responseBytes, "Previous Status Code", 
statusCode) + err := responseBody.Close() + if err != nil { + log.Error(err, "Error while trying to close the responseBody in handleSequencePipeline") + } + } + if step.Data == "$response" && i > 0 { request = mergeRequests(responseBytes, initReqData) } @@ -419,13 +466,12 @@ func handleSequencePipeline(nodeName string, } // if the condition does not match for the step in the sequence we stop and return the response if !gjson.GetBytes(responseBytes, step.Condition).Exists() { - return responseBytes, 500, nil + return responseBody, 500, nil } } - if responseBytes, statusCode, err = executeStep(step, graph, initInput, request, headers); err != nil { + if responseBody, statusCode, err = executeStep(step, graph, initInput, request, headers); err != nil { return nil, 500, err } - log.Info("Print Response Bytes", "Response Bytes", responseBytes, "Status Code", statusCode) /* Only if a step is a hard dependency, we will check for its success. */ @@ -439,18 +485,18 @@ func handleSequencePipeline(nodeName string, statusCode, ) // Stop the execution of sequence right away if step is a hard dependency and is unsuccessful - return responseBytes, statusCode, nil + return responseBody, statusCode, nil } } } - return responseBytes, statusCode, nil + return responseBody, statusCode, nil } func routeStep(nodeName string, graph mcv1alpha3.GMConnector, initInput, input []byte, headers http.Header, -) ([]byte, int, error) { +) (io.ReadCloser, int, error) { defer timeTrack(time.Now(), "node", nodeName) currentNode := graph.Spec.Nodes[nodeName] log.Info("Current Node", "Node Name", nodeName) @@ -478,9 +524,14 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) { go func() { defer close(done) - inputBytes, _ := io.ReadAll(req.Body) - response, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header) + inputBytes, err := io.ReadAll(req.Body) + if err != nil { + log.Error(err, "failed to read request body") + http.Error(w, "failed to read request body", http.StatusBadRequest) + return + } + responseBody, statusCode, err := routeStep(defaultNodeName, *mcGraph, inputBytes, inputBytes, req.Header) if err != nil { log.Error(err, "failed to process request") w.Header().Set("Content-Type", "application/json") @@ -490,37 +541,53 @@ func mcGraphHandler(w http.ResponseWriter, req *http.Request) { } return } - if json.Valid(response) { - w.Header().Set("Content-Type", "application/json") - } - w.WriteHeader(statusCode) - - writer := bufio.NewWriter(w) defer func() { - if err := writer.Flush(); err != nil { - log.Error(err, "error flushing writer when processing response") + err := responseBody.Close() + if err != nil { + log.Error(err, "Error while trying to close the responseBody in mcGraphHandler") } }() - for start := 0; start < len(response); start += ChunkSize { - end := start + ChunkSize - if end > len(response) { - end = len(response) - } - if _, err := writer.Write(response[start:end]); err != nil { - log.Error(err, "failed to write mcGraphHandler response") + w.Header().Set("Content-Type", "application/json") + buffer := make([]byte, BufferSize) + for { + n, err := responseBody.Read(buffer) + if err != nil && err != io.EOF { + log.Error(err, "failed to read from response body") + http.Error(w, "failed to read from response body", http.StatusInternalServerError) return } + if n == 0 { + break + } - if err := writer.Flush(); err != nil { - log.Error(err, "error flushing writer when processing response") + /*sliceBF := buffer[:n] + if !bytes.HasPrefix(sliceBF, DONE) { + 
sliceBF = bytes.TrimPrefix(sliceBF, Prefix) + sliceBF = bytes.TrimSuffix(sliceBF, Suffix) + } else { + sliceBF = bytes.Join([][]byte{Newline, sliceBF}, nil) + } + log.Info("[llm - chat_stream] chunk:", "Buffer", string(sliceBF))*/ + + // Write the chunk to the ResponseWriter + if _, err := w.Write(buffer[:n]); err != nil { + log.Error(err, "failed to write to ResponseWriter") + return + } + // Flush the data to the client immediately + if flusher, ok := w.(http.Flusher); ok { + flusher.Flush() + } else { + log.Error(errors.New("unable to flush data"), "ResponseWriter does not support flushing") + return } } }() select { case <-ctx.Done(): - log.Error(errors.New("failed to process request"), "request timed out") + log.Error(errors.New("request timed out"), "failed to process request") http.Error(w, "request timed out", http.StatusGatewayTimeout) case <-done: log.Info("mcGraphHandler is done") diff --git a/microservices-connector/cmd/router/main_test.go b/microservices-connector/cmd/router/main_test.go index 25afa9129..6f2d7e54b 100644 --- a/microservices-connector/cmd/router/main_test.go +++ b/microservices-connector/cmd/router/main_test.go @@ -116,7 +116,12 @@ func TestSimpleModelChainer(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body: %v", rerr) + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } @@ -217,7 +222,12 @@ func TestSimpleServiceEnsemble(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body") + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } @@ -452,7 +462,12 @@ func TestMCWithCondition(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body") + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } @@ -536,7 +551,12 @@ func TestCallServiceWhenNoneHeadersToPropagateIsEmpty(t *testing.T) { return } var response map[string]interface{} - err = json.Unmarshal(res, &response) + responseBytes, rerr := io.ReadAll(res) + if rerr != nil { + t.Fatalf("Error while reading the response body") + return + } + err = json.Unmarshal(responseBytes, &response) if err != nil { return } diff --git a/microservices-connector/config/manifests/data-prep.yaml b/microservices-connector/config/manifests/data-prep.yaml index 7212b6751..f94954922 100644 --- a/microservices-connector/config/manifests/data-prep.yaml +++ b/microservices-connector/config/manifests/data-prep.yaml @@ -48,14 +48,6 @@ spec: targetPort: 6007 protocol: TCP name: data-prep - - port: 6008 - targetPort: 6008 - protocol: TCP - name: data-prep-get - - port: 6009 - targetPort: 6009 - protocol: TCP - name: data-prep-delete selector: app.kubernetes.io/name: data-prep app.kubernetes.io/instance: data-prep @@ -112,12 +104,6 @@ spec: - name: data-prep containerPort: 6007 protocol: TCP - # The following need to be modified after GenAIComps bug #282 is resolved. 
- # https://github.com/opea-project/GenAIComps/issues/282 - - containerPort: 6008 - protocol: TCP - - containerPort: 6009 - protocol: TCP volumeMounts: - mountPath: /tmp name: tmp diff --git a/microservices-connector/config/manifests/tgi_gaudi.yaml b/microservices-connector/config/manifests/tgi_gaudi.yaml index 9b1f381ae..00ea67c90 100644 --- a/microservices-connector/config/manifests/tgi_gaudi.yaml +++ b/microservices-connector/config/manifests/tgi_gaudi.yaml @@ -24,6 +24,8 @@ data: NUMBA_CACHE_DIR: "/tmp" TRANSFORMERS_CACHE: "/tmp/transformers_cache" HF_HOME: "/tmp/.cache/huggingface" + MAX_INPUT_LENGTH: "1024" + MAX_TOTAL_TOKENS: "2048" --- # Source: tgi/templates/service.yaml # Copyright (C) 2024 Intel Corporation @@ -90,11 +92,6 @@ spec: {} image: "ghcr.io/huggingface/tgi-gaudi:2.0.1" imagePullPolicy: IfNotPresent - args: - - --max-input-length - - "1024" - - --max-total-tokens - - "2048" volumeMounts: - mountPath: /data name: model-volume
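
The router change in microservices-connector/cmd/router/main.go replaces the buffered read-everything-then-write path with chunked streaming of the upstream response body: callService now returns the body as an io.ReadCloser, and mcGraphHandler copies it to the client in BufferSize chunks, flushing after each write. Below is a minimal, self-contained sketch of that streaming pattern; the function and handler names are illustrative only and are not the router's actual API.

    package main

    import (
        "io"
        "log"
        "net/http"
        "strings"
    )

    // streamBody copies an upstream response body to the client in fixed-size
    // chunks and flushes after each write, so partial output reaches the client
    // as soon as it is read. This mirrors the pattern mcGraphHandler adopts in
    // this change; names here are illustrative, not the router's API.
    func streamBody(w http.ResponseWriter, body io.ReadCloser) {
        defer body.Close()

        buffer := make([]byte, 1024) // analogous to BufferSize in the router
        for {
            n, err := body.Read(buffer)
            if n > 0 {
                if _, werr := w.Write(buffer[:n]); werr != nil {
                    log.Printf("failed to write to ResponseWriter: %v", werr)
                    return
                }
                if flusher, ok := w.(http.Flusher); ok {
                    flusher.Flush() // push the chunk to the client immediately
                }
            }
            if err == io.EOF {
                return
            }
            if err != nil {
                log.Printf("failed to read from response body: %v", err)
                return
            }
        }
    }

    func main() {
        // Stand-in upstream body; in the router this comes from the downstream
        // microservice response.
        http.HandleFunc("/stream", func(w http.ResponseWriter, r *http.Request) {
            upstream := io.NopCloser(strings.NewReader("data: hello\n\ndata: [DONE]\n\n"))
            streamBody(w, upstream)
        })
        log.Fatal(http.ListenAndServe(":8080", nil))
    }

Flushing per chunk is what lets token-by-token output from TGI reach the caller without waiting for the full response, which the previous bufio-based writer could not guarantee for long-running generations.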