From cdd38a2d17e9ba414425f9d74b11dacfe318c3e9 Mon Sep 17 00:00:00 2001 From: Lianhao Lu Date: Thu, 9 Jan 2025 05:49:35 +0000 Subject: [PATCH] Modify embedding-usvc to support multimodal embedding - Change embedding-usvc chart to adapt to latest changes - Support multimodal embedding Signed-off-by: Lianhao Lu --- helm-charts/common/embedding-usvc/.helmignore | 2 + helm-charts/common/embedding-usvc/Chart.yaml | 4 + helm-charts/common/embedding-usvc/README.md | 61 ++++--- .../embedding-usvc/ci-multimodal-values.yaml | 14 ++ .../common/embedding-usvc/ci-values.yaml | 5 + .../embedding-usvc/templates/configmap.yaml | 18 +- .../embedding-usvc/templates/deployment.yaml | 2 +- .../templates/tests/test-pod.yaml | 6 +- helm-charts/common/embedding-usvc/values.yaml | 53 +++--- helm-charts/common/mm-embedding/.helmignore | 25 +++ helm-charts/common/mm-embedding/Chart.yaml | 9 + helm-charts/common/mm-embedding/README.md | 58 ++++++ .../common/mm-embedding/ci-clip-values.yaml | 1 + .../common/mm-embedding/ci-values.yaml | 1 + .../common/mm-embedding/gaudi-values.yaml | 22 +++ .../mm-embedding/templates/_helpers.tpl | 64 +++++++ .../mm-embedding/templates/configmap.yaml | 30 ++++ .../mm-embedding/templates/deployment.yaml | 167 ++++++++++++++++++ .../templates/horizontal-pod-autoscaler.yaml | 35 ++++ .../mm-embedding/templates/service.yaml | 18 ++ .../templates/serviceaccount.yaml | 16 ++ .../templates/servicemonitor.yaml | 18 ++ .../templates/tests/test-pod.yaml | 34 ++++ helm-charts/common/mm-embedding/values.yaml | 148 ++++++++++++++++ .../mm-embedding/variant_clip-values.yaml | 10 ++ helm-charts/common/web-retriever/.helmignore | 2 + helm-charts/common/web-retriever/values.yaml | 21 +-- 27 files changed, 780 insertions(+), 64 deletions(-) create mode 100644 helm-charts/common/embedding-usvc/ci-multimodal-values.yaml create mode 100644 helm-charts/common/mm-embedding/.helmignore create mode 100644 helm-charts/common/mm-embedding/Chart.yaml create mode 100644 
helm-charts/common/mm-embedding/README.md create mode 120000 helm-charts/common/mm-embedding/ci-clip-values.yaml create mode 120000 helm-charts/common/mm-embedding/ci-values.yaml create mode 100644 helm-charts/common/mm-embedding/gaudi-values.yaml create mode 100644 helm-charts/common/mm-embedding/templates/_helpers.tpl create mode 100644 helm-charts/common/mm-embedding/templates/configmap.yaml create mode 100644 helm-charts/common/mm-embedding/templates/deployment.yaml create mode 100644 helm-charts/common/mm-embedding/templates/horizontal-pod-autoscaler.yaml create mode 100644 helm-charts/common/mm-embedding/templates/service.yaml create mode 100644 helm-charts/common/mm-embedding/templates/serviceaccount.yaml create mode 100644 helm-charts/common/mm-embedding/templates/servicemonitor.yaml create mode 100644 helm-charts/common/mm-embedding/templates/tests/test-pod.yaml create mode 100644 helm-charts/common/mm-embedding/values.yaml create mode 100644 helm-charts/common/mm-embedding/variant_clip-values.yaml diff --git a/helm-charts/common/embedding-usvc/.helmignore b/helm-charts/common/embedding-usvc/.helmignore index 0e8a0eb36..d2c43a2ac 100644 --- a/helm-charts/common/embedding-usvc/.helmignore +++ b/helm-charts/common/embedding-usvc/.helmignore @@ -21,3 +21,5 @@ .idea/ *.tmproj .vscode/ +# CI values +ci*-values.yaml diff --git a/helm-charts/common/embedding-usvc/Chart.yaml b/helm-charts/common/embedding-usvc/Chart.yaml index 7edaba721..a2c8e8577 100644 --- a/helm-charts/common/embedding-usvc/Chart.yaml +++ b/helm-charts/common/embedding-usvc/Chart.yaml @@ -13,3 +13,7 @@ dependencies: version: 0-latest repository: file://../tei condition: tei.enabled + - name: mm-embedding + version: 0-latest + repository: file://../mm-embedding + condition: mm-embedding.enabled diff --git a/helm-charts/common/embedding-usvc/README.md b/helm-charts/common/embedding-usvc/README.md index 2bc0ed4bd..85f070988 100644 --- a/helm-charts/common/embedding-usvc/README.md +++ 
b/helm-charts/common/embedding-usvc/README.md @@ -1,30 +1,42 @@ # embedding-usvc -Helm chart for deploying embedding microservice. +Helm chart for deploying OPEA embedding microservice. -embedding-usvc depends on TEI, set TEI_EMBEDDING_ENDPOINT. +## Installing the chart -## (Option1): Installing the chart separately +The OPEA embedding microservice depends on one of the following backend services: -First, you need to install the tei chart, please refer to the [tei](../tei) chart for more information. +- text embedding inference: please refer to [tei](../tei) chart for more information -After you've deployted the tei chart successfully, please run `kubectl get svc` to get the tei service endpoint, i.e. `http://tei`. +- multimodal embedding bridgetower: please refer to [mm-embedding](../mm-embedding) chart for more information. + +- prediction guard: please refer to external [Prediction Guard](https://predictionguard.com) for more information. + +First, you need to get the dependent service deployed, i.e. deploy the tei helm chart, mm-embedding helm chart, or contact prediction guard to get access info. + +After you've deployed the backend service successfully, please run `kubectl get svc` to get the backend service URL, e.g. `http://tei`, `http://mm-embedding`. To install the embedding-usvc chart, run the following: ```console cd GenAIInfra/helm-charts/common/embedding-usvc -export TEI_EMBEDDING_ENDPOINT="http://tei" helm dependency update -helm install embedding-usvc . --set TEI_EMBEDDING_ENDPOINT=${TEI_EMBEDDING_ENDPOINT} -``` -## (Option2): Installing the chart with dependencies automatically +# Use tei as the backend(default) +export EMBEDDING_COMPONENT_NAME="OPEA_TEI_EMBEDDING" +export EMBEDDING_ENDPOINT="http://tei" +helm install embedding-usvc . 
--set EMBEDDING_COMPONENT_NAME=${EMBEDDING_COMPONENT_NAME} --set TEI_EMBEDDING_ENDPOINT=${EMBEDDING_ENDPOINT} + +# Use multimodal embedding bridgetower as the backend +# export EMBEDDING_COMPONENT_NAME="OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" +# export EMBEDDING_ENDPOINT="http://mm-embedding" +# helm install embedding-usvc . --set EMBEDDING_COMPONENT_NAME=${EMBEDDING_COMPONENT_NAME} --set MMEI_EMBEDDING_ENDPOINT=${EMBEDDING_ENDPOINT} --set MULTIMODAL_EMBEDDING=true + +# Use prediction guard as the backend +# export EMBEDDING_COMPONENT_NAME="OPEA_PREDICTIONGUARD_EMBEDDING" +# export API_KEY= +# helm install embedding-usvc . --set EMBEDDING_COMPONENT_NAME=${EMBEDDING_COMPONENT_NAME} --set PREDICTIONGUARD_API_KEY=${API_KEY} -```console -cd GenAIInfra/helm-charts/common/embedding-usvc -helm dependency update -helm install embedding-usvc . --set tei.enabled=true ``` ## Verify @@ -36,17 +48,24 @@ Then run the command `kubectl port-forward svc/embedding-usvc 6000:6000` to expo Open another terminal and run the following command to verify the service if working: ```console +# Verify with tei or prediction guard backend: +curl http://localhost:6000/v1/embeddings \ + -X POST \ + -H 'Content-Type: application/json' \ + -d '{"input":"What is Deep Learning?"}' + +# Verify with multimodal embedding bridgetower backend: curl http://localhost:6000/v1/embeddings \ -X POST \ - -d '{"text":"hello"}' \ - -H 'Content-Type: application/json' + -H 'Content-Type: application/json' \ + -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}' ``` ## Values -| Key | Type | Default | Description | -| ---------------------- | ------ | ---------------------- | ----------- | -| image.repository | string | `"opea/embedding-tei"` | | -| service.port | string | `"6000"` | | -| TEI_EMBEDDING_ENDPOINT | string | `""` | | -| global.monitoring | bool | `false` | | +| Key | Type | Default | 
Description | +| ------------------------ | ------ | ---------------------- | -------------------------- | +| image.repository | string | `"opea/embedding"` | | +| service.port | string | `"6000"` | | +| EMBEDDING_COMPONENT_NAME | string | `"OPEA_TEI_EMBEDDING"` | backend service to talk to | +| global.monitoring | bool | `false` | | diff --git a/helm-charts/common/embedding-usvc/ci-multimodal-values.yaml b/helm-charts/common/embedding-usvc/ci-multimodal-values.yaml new file mode 100644 index 000000000..ae9533bfa --- /dev/null +++ b/helm-charts/common/embedding-usvc/ci-multimodal-values.yaml @@ -0,0 +1,14 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for embedding-usvc. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +tei: + enabled: false +mm-embedding: + enabled: true + +MULTIMODAL_EMBEDDING: true +EMBEDDING_COMPONENT_NAME: "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" diff --git a/helm-charts/common/embedding-usvc/ci-values.yaml b/helm-charts/common/embedding-usvc/ci-values.yaml index 543c69570..4d16d47bc 100644 --- a/helm-charts/common/embedding-usvc/ci-values.yaml +++ b/helm-charts/common/embedding-usvc/ci-values.yaml @@ -7,3 +7,8 @@ tei: enabled: true +mm-embedding: + enabled: false + +MULTIMODAL_EMBEDDING: false +EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" diff --git a/helm-charts/common/embedding-usvc/templates/configmap.yaml b/helm-charts/common/embedding-usvc/templates/configmap.yaml index 5ec5904ad..54014624c 100644 --- a/helm-charts/common/embedding-usvc/templates/configmap.yaml +++ b/helm-charts/common/embedding-usvc/templates/configmap.yaml @@ -8,15 +8,29 @@ metadata: labels: {{- include "embedding-usvc.labels" . 
| nindent 4 }} data: + MULTIMODAL_EMBEDDING: {{ .Values.MULTIMODAL_EMBEDDING | quote }} + EMBEDDING_COMPONENT_NAME: {{ .Values.EMBEDDING_COMPONENT_NAME | quote }} + {{- if eq .Values.EMBEDDING_COMPONENT_NAME "OPEA_TEI_EMBEDDING" }} {{- if .Values.TEI_EMBEDDING_ENDPOINT }} - TEI_EMBEDDING_ENDPOINT: {{ .Values.TEI_EMBEDDING_ENDPOINT | quote }} + TEI_EMBEDDING_ENDPOINT: {{ tpl .Values.TEI_EMBEDDING_ENDPOINT . | quote }} {{- else }} TEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-tei" {{- end }} + {{- else if eq .Values.EMBEDDING_COMPONENT_NAME "OPEA_PREDICTIONGUARD_EMBEDDING" }} + PG_EMBEDDING_MODEL_NAME: {{ .Values.PG_EMBEDDING_MODEL_NAME | quote }} + PREDICTIONGUARD_API_KEY: {{ .Values.PREDICTIONGUARD_API_KEY | quote }} + {{- else if eq .Values.EMBEDDING_COMPONENT_NAME "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" }} + MULTIMODAL_EMBEDDING: "true" + {{- if .Values.MMEI_EMBEDDING_ENDPOINT }} + MMEI_EMBEDDING_ENDPOINT: {{ tpl .Values.MMEI_EMBEDDING_ENDPOINT . | quote }} + {{- else }} + MMEI_EMBEDDING_ENDPOINT: "http://{{ .Release.Name }}-mm-embedding" + {{- end }} + {{- end }} http_proxy: {{ .Values.global.http_proxy | quote }} https_proxy: {{ .Values.global.https_proxy | quote }} {{- if and (not .Values.TEI_EMBEDDING_ENDPOINT) (or .Values.global.http_proxy .Values.global.https_proxy) }} - no_proxy: "{{ .Release.Name }}-tei,{{ .Values.global.no_proxy }}" + no_proxy: "{{ .Release.Name }}-tei,{{ .Release.Name }}-mm-embedding,{{ .Values.global.no_proxy }}" {{- else }} no_proxy: {{ .Values.global.no_proxy | quote }} {{- end }} diff --git a/helm-charts/common/embedding-usvc/templates/deployment.yaml b/helm-charts/common/embedding-usvc/templates/deployment.yaml index 459389518..b31b776df 100644 --- a/helm-charts/common/embedding-usvc/templates/deployment.yaml +++ b/helm-charts/common/embedding-usvc/templates/deployment.yaml @@ -29,7 +29,7 @@ spec: securityContext: {{- toYaml .Values.podSecurityContext | nindent 8 }} containers: - - name: {{ .Release.Name }} + - name: {{ 
.Chart.Name }} envFrom: - configMapRef: name: {{ include "embedding-usvc.fullname" . }}-config diff --git a/helm-charts/common/embedding-usvc/templates/tests/test-pod.yaml b/helm-charts/common/embedding-usvc/templates/tests/test-pod.yaml index 7acc267b3..b43679979 100644 --- a/helm-charts/common/embedding-usvc/templates/tests/test-pod.yaml +++ b/helm-charts/common/embedding-usvc/templates/tests/test-pod.yaml @@ -21,7 +21,11 @@ spec: for ((i=1; i<=max_retry; i++)); do curl http://{{ include "embedding-usvc.fullname" . }}:{{ .Values.service.port }}/v1/embeddings -sS --fail-with-body \ -X POST \ - -d '{"text":"hello"}' \ + {{- if eq .Values.EMBEDDING_COMPONENT_NAME "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" }} + -d '{"text": {"text" : "This is some sample text."}, "image" : {"url": "https://github.com/docarray/docarray/blob/main/tests/toydata/image-data/apple.png?raw=true"}}' \ + {{- else }} + -d '{"input":"What is Deep Learning?"}' \ + {{- end }} -H 'Content-Type: application/json' && break; curlcode=$? if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; diff --git a/helm-charts/common/embedding-usvc/values.yaml b/helm-charts/common/embedding-usvc/values.yaml index a4a36f8ab..1c1233388 100644 --- a/helm-charts/common/embedding-usvc/values.yaml +++ b/helm-charts/common/embedding-usvc/values.yaml @@ -5,18 +5,30 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. -tei: - enabled: false - -replicaCount: 1 - -# Set it as a non-null string, such as true, if you want to enable logging facility, -# otherwise, keep it as "" to disable it. +# Configurations for OPEA microservice embedding-usvc +# Set it as a non-null string, such as true, if you want to enable logging facility. 
LOGFLAG: "" +# embedding need to talk to different backend services: tei, multimodal-bridgetower, predictionGuard +# Default is to use the tei(text-embedding-inference) as the backend +MULTIMODAL_EMBEDDING: false +EMBEDDING_COMPONENT_NAME: "OPEA_TEI_EMBEDDING" TEI_EMBEDDING_ENDPOINT: "" + +# Uncomment and set the following settings to use predictionGuard as the backend +# EMBEDDING_COMPONENT_NAME: "OPEA_PREDICTIONGUARD_EMBEDDING" +# PG_EMBEDDING_MODEL_NAME: "bridgetower-large-itm-mlm-itc" +# PREDICTIONGUARD_API_KEY: "" + +# Uncomment and set the following settings to use embedding-multimodal-bridgetower as the backend +# MULTIMODAL_EMBEDDING: true +# EMBEDDING_COMPONENT_NAME: "OPEA_MULTIMODAL_EMBEDDING_BRIDGETOWER" +# MMEI_EMBEDDING_ENDPOINT: "" + +replicaCount: 1 + image: - repository: opea/embedding-tei + repository: opea/embedding # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. # pullPolicy: "" # Overrides the image tag whose default is the chart appVersion. @@ -58,25 +70,14 @@ service: # The default port for embedding service is 9000 port: 6000 -resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+resources: # limits: # cpu: 100m # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi + requests: + cpu: 100m + memory: 128Mi -livenessProbe: - httpGet: - path: v1/health_check - port: embedding-usvc - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 24 readinessProbe: httpGet: path: v1/health_check @@ -111,3 +112,9 @@ global: # Prometheus Helm install release name for serviceMonitor prometheusRelease: prometheus-stack + +# The following is for CI tests only +tei: + enabled: false +mm-embedding: + enabled: false diff --git a/helm-charts/common/mm-embedding/.helmignore b/helm-charts/common/mm-embedding/.helmignore new file mode 100644 index 000000000..d2c43a2ac --- /dev/null +++ b/helm-charts/common/mm-embedding/.helmignore @@ -0,0 +1,25 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ +# CI values +ci*-values.yaml diff --git a/helm-charts/common/mm-embedding/Chart.yaml b/helm-charts/common/mm-embedding/Chart.yaml new file mode 100644 index 000000000..0af00c4c8 --- /dev/null +++ b/helm-charts/common/mm-embedding/Chart.yaml @@ -0,0 +1,9 @@ +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v2 +appVersion: "1.1" +description: A Helm chart for deploying opea multimodel embedding microservices +name: mm-embedding +type: application +version: 0-latest diff --git a/helm-charts/common/mm-embedding/README.md b/helm-charts/common/mm-embedding/README.md new file mode 100644 index 000000000..7726be33c --- /dev/null +++ b/helm-charts/common/mm-embedding/README.md @@ -0,0 +1,58 @@ +# OPEA mm-embedding microservice + +Helm chart for deploying OPEA multimodal embedding service. 
+ +## Installing the Chart + +To install the chart, run the following: + +```console +cd GenAIInfra/helm-charts/common +export MODELDIR=/mnt/opea-models +export HFTOKEN="insert-your-huggingface-token-here" +# To deploy embedding-multimodal-bridgetower microservice on CPU +helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} +# To deploy embedding-multimodal-bridgetower microservice on Gaudi +# helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values mm-embedding/gaudi-values.yaml +# To deploy embedding-multimodal-clip microservice on CPU +helm install mm-embedding mm-embedding --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values mm-embedding/variant_clip-values.yaml +``` + +By default, the embedding-multimodal-bridgetower service will download the "BridgeTower/bridgetower-large-itm-mlm-itc" model, which is about 3.5GB, and the embedding-multimodal-clip service will download the "openai/clip-vit-base-patch32" model which is about 1.7GB. + +If you already cached the model locally, you can pass it to container like this example: + +MODELDIR=/mnt/opea-models + +MODELNAME="/data/models--BridgeTower--bridgetower-large-itm-mlm-itc" + +## Verify + +To verify the installation, run the command `kubectl get pod` to make sure all pods are running and in ready state. + +Then run the command `kubectl port-forward svc/mm-embedding 6990:80` to expose the mm-embedding service for access. 
+ +Open another terminal and run the following command to verify that the service is working: + +```console +# Verify with embedding-multimodal-bridgetower +curl http://localhost:6990/v1/encode \ + -XPOST \ + -d '{"text":"This is example"}' \ + -H 'Content-Type: application/json' + +# Verify with embedding-multimodal-clip +curl http://localhost:6990/v1/embeddings \ + -XPOST \ + -d '{"text":"This is example"}' \ + -H 'Content-Type: application/json' +``` + +## Values + +| Key | Type | Default | Description | +| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token | +| global.modelUseHostPath | string | `""` | Cached models directory, service will not download if the model is cached here. The host path "modelUseHostPath" will be mounted to container as /data directory. Set this to null/empty will force it to download model. | +| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! | +| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! 
| diff --git a/helm-charts/common/mm-embedding/ci-clip-values.yaml b/helm-charts/common/mm-embedding/ci-clip-values.yaml new file mode 120000 index 000000000..4c7b02d4f --- /dev/null +++ b/helm-charts/common/mm-embedding/ci-clip-values.yaml @@ -0,0 +1 @@ +variant_clip-values.yaml \ No newline at end of file diff --git a/helm-charts/common/mm-embedding/ci-values.yaml b/helm-charts/common/mm-embedding/ci-values.yaml new file mode 120000 index 000000000..7d1010096 --- /dev/null +++ b/helm-charts/common/mm-embedding/ci-values.yaml @@ -0,0 +1 @@ +values.yaml \ No newline at end of file diff --git a/helm-charts/common/mm-embedding/gaudi-values.yaml b/helm-charts/common/mm-embedding/gaudi-values.yaml new file mode 100644 index 000000000..1ddc966c5 --- /dev/null +++ b/helm-charts/common/mm-embedding/gaudi-values.yaml @@ -0,0 +1,22 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +accelDevice: "gaudi" + +image: + repository: opea/embedding-multimodal-bridgetower-gaudi + tag: "latest" + +resources: + limits: + habana.ai/gaudi: 1 + +readinessProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 +startupProbe: + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 1 + failureThreshold: 120 diff --git a/helm-charts/common/mm-embedding/templates/_helpers.tpl b/helm-charts/common/mm-embedding/templates/_helpers.tpl new file mode 100644 index 000000000..fd47bcbe7 --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/_helpers.tpl @@ -0,0 +1,64 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "mm-embedding.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. 
+*/}} +{{- define "mm-embedding.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. +*/}} +{{- define "mm-embedding.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "mm-embedding.labels" -}} +helm.sh/chart: {{ include "mm-embedding.chart" . }} +{{ include "mm-embedding.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "mm-embedding.selectorLabels" -}} +app.kubernetes.io/name: {{ include "mm-embedding.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "mm-embedding.serviceAccountName" -}} +{{- if .Values.global.sharedSAName }} +{{- .Values.global.sharedSAName }} +{{- else if .Values.serviceAccount.create }} +{{- default (include "mm-embedding.fullname" .) 
.Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/helm-charts/common/mm-embedding/templates/configmap.yaml b/helm-charts/common/mm-embedding/templates/configmap.yaml new file mode 100644 index 000000000..31f6fc571 --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/configmap.yaml @@ -0,0 +1,30 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "mm-embedding.fullname" . }}-config + labels: + {{- include "mm-embedding.labels" . | nindent 4 }} +data: + PORT: {{ .Values.service.port | quote }} + HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote}} + {{- if .Values.global.HF_ENDPOINT }} + HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote}} + {{- end }} + http_proxy: {{ .Values.global.http_proxy | quote }} + https_proxy: {{ .Values.global.https_proxy | quote }} + no_proxy: {{ .Values.global.no_proxy | quote }} + LOGFLAG: {{ .Values.LOGFLAG | quote }} + HF_HOME: "/tmp/.cache/huggingface" + HF_HUB_CACHE: "/data" + {{- if contains "embedding-multimodal-bridgetower-gaudi" .Values.image.repository }} + HABANA_LOGS: "/tmp/habana_logs" + {{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }} + PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }} + {{- end }} + {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }} + OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote}} + {{- end }} + {{- end }} diff --git a/helm-charts/common/mm-embedding/templates/deployment.yaml b/helm-charts/common/mm-embedding/templates/deployment.yaml new file mode 100644 index 000000000..e538404c3 --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/deployment.yaml @@ -0,0 +1,167 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: 
{{ include "mm-embedding.fullname" . }} + labels: + {{- include "mm-embedding.labels" . | nindent 4 }} +spec: + {{- if ne (int .Values.replicaCount) 1 }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "mm-embedding.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "mm-embedding.labels" . | nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "mm-embedding.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.podSecurityContext | nindent 8 }} + {{- if not (hasPrefix "/data/" .Values.EMBEDDING_MODEL_ID) }} + initContainers: + - name: model-downloader + envFrom: + - configMapRef: + name: {{ include "mm-embedding.fullname" . }}-config + securityContext: + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + {{- if hasKey .Values.securityContext "runAsGroup" }} + runAsGroup: {{ .Values.securityContext.runAsGroup }} + {{- end }} + capabilities: + drop: + - ALL + add: + - DAC_OVERRIDE + # To be able to make data model directory group writable for + # previously downloaded model by old versions of helm chart + - FOWNER + seccompProfile: + type: RuntimeDefault + image: huggingface/downloader:0.17.3 + command: ['sh', '-ec'] + args: + - | + echo "Huggingface log in ..."; + huggingface-cli login --token $(HF_TOKEN); + echo "Download model {{ .Values.EMBEDDING_MODEL_ID }} ... 
"; + huggingface-cli download --cache-dir /data {{ .Values.EMBEDDING_MODEL_ID | quote }}; + echo "Change model files mode ..."; + chmod -R g+w /data/models--{{ replace "/" "--" .Values.EMBEDDING_MODEL_ID }} + # NOTE: Buggy logout command; + # huggingface-cli logout; + volumeMounts: + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + {{- end }} + containers: + - name: {{ .Chart.Name }} + envFrom: + - configMapRef: + name: {{ include "mm-embedding.fullname" . }}-config + {{- if .Values.global.extraEnvConfig }} + - configMapRef: + name: {{ .Values.global.extraEnvConfig }} + optional: true + {{- end }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + {{- if .Values.image.pullPolicy }} + imagePullPolicy: {{ .Values.image.pullPolicy }} + {{- end }} + args: + - "--model_name_or_path" + - {{ .Values.EMBEDDING_MODEL_ID | quote }} + {{- if .Values.extraCmdArgs }} + {{- range .Values.extraCmdArgs }} + - {{ . | quote }} + {{- end }} + {{- end }} + ports: + - name: mm-embedding + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- if .Values.livenessProbe }} + livenessProbe: + {{- toYaml .Values.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.readinessProbe }} + readinessProbe: + {{- toYaml .Values.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.startupProbe }} + startupProbe: + {{- toYaml .Values.startupProbe | nindent 12 }} + {{- end }} + resources: + {{- toYaml .Values.resources | nindent 12 }} + volumeMounts: + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + - mountPath: /data + name: model-volume + - mountPath: /tmp + name: tmp + volumes: + {{- with .Values.volumes }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + - name: model-volume + {{- if .Values.global.modelUsePVC }} + persistentVolumeClaim: + claimName: {{ .Values.global.modelUsePVC }} + {{- else if .Values.global.modelUseHostPath }} + hostPath: + path: {{ .Values.global.modelUseHostPath }} + type: Directory + {{- else }} + emptyDir: {} + {{- end }} + - name: tmp + emptyDir: {} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if not .Values.accelDevice }} + # extra time to finish processing buffered requests on CPU before pod is forcibly terminated + terminationGracePeriodSeconds: 120 + {{- end }} + {{- if .Values.evenly_distributed }} + topologySpreadConstraints: + - maxSkew: 1 + topologyKey: kubernetes.io/hostname + whenUnsatisfiable: ScheduleAnyway + labelSelector: + matchLabels: + {{- include "mm-embedding.selectorLabels" . | nindent 14 }} + {{- end }} diff --git a/helm-charts/common/mm-embedding/templates/horizontal-pod-autoscaler.yaml b/helm-charts/common/mm-embedding/templates/horizontal-pod-autoscaler.yaml new file mode 100644 index 000000000..f60d91b7d --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/horizontal-pod-autoscaler.yaml @@ -0,0 +1,35 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "mm-embedding.fullname" . }} + labels: + {{- include "mm-embedding.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "mm-embedding.fullname" . 
}} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/helm-charts/common/mm-embedding/templates/service.yaml b/helm-charts/common/mm-embedding/templates/service.yaml new file mode 100644 index 000000000..e3070858e --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/service.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Service +metadata: + name: {{ include "mm-embedding.fullname" . }} + labels: + {{- include "mm-embedding.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: 80 + targetPort: mm-embedding + protocol: TCP + name: mm-embedding + selector: + {{- include "mm-embedding.selectorLabels" . | nindent 4 }} diff --git a/helm-charts/common/mm-embedding/templates/serviceaccount.yaml b/helm-charts/common/mm-embedding/templates/serviceaccount.yaml new file mode 100644 index 000000000..7cfbe6742 --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/serviceaccount.yaml @@ -0,0 +1,16 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +{{- if .Values.serviceAccount.create }} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "mm-embedding.serviceAccountName" . }} + labels: + {{- include "mm-embedding.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . 
| nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/helm-charts/common/mm-embedding/templates/servicemonitor.yaml b/helm-charts/common/mm-embedding/templates/servicemonitor.yaml new file mode 100644 index 000000000..1dcd93808 --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/servicemonitor.yaml @@ -0,0 +1,18 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +{{- if .Values.global.monitoring }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + name: {{ include "mm-embedding.fullname" . }} + labels: + release: {{ .Values.global.prometheusRelease }} +spec: + selector: + matchLabels: + {{- include "mm-embedding.selectorLabels" . | nindent 6 }} + endpoints: + - port: mm-embedding + interval: 5s +{{- end }} diff --git a/helm-charts/common/mm-embedding/templates/tests/test-pod.yaml b/helm-charts/common/mm-embedding/templates/tests/test-pod.yaml new file mode 100644 index 000000000..b5be3d0e0 --- /dev/null +++ b/helm-charts/common/mm-embedding/templates/tests/test-pod.yaml @@ -0,0 +1,34 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "mm-embedding.fullname" . }}-testpod" + labels: + {{- include "mm-embedding.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: curl + image: python:3.10 + command: ['bash', '-c'] + args: + - | + {{- if contains "embedding-multimodal-bridgetower" .Values.image.repository }} + url=encode; + {{- else }} + url=embeddings; + {{- end }} + max_retry=20; + for ((i=1; i<=max_retry; i++)); do + curl http://{{ include "mm-embedding.fullname" . }}/v1/$url -sS --fail-with-body \ + -X POST \ + -d '{"text":"This is example"}' \ + -H 'Content-Type: application/json' && break; + curlcode=$? 
+ if [[ $curlcode -eq 7 ]]; then sleep 10; else echo "curl failed with code $curlcode"; exit 1; fi; + done; + if [ $i -gt $max_retry ]; then echo "test failed with maximum retry"; exit 1; fi + restartPolicy: Never diff --git a/helm-charts/common/mm-embedding/values.yaml b/helm-charts/common/mm-embedding/values.yaml new file mode 100644 index 000000000..bd7cae7d1 --- /dev/null +++ b/helm-charts/common/mm-embedding/values.yaml @@ -0,0 +1,148 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# Default values for mm-embedding. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# Configurations for OPEA microservice mm-embedding +# Set to "true" to enable verbose logging +LOGFLAG: "" +# NOTE: Currently the model is hard coded in the corresponding microservices +# embedding-multimodal-bridgetower: BridgeTower/bridgetower-large-itm-mlm-itc +# embedding-multimodal-clip: openai/clip-vit-base-patch32 +# This setting is kept for a future enhancement that allows changing models +EMBEDDING_MODEL_ID: BridgeTower/bridgetower-large-itm-mlm-itc + +accelDevice: "" + +# This will set the replicaset count. More information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +# This sets the container image. More information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: opea/embedding-multimodal-bridgetower + # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. + # pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "latest" + +# This is for the secrets for pulling an image from a private repository. More information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name.
+nameOverride: "" +fullnameOverride: "" + +# This section builds out the service account. More information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: false + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# This is for setting Kubernetes Annotations to a Pod. +# For more information check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. +# For more information check out: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: + readOnlyRootFilesystem: false + allowPrivilegeEscalation: false + runAsNonRoot: true + runAsUser: 1000 + capabilities: + drop: + - ALL + seccompProfile: + type: RuntimeDefault + # Init container sets the downloaded model dir to be group writable, so that container + # can keep its lock file there. This relies on both containers using the same group ID.
+ runAsGroup: 0 + +# This is for setting up a service. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + # This sets the service type. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports. More information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 6990 + +resources: + # limits: + # cpu: 100m + # memory: 128Mi + requests: + cpu: 100m + memory: 128Mi + +# Set up pod health monitoring probes, see https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +# NOTE: the defaults below probe the HTTP v1/health_check endpoint; switch to a TCP probe for mm-embedding-gaudi if HTTP probing makes it fail normal requests +readinessProbe: + httpGet: + path: v1/health_check + port: mm-embedding + initialDelaySeconds: 5 + periodSeconds: 5 +startupProbe: + httpGet: + path: v1/health_check + port: mm-embedding + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 120 + +# This section is for setting up autoscaling. More information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ +# Note: Do not use (above) "replicaCount" with HPA (Chart ignores value=1 as it's k8s default) +# Note: Because HPA can sometimes change replica counts up and down rather frequently, the microservice needs to handle SIGTERM gracefully: +# - stop accepting new requests +# - handle all of its buffered requests +# - terminate after those have been processed +# See https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-termination. +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 4 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# Additional volumeMounts on the output Deployment definition.
+volumeMounts: [] + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +global: + http_proxy: "" + https_proxy: "" + no_proxy: "" + HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here" + # service account name to be shared with all parent/child charts. + # If set, it will overwrite serviceAccount.name. + # If set, and serviceAccount.create is false, it will assume this service account is already created by others. + sharedSAName: "" + + # Choose where to save your downloaded models + # Set modelUseHostPath for a local directory; this is good for a one-node test, e.g. /mnt/opea-models + # Set modelUsePVC for PersistentVolumeClaim(PVC), which is suitable for multinode deployment, e.g. model-volume + # You can only set one of the following variables; the behavior is not defined if both of them are set. + # By default, both are set to empty: the model will be downloaded every time and will not be saved into any permanent storage medium. + modelUseHostPath: "" + modelUsePVC: "" + + monitoring: false + # Prometheus Helm install release name for serviceMonitor + prometheusRelease: prometheus-stack diff --git a/helm-charts/common/mm-embedding/variant_clip-values.yaml b/helm-charts/common/mm-embedding/variant_clip-values.yaml new file mode 100644 index 000000000..4fd338961 --- /dev/null +++ b/helm-charts/common/mm-embedding/variant_clip-values.yaml @@ -0,0 +1,10 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +# The following settings are for service embedding-multimodal-clip + +image: + repository: opea/embedding-multimodal-clip + tag: "latest" + +EMBEDDING_MODEL_ID: openai/clip-vit-base-patch32 diff --git a/helm-charts/common/web-retriever/.helmignore b/helm-charts/common/web-retriever/.helmignore index 0e8a0eb36..d2c43a2ac 100644 --- a/helm-charts/common/web-retriever/.helmignore +++ b/helm-charts/common/web-retriever/.helmignore @@ -21,3 +21,5 @@ .idea/ *.tmproj .vscode/ +# CI values +ci*-values.yaml diff --git
a/helm-charts/common/web-retriever/values.yaml b/helm-charts/common/web-retriever/values.yaml index 12aba2905..07b2b86fc 100644 --- a/helm-charts/common/web-retriever/values.yaml +++ b/helm-charts/common/web-retriever/values.yaml @@ -19,7 +19,7 @@ GOOGLE_API_KEY: "" GOOGLE_CSE_ID: "" image: - repository: opea/web-retriever-chroma + repository: opea/web-retriever # Uncomment the following line to set desired image pull policy if needed, as one of Always, IfNotPresent, Never. # pullPolicy: "" # Overrides the image tag whose default is the chart appVersion. @@ -61,25 +61,14 @@ service: # The default port for retriever service is 7000 port: 7077 -resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. +resources: # limits: # cpu: 100m # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi + requests: + cpu: 100m + memory: 128Mi -livenessProbe: - httpGet: - path: v1/health_check - port: web-retriever - initialDelaySeconds: 5 - periodSeconds: 5 - failureThreshold: 24 readinessProbe: httpGet: path: v1/health_check