
Commit

Add lvm related microservices
- Add lvm chart for large vision model inference

- Adapt changes to lvm-uservice

Signed-off-by: Lianhao Lu <[email protected]>
lianhao committed Jan 14, 2025
1 parent ddf0500 commit 2a6f441
Showing 25 changed files with 922 additions and 57 deletions.
25 changes: 25 additions & 0 deletions helm-charts/common/lvm-serve/.helmignore
@@ -0,0 +1,25 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*.orig
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/
# CI values
ci*-values.yaml
9 changes: 9 additions & 0 deletions helm-charts/common/lvm-serve/Chart.yaml
@@ -0,0 +1,9 @@
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v2
appVersion: "1.1"
description: A Helm chart for deploying OPEA LVM (large vision model) inference microservices
name: lvm-serve
type: application
version: 0-latest
60 changes: 60 additions & 0 deletions helm-charts/common/lvm-serve/README.md
@@ -0,0 +1,60 @@
# OPEA lvm-serve microservice

Helm chart for deploying the OPEA large vision model (LVM) inference service.

## Installing the Chart

To install the chart, run the following:

```console
cd GenAIInfra/helm-charts/common
export MODELDIR=/mnt/opea-models
export HFTOKEN="insert-your-huggingface-token-here"
export LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
# To deploy the lvm-llava microservice on CPU
helm install lvm-serve lvm-serve --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_MODEL_ID=${LVM_MODEL_ID}
# To deploy the lvm-llava microservice on Gaudi
# helm install lvm-serve lvm-serve --set global.modelUseHostPath=${MODELDIR} --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --set LVM_MODEL_ID=${LVM_MODEL_ID} --values lvm-serve/gaudi-values.yaml
# To deploy the lvm-video-llama microservice on CPU
helm install lvm-serve lvm-serve --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} --values lvm-serve/variant_video-llama-values.yaml
```

By default, the lvm-llava variant of the lvm-serve service downloads the model "llava-hf/llava-1.5-7b-hf", which is about 14 GB.

If the model is already cached locally, you can mount the cache into the container instead of downloading it again, for example:

MODELDIR=/mnt/opea-models

MODELNAME="/data/models--llava-hf--llava-1.5-7b-hf"
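
A minimal install sketch for this case, assuming the model was previously downloaded under /mnt/opea-models on every node (the path and token values are placeholders):

```console
# Sketch: reuse a locally cached copy of llava-hf/llava-1.5-7b-hf instead of downloading it again
helm install lvm-serve lvm-serve \
  --set global.modelUseHostPath=/mnt/opea-models \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
  --set LVM_MODEL_ID="llava-hf/llava-1.5-7b-hf"
```

The chart mounts the host path as /data inside the container, so a cached /data/models--llava-hf--llava-1.5-7b-hf directory is picked up by the Hugging Face cache lookup and no download is triggered.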

## Verify

To verify the installation, run the command `kubectl get pod` to make sure all pods are running and in the Ready state.

Then run the command `kubectl port-forward svc/lvm-serve 9399:9399` to expose the lvm-serve service for access.

Open another terminal and run the following commands to verify that the service is working:

```console
# Verify with lvm-llava
pip install Pillow requests
image_b64_str=$(python -c 'import base64;from io import BytesIO;import PIL.Image;import requests;image_path = "https://avatars.githubusercontent.com/u/39623753?s=40&v=4";image = PIL.Image.open(requests.get(image_path, stream=True, timeout=3000).raw);buffered = BytesIO();image.save(buffered, format="PNG");img_b64_str = base64.b64encode(buffered.getvalue()).decode();print(img_b64_str)')
body="{\"img_b64_str\": \"${image_b64_str}\", \"prompt\": \"What is this?\", \"max_new_tokens\": 32}"
url="http://localhost:9399/generate"
curl $url -XPOST -d "$body" -H 'Content-Type: application/json'

# Verify with lvm-video-llama
body='{"image": "iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs+9AAAAFUlEQVR42mP8/5+hnoEIwDiqkL4KAcT9GO0U4BxoAAAAAElFTkSuQmCC", "prompt": "Describe the image.", "max_new_tokens": 32}'
url="http://localhost:9399/v1/lvm-serve"
curl $url -XPOST -d "$body" -H 'Content-Type: application/json'
```

## Values

| Key | Type | Default | Description |
| ------------------------------- | ------ | ------------------------------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| global.HUGGINGFACEHUB_API_TOKEN | string | `insert-your-huggingface-token-here` | Hugging Face API token |
| global.modelUseHostPath         | string | `""`                                 | Cached models directory; the service will not download the model if it is already cached here. The host path "modelUseHostPath" is mounted into the container as the /data directory. Setting this to null/empty forces the model to be downloaded. |
| LVM_MODEL_ID                    | string | `"llava-hf/llava-1.5-7b-hf"`         | Large vision model ID to deploy                                                                                                                                                                                              |
| autoscaling.enabled | bool | `false` | Enable HPA autoscaling for the service deployment based on metrics it provides. See [HPA instructions](../../HPA.md) before enabling! |
| global.monitoring | bool | `false` | Enable usage metrics for the service. Required for HPA. See [monitoring instructions](../../monitoring.md) before enabling! |
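
As a sketch of how the last two values in the table fit together (only enable them after following the linked HPA and monitoring instructions; the release name and token are placeholders):

```console
# Sketch: enable usage metrics and HPA-based autoscaling for lvm-serve
helm upgrade --install lvm-serve lvm-serve \
  --set global.HUGGINGFACEHUB_API_TOKEN=${HFTOKEN} \
  --set global.monitoring=true \
  --set autoscaling.enabled=true
```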
1 change: 1 addition & 0 deletions helm-charts/common/lvm-serve/ci-values.yaml
22 changes: 22 additions & 0 deletions helm-charts/common/lvm-serve/gaudi-values.yaml
@@ -0,0 +1,22 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

accelDevice: "gaudi"

image:
  repository: opea/llava-gaudi
  tag: "latest"

resources:
  limits:
    habana.ai/gaudi: 1

readinessProbe:
  initialDelaySeconds: 5
  periodSeconds: 5
  timeoutSeconds: 1
startupProbe:
  initialDelaySeconds: 5
  periodSeconds: 5
  timeoutSeconds: 1
  failureThreshold: 120
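
Before installing on Gaudi, you can render the chart with this overlay to sanity-check that the habana.ai/gaudi resource limit is applied; a sketch, run from the helm-charts/common directory:

```console
# Sketch: render the chart with the Gaudi overlay and inspect the accelerator resource limit
helm template lvm-serve lvm-serve -f lvm-serve/gaudi-values.yaml | grep -B2 "habana.ai/gaudi"
```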
64 changes: 64 additions & 0 deletions helm-charts/common/lvm-serve/templates/_helpers.tpl
@@ -0,0 +1,64 @@
{{/*
Expand the name of the chart.
*/}}
{{- define "lvm-serve.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "lvm-serve.fullname" -}}
{{- if .Values.fullnameOverride }}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- $name := default .Chart.Name .Values.nameOverride }}
{{- if contains $name .Release.Name }}
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
{{- else }}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
{{- end }}
{{- end }}
{{- end }}

{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "lvm-serve.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
{{- end }}

{{/*
Common labels
*/}}
{{- define "lvm-serve.labels" -}}
helm.sh/chart: {{ include "lvm-serve.chart" . }}
{{ include "lvm-serve.selectorLabels" . }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end }}

{{/*
Selector labels
*/}}
{{- define "lvm-serve.selectorLabels" -}}
app.kubernetes.io/name: {{ include "lvm-serve.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end }}

{{/*
Create the name of the service account to use
*/}}
{{- define "lvm-serve.serviceAccountName" -}}
{{- if .Values.global.sharedSAName }}
{{- .Values.global.sharedSAName }}
{{- else if .Values.serviceAccount.create }}
{{- default (include "lvm-serve.fullname" .) .Values.serviceAccount.name }}
{{- else }}
{{- default "default" .Values.serviceAccount.name }}
{{- end }}
{{- end }}
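
The serviceAccountName helper above falls back through global.sharedSAName, a chart-created account, and finally "default". A quick sketch to see which name a given set of values resolves to:

```console
# Sketch: render only the deployment and check the resolved service account name
helm template lvm-serve lvm-serve -s templates/deployment.yaml | grep serviceAccountName
```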
43 changes: 43 additions & 0 deletions helm-charts/common/lvm-serve/templates/configmap.yaml
@@ -0,0 +1,43 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: ConfigMap
metadata:
  name: {{ include "lvm-serve.fullname" . }}-config
  labels:
    {{- include "lvm-serve.labels" . | nindent 4 }}
data:
  {{- if or (hasSuffix "lvm-llava" .Values.image.repository) (hasSuffix "llava-gaudi" .Values.image.repository) }}
  LVM_MODEL_ID: {{ .Values.LVM_MODEL_ID | quote }}
  {{- else if hasSuffix "lvm-video-llama" .Values.image.repository }}
  llm_download: {{ .Values.LLM_DOWNLOAD | default "True" | quote }}
  {{- else if hasSuffix "lvm-predictionguard" .Values.image.repository }}
  PREDICTIONGUARD_API_KEY: {{ .Values.PREDICTIONGUARD_API_KEY | quote }}
  {{- else if hasSuffix "lvm-llama-vision" .Values.image.repository }}
  LLAMA_VISION_MODEL_ID: {{ .Values.LVM_MODEL_ID | quote }}
  {{- else if hasSuffix "lvm-llama-vision-guard" .Values.image.repository }}
  LLAMA_VISION_GUARD_MODEL_ID: {{ .Values.LVM_MODEL_ID | quote }}
  {{- else if hasSuffix "lvm-llama-vision-tp" .Values.image.repository }}
  MODEL_ID: {{ .Values.LVM_MODEL_ID | quote }}
  {{- end }}
  HF_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }}
  HUGGINGFACEHUB_API_TOKEN: {{ .Values.global.HUGGINGFACEHUB_API_TOKEN | quote }}
  {{- if .Values.global.HF_ENDPOINT }}
  HF_ENDPOINT: {{ .Values.global.HF_ENDPOINT | quote }}
  {{- end }}
  http_proxy: {{ .Values.global.http_proxy | quote }}
  https_proxy: {{ .Values.global.https_proxy | quote }}
  no_proxy: {{ .Values.global.no_proxy | quote }}
  LOGFLAG: {{ .Values.LOGFLAG | quote }}
  HF_HOME: "/tmp/.cache/huggingface"
  {{- if not (regexMatch "(lvm-video-llama|lvm-predictionguard)$" .Values.image.repository) }}
  HF_HUB_CACHE: "/data"
  {{- end }}
  HABANA_LOGS: "/tmp/habana_logs"
  {{- if .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES }}
  PT_HPU_ENABLE_LAZY_COLLECTIVES: {{ .Values.PT_HPU_ENABLE_LAZY_COLLECTIVES | quote }}
  {{- end }}
  {{- if .Values.OMPI_MCA_btl_vader_single_copy_mechanism }}
  OMPI_MCA_btl_vader_single_copy_mechanism: {{ .Values.OMPI_MCA_btl_vader_single_copy_mechanism | quote }}
  {{- end }}
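
Because the ConfigMap keys are selected by the suffix of image.repository, it can be useful to render just this template for a given variant and confirm which environment variables will be injected. A sketch, using the variant_video-llama-values.yaml overlay referenced in the README:

```console
# Sketch: show the ConfigMap generated for the video-llama variant
helm template lvm-serve lvm-serve -f lvm-serve/variant_video-llama-values.yaml -s templates/configmap.yaml
```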
167 changes: 167 additions & 0 deletions helm-charts/common/lvm-serve/templates/deployment.yaml
@@ -0,0 +1,167 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "lvm-serve.fullname" . }}
  labels:
    {{- include "lvm-serve.labels" . | nindent 4 }}
spec:
  {{- if ne (int .Values.replicaCount) 1 }}
  replicas: {{ .Values.replicaCount }}
  {{- end }}
  selector:
    matchLabels:
      {{- include "lvm-serve.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      {{- with .Values.podAnnotations }}
      annotations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      labels:
        {{- include "lvm-serve.labels" . | nindent 8 }}
        {{- with .Values.podLabels }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
    spec:
      {{- with .Values.imagePullSecrets }}
      imagePullSecrets:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      serviceAccountName: {{ include "lvm-serve.serviceAccountName" . }}
      securityContext:
        {{- toYaml .Values.podSecurityContext | nindent 8 }}
      {{- if and (not (hasPrefix "/data/" .Values.LVM_MODEL_ID)) (not (regexMatch "(lvm-video-llama|lvm-predictionguard)$" .Values.image.repository)) }}
      initContainers:
        - name: model-downloader
          envFrom:
            - configMapRef:
                name: {{ include "lvm-serve.fullname" . }}-config
          securityContext:
            readOnlyRootFilesystem: true
            allowPrivilegeEscalation: false
            {{- if hasKey .Values.securityContext "runAsGroup" }}
            runAsGroup: {{ .Values.securityContext.runAsGroup }}
            {{- end }}
            capabilities:
              drop:
                - ALL
              add:
                - DAC_OVERRIDE
                # To be able to make data model directory group writable for
                # previously downloaded model by old versions of helm chart
                - FOWNER
            seccompProfile:
              type: RuntimeDefault
          image: huggingface/downloader:0.17.3
          command: ['sh', '-ec']
          args:
            - |
              echo "Huggingface log in ...";
              huggingface-cli login --token $(HF_TOKEN);
              echo "Download model {{ .Values.LVM_MODEL_ID }} ... ";
              huggingface-cli download --cache-dir /data {{ .Values.LVM_MODEL_ID | quote }};
              echo "Change model files mode ...";
              chmod -R g+w /data/models--{{ replace "/" "--" .Values.LVM_MODEL_ID }}
              # NOTE: Buggy logout command;
              # huggingface-cli logout;
          volumeMounts:
            - mountPath: /data
              name: model-volume
            - mountPath: /tmp
              name: tmp
      {{- end }}
      containers:
        - name: {{ .Chart.Name }}
          envFrom:
            - configMapRef:
                name: {{ include "lvm-serve.fullname" . }}-config
            {{- if .Values.global.extraEnvConfig }}
            - configMapRef:
                name: {{ .Values.global.extraEnvConfig }}
                optional: true
            {{- end }}
          securityContext:
            {{- toYaml .Values.securityContext | nindent 12 }}
          image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
          {{- if .Values.image.pullPolicy }}
          imagePullPolicy: {{ .Values.image.pullPolicy }}
          {{- end }}
          args:
            - "--model_name_or_path"
            - {{ .Values.LVM_MODEL_ID | quote }}
            {{- if .Values.extraCmdArgs }}
            {{- range .Values.extraCmdArgs }}
            - {{ . | quote }}
            {{- end }}
            {{- end }}
          ports:
            - name: lvm-serve
              containerPort: {{ .Values.port }}
              protocol: TCP
          {{- if .Values.livenessProbe }}
          livenessProbe:
            {{- toYaml .Values.livenessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.readinessProbe }}
          readinessProbe:
            {{- toYaml .Values.readinessProbe | nindent 12 }}
          {{- end }}
          {{- if .Values.startupProbe }}
          startupProbe:
            {{- toYaml .Values.startupProbe | nindent 12 }}
          {{- end }}
          resources:
            {{- toYaml .Values.resources | nindent 12 }}
          volumeMounts:
            {{- with .Values.volumeMounts }}
            {{- toYaml . | nindent 12 }}
            {{- end }}
            - mountPath: /data
              name: model-volume
            - mountPath: /tmp
              name: tmp
      volumes:
        {{- with .Values.volumes }}
        {{- toYaml . | nindent 8 }}
        {{- end }}
        - name: model-volume
          {{- if .Values.global.modelUsePVC }}
          persistentVolumeClaim:
            claimName: {{ .Values.global.modelUsePVC }}
          {{- else if .Values.global.modelUseHostPath }}
          hostPath:
            path: {{ .Values.global.modelUseHostPath }}
            type: Directory
          {{- else }}
          emptyDir: {}
          {{- end }}
        - name: tmp
          emptyDir: {}
      {{- with .Values.nodeSelector }}
      nodeSelector:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.affinity }}
      affinity:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- with .Values.tolerations }}
      tolerations:
        {{- toYaml . | nindent 8 }}
      {{- end }}
      {{- if not .Values.accelDevice }}
      # extra time to finish processing buffered requests on CPU before pod is forcibly terminated
      terminationGracePeriodSeconds: 120
      {{- end }}
      {{- if .Values.evenly_distributed }}
      topologySpreadConstraints:
        - maxSkew: 1
          topologyKey: kubernetes.io/hostname
          whenUnsatisfiable: ScheduleAnyway
          labelSelector:
            matchLabels:
              {{- include "lvm-serve.selectorLabels" . | nindent 14 }}
      {{- end }}
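
When the model is not pre-cached, the model-downloader init container fetches it before the main container starts, so the first startup can take a while. A sketch for following that progress, assuming the release is named lvm-serve:

```console
# Sketch: watch the init container download the model, then wait for the pod to become Ready
kubectl logs -f deployment/lvm-serve -c model-downloader
kubectl wait --for=condition=ready pod -l app.kubernetes.io/name=lvm-serve --timeout=20m
```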