Skip to content

Commit

Permalink
helm: Add llm-faqgen-tgi support (#436)
Browse files Browse the repository at this point in the history
Related to issue #280

Signed-off-by: Lianhao Lu <[email protected]>
  • Loading branch information
lianhao authored Sep 24, 2024
1 parent ddeac46 commit 325126e
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 15 deletions.
6 changes: 6 additions & 0 deletions helm-charts/common/llm-uservice/templates/tests/test-pod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ spec:
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json' && break;
{{- else if contains "llm-faqgen-tgi" .Values.image.repository }}
# Try with faqgen endpoint
curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/faqgen -sS --fail-with-body \
-X POST \
-d '{"query":"Text Embeddings Inference (TEI) is a toolkit for deploying and serving open source text embeddings and sequence classification models. TEI enables high-performance extraction for the most popular models, including FlagEmbedding, Ember, GTE and E5."}' \
-H 'Content-Type: application/json' && break;
{{- else }}
curl http://{{ include "llm-uservice.fullname" . }}:{{ .Values.service.port }}/v1/chat/completions -sS --fail-with-body \
-X POST \
Expand Down
6 changes: 6 additions & 0 deletions helm-charts/common/llm-uservice/variant_docsum-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Values overlay for the llm-uservice chart: selects the DocSum variant image.
# update_manifests.sh detects the literal tag "latest" and rewrites it to the
# release tag ($NEWTAG) when rendering manifests.
image:
  repository: opea/llm-docsum-tgi
  tag: "latest"
9 changes: 9 additions & 0 deletions helm-charts/common/llm-uservice/variant_faqgen-values.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Values overlay for the llm-uservice chart: selects the FaqGen variant image.
# update_manifests.sh detects the literal tag "latest" and rewrites it to the
# release tag ($NEWTAG) when rendering manifests.
image:
  repository: opea/llm-faqgen-tgi
  tag: "latest"

# Overrides passed to the tgi subchart: the model the FaqGen microservice
# expects to be served by its TGI backend.
tgi:
  LLM_MODEL_ID: meta-llama/Meta-Llama-3-8B-Instruct
19 changes: 12 additions & 7 deletions helm-charts/update_manifests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,24 @@ function generate_yaml {
extraparams="--set image.tag=$NEWTAG"
fi

helm template $chart ./common/$chart --skip-tests --values ./common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true $extraparams > ${outputdir}/$chart.yaml
helm template $chart ./common/$chart --skip-tests --values ./common/$chart/values.yaml --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH $extraparams > ${outputdir}/$chart.yaml

for f in `ls ./common/$chart/*-values.yaml 2>/dev/null `; do
ext=$(basename $f | cut -d'-' -f1)
filename=$(basename $f)
releasename=$chart
if [[ "$filename" =~ ^variant_.*-values.yaml ]]; then
ext=$(echo $filename | sed 's/^variant_//' | sed 's/-values.yaml$//')
outputfile="$ext-${chart}.yaml"
releasename=$ext-$chart
else
ext=$(echo $filename | sed 's/-values.yaml$//')
outputfile="${chart}_${ext}.yaml"
fi
extraparams=""
if [[ $(grep -c 'tag: "latest"' $f) != 0 ]]; then
extraparams="--set image.tag=$NEWTAG"
fi
helm template $chart ./common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true $extraparams > ${outputdir}/${chart}_${ext}.yaml
helm template $releasename ./common/$chart --skip-tests --values ${f} --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH $extraparams > ${outputdir}/${outputfile}
done
}

Expand All @@ -41,7 +50,3 @@ do
echo "Update manifest for $chartname..."
generate_yaml $chartname $OUTPUTDIR
done

# we need special version of docsum-llm-uservice
echo "Update manifest for docsum-llm-uservice..."
helm template docsum ./common/llm-uservice --skip-tests --set global.extraEnvConfig=extra-env-config,global.modelUseHostPath=$MODELPATH,noProbe=true,image.repository=opea/llm-docsum-tgi,image.tag=$NEWTAG> ${OUTPUTDIR}/docsum-llm-uservice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,11 @@ metadata:
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/instance: docsum-llm-uservice
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
data:
TGI_LLM_ENDPOINT: "http://docsum-tgi"
TGI_LLM_ENDPOINT: "http://docsum-llm-uservice-tgi"
HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
HF_HOME: "/tmp/.cache/huggingface"
http_proxy: ""
Expand All @@ -33,7 +33,7 @@ metadata:
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/instance: docsum-llm-uservice
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
Expand All @@ -45,7 +45,7 @@ spec:
name: llm-uservice
selector:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/instance: docsum-llm-uservice
---
# Source: llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
Expand All @@ -58,25 +58,25 @@ metadata:
labels:
helm.sh/chart: llm-uservice-1.0.0
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/instance: docsum-llm-uservice
app.kubernetes.io/version: "v1.0"
app.kubernetes.io/managed-by: Helm
spec:
replicas: 1
selector:
matchLabels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/instance: docsum-llm-uservice
template:
metadata:
labels:
app.kubernetes.io/name: llm-uservice
app.kubernetes.io/instance: docsum
app.kubernetes.io/instance: docsum-llm-uservice
spec:
securityContext:
{}
containers:
- name: docsum
- name: docsum-llm-uservice
envFrom:
- configMapRef:
name: docsum-llm-uservice-config
Expand Down
129 changes: 129 additions & 0 deletions microservices-connector/config/manifests/faqgen-llm-uservice.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
---
# Source: llm-uservice/templates/configmap.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Environment for the FaqGen LLM microservice. TGI_LLM_ENDPOINT points at the
# companion TGI Service rendered under the same "faqgen-llm-uservice" release.
apiVersion: v1
kind: ConfigMap
metadata:
  name: faqgen-llm-uservice-config
  labels:
    helm.sh/chart: llm-uservice-1.0.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: faqgen-llm-uservice
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
data:
  TGI_LLM_ENDPOINT: "http://faqgen-llm-uservice-tgi"
  HUGGINGFACEHUB_API_TOKEN: "insert-your-huggingface-token-here"
  HF_HOME: "/tmp/.cache/huggingface"
  http_proxy: ""
  https_proxy: ""
  no_proxy: ""
  LOGFLAG: ""
---
# Source: llm-uservice/templates/service.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: v1
kind: Service
metadata:
  name: faqgen-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-1.0.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: faqgen-llm-uservice
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  type: ClusterIP
  ports:
    - port: 9000
      targetPort: 9000
      protocol: TCP
      name: llm-uservice
  selector:
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: faqgen-llm-uservice
---
# Source: llm-uservice/templates/deployment.yaml
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

apiVersion: apps/v1
kind: Deployment
metadata:
  name: faqgen-llm-uservice
  labels:
    helm.sh/chart: llm-uservice-1.0.0
    app.kubernetes.io/name: llm-uservice
    app.kubernetes.io/instance: faqgen-llm-uservice
    app.kubernetes.io/version: "v1.0"
    app.kubernetes.io/managed-by: Helm
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: llm-uservice
      app.kubernetes.io/instance: faqgen-llm-uservice
  template:
    metadata:
      labels:
        app.kubernetes.io/name: llm-uservice
        app.kubernetes.io/instance: faqgen-llm-uservice
    spec:
      securityContext:
        {}
      containers:
        - name: faqgen-llm-uservice
          envFrom:
            - configMapRef:
                name: faqgen-llm-uservice-config
            # Optional cluster-provided extras (e.g. proxy settings); the pod
            # starts even when this ConfigMap is absent.
            - configMapRef:
                name: extra-env-config
                optional: true
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop:
                - ALL
            readOnlyRootFilesystem: false
            runAsNonRoot: true
            runAsUser: 1000
            seccompProfile:
              type: RuntimeDefault
          image: "opea/llm-faqgen-tgi:latest"
          imagePullPolicy: IfNotPresent
          ports:
            - name: llm-uservice
              containerPort: 9000
              protocol: TCP
          volumeMounts:
            - mountPath: /tmp
              name: tmp
          # NOTE(review): leading "/" added to the three probe paths below —
          # httpGet probe paths should be absolute (was "v1/health_check").
          # Confirm against the chart template before regenerating.
          livenessProbe:
            failureThreshold: 24
            httpGet:
              path: /v1/health_check
              port: llm-uservice
            initialDelaySeconds: 5
            periodSeconds: 5
          readinessProbe:
            httpGet:
              path: /v1/health_check
              port: llm-uservice
            initialDelaySeconds: 5
            periodSeconds: 5
          startupProbe:
            failureThreshold: 120
            httpGet:
              path: /v1/health_check
              port: llm-uservice
            initialDelaySeconds: 5
            periodSeconds: 5
          resources:
            {}
      volumes:
        - name: tmp
          emptyDir: {}

0 comments on commit 325126e

Please sign in to comment.