-
Notifications
You must be signed in to change notification settings - Fork 585
/
compose.env
61 lines (43 loc) · 1.96 KB
/
compose.env
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# Location of the local copy of the model weights.
# NOTE: Use an absolute path here, not a relative one.
export MODEL_DIRECTORY='/home/nvidia/llama2_13b_chat_hf_v1/'
# export MODEL_DIRECTORY='/home/nvidia/nemotron-3-8b-chat-4k-sft'
# Set this if you don't have a GPU; leave it empty if you have a local GPU.
export NVIDIA_API_KEY='nvapi-*'
# Uncomment to enable activation-aware quantization for the LLM.
# export QUANTIZATION='int4_awq'
# Model architecture, e.g. llama or gptnext (use gptnext for nemotron).
export MODEL_ARCHITECTURE='llama'
# Model name; used only for display on the frontend.
export MODEL_NAME='Llama-2-13b-chat-hf'
# Name of the RAG example in use.
export RAG_EXAMPLE='developer_rag'
# [OPTIONAL] Maximum number of input tokens.
# export MODEL_MAX_INPUT_LENGTH=3000
# [OPTIONAL] Maximum number of output tokens.
# export MODEL_MAX_OUTPUT_LENGTH=512
# [OPTIONAL] Number of GPUs made available to the inference server.
# export INFERENCE_GPU_COUNT='all'
# [OPTIONAL] Base directory under which all persistent volumes are created.
# export DOCKER_VOLUME_DIRECTORY='.'
# [OPTIONAL] Config file for the chain server, relative to the present working directory.
export APP_CONFIG_FILE='/dev/null'
# Parameters for PGVector; update these when using the PGVector vector store
# export POSTGRES_PASSWORD=password
# export POSTGRES_USER=postgres
# export POSTGRES_DB=api
### Riva settings
# Riva Speech API URI: IP address or hostname of the Riva server, plus its port.
export RIVA_API_URI=''
# [OPTIONAL] Riva Speech API key; fill in only if one is needed to access the Riva API.
export RIVA_API_KEY=''
# [OPTIONAL] Riva function ID; fill in only if one is needed to access the Riva API.
export RIVA_FUNCTION_ID=''
# Sample rate for TTS, in Hz.
export TTS_SAMPLE_RATE='48000'
# Config file for the OpenTelemetry collector.
export OPENTELEMETRY_CONFIG_FILE='./configs/otel-collector-config.yaml'
# Config file for Jaeger.
export JAEGER_CONFIG_FILE='./configs/jaeger.yaml'