# deploy/compose/compose.env

# Absolute path to the local copy of the model weights.
# NOTE: do not use a relative path here.
# (The value is single-quoted, so it is taken literally with no shell expansion.)
export MODEL_DIRECTORY='/home/nvidia/llama2_13b_chat_hf_v1/'
# export MODEL_DIRECTORY="/home/nvidia/nemotron-3-8b-chat-4k-sft"

# API key to fill in when you don't have a GPU.
# Leave it empty if you have a local GPU.
export NVIDIA_API_KEY='nvapi-*'

# Uncomment to enable activation-aware quantization for the LLM.
# export QUANTIZATION="int4_awq"

# Architecture of the model, e.g. llama or gptnext (use gptnext for nemotron).
export MODEL_ARCHITECTURE='llama'


# Name of the model being used; only displayed on the frontend.
export MODEL_NAME='Llama-2-13b-chat-hf'

# Name of the RAG example being used.
export RAG_EXAMPLE='developer_rag'

# [OPTIONAL] Maximum number of input tokens.
# export MODEL_MAX_INPUT_LENGTH=3000

# [OPTIONAL] Maximum number of output tokens.
# export MODEL_MAX_OUTPUT_LENGTH=512

# [OPTIONAL] Number of GPUs to make available to the inference server.
# export INFERENCE_GPU_COUNT="all"

# [OPTIONAL] Base directory inside which all persistent volumes will be created.
# export DOCKER_VOLUME_DIRECTORY="."

# [OPTIONAL] Config file for the chain server; the path is relative to the
# current working directory.
export APP_CONFIG_FILE='/dev/null'

# PGVector parameters; update these when using the PGVector vector store.
# export POSTGRES_PASSWORD=password
# export POSTGRES_USER=postgres
# export POSTGRES_DB=api

### Riva parameters

# Riva Speech API URI: IP address/hostname and port of the Riva server.
export RIVA_API_URI=''

# [OPTIONAL] Riva Speech API key.
# Enter a key here if one is needed to access the Riva API.
export RIVA_API_KEY=''

# [OPTIONAL] Riva function ID.
# Enter a function ID here if one is needed to access the Riva API.
export RIVA_FUNCTION_ID=''

# TTS sample rate, in Hz.
export TTS_SAMPLE_RATE='48000'

# Config file for the OpenTelemetry collector.
export OPENTELEMETRY_CONFIG_FILE='./configs/otel-collector-config.yaml'
# Config file for Jaeger.
export JAEGER_CONFIG_FILE='./configs/jaeger.yaml'