Feat/use jumpstart streamlined for new llm options (#192)
* chore: run terraform fmt with -recursive option

* check on each PR

* wip

* local

* latest wip

* all working

* formatting

* modify

* restore status after problematic merge

* latest

* formatting
aidanrussell authored Dec 19, 2024
1 parent b611555 commit e8aa255
Showing 6 changed files with 111 additions and 56 deletions.
8 changes: 4 additions & 4 deletions infra/ecs_notebooks_notebook.tf
@@ -354,9 +354,9 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_s3_notebooks" {
       "s3:ListBucket",
     ]

-    resources = [
+    resources = concat([
       for bucket in aws_s3_bucket.mlflow : bucket.arn
-    ]
+    ], ["arn:aws:s3:::jumpstart-cache-prod-eu-west-2"])
   }

   statement {
@@ -369,9 +369,9 @@ data "aws_iam_policy_document" "aws_vpc_endpoint_s3_notebooks" {
       "s3:GetObject",
     ]

-    resources = [
+    resources = concat([
       "arn:aws:s3:::${var.mirrors_data_bucket_name != "" ? var.mirrors_data_bucket_name : var.mirrors_bucket_name}/*",
-    ]
+    ], ["arn:aws:s3:::jumpstart-cache-prod-eu-west-2/*"])
   }

   statement {
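The change above switches the resource list to Terraform's concat() so the statically named JumpStart cache bucket can be appended to the dynamically generated MLflow bucket ARNs. A minimal sketch of the same pattern in isolation (the locals block and its names are illustrative, not part of this commit):

locals {
  # ARNs derived from the mlflow buckets managed elsewhere in this configuration
  mlflow_bucket_arns = [for bucket in aws_s3_bucket.mlflow : bucket.arn]

  # concat() merges the generated list with the static JumpStart cache bucket ARN
  notebook_policy_bucket_arns = concat(
    local.mlflow_bucket_arns,
    ["arn:aws:s3:::jumpstart-cache-prod-eu-west-2"],
  )
}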
20 changes: 16 additions & 4 deletions infra/modules/sagemaker_deployment/main.tf
@@ -4,15 +4,26 @@ resource "aws_sagemaker_model" "sagemaker_model" {
   execution_role_arn = var.execution_role_arn

   primary_container {
-    image          = var.container_image
-    model_data_url = var.model_data_url
-    environment    = var.environment
+    image       = var.container_image
+    environment = var.environment_variables
+
+    model_data_source {
+      s3_data_source {
+        s3_uri           = var.uncompressed_model_uri
+        s3_data_type     = "S3Prefix"
+        compression_type = "None"
+        model_access_config {
+          accept_eula = true
+        }
+      }
+    }
   }

   vpc_config {
     security_group_ids = var.security_group_ids
     subnets            = var.subnets
   }

 }

 # Endpoint Configuration
@@ -51,6 +62,7 @@ resource "aws_appautoscaling_target" "autoscaling_target" {
   resource_id        = "endpoint/${aws_sagemaker_endpoint.sagemaker_endpoint.name}/variant/${var.variant_name}"
   scalable_dimension = "sagemaker:variant:DesiredInstanceCount"
   service_namespace  = "sagemaker"
+  depends_on         = [aws_sagemaker_endpoint.sagemaker_endpoint, aws_sagemaker_endpoint_configuration.endpoint_config]
 }

 # Autoscaling Policy for Scaling Up
@@ -135,7 +147,7 @@ resource "aws_appautoscaling_policy" "scale_in_to_zero_based_on_backlog" {
   depends_on = [aws_appautoscaling_target.autoscaling_target]
 }

-resource "aws_cloudwatch_log_metric_filter" "unatuhorized_operations" {
+resource "aws_cloudwatch_log_metric_filter" "unauthorized_operations" {
   name           = "unauthorized-operations-filter"
   log_group_name = var.log_group_name
   pattern        = "{ $.errorCode = \"UnauthorizedOperation\" || $.errorCode = \"AccessDenied\" }"
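The key change in this file replaces the compressed model_data_url with a model_data_source block, which lets SageMaker load uncompressed artifacts straight from an S3 prefix (the form in which JumpStart publishes its prepacked models) instead of unpacking a model.tar.gz; accept_eula = true is required for gated models. A standalone sketch of the resulting resource shape, with placeholder name, role ARN, and prefix where this module takes variables:

resource "aws_sagemaker_model" "example" {
  name               = "example-llm"                                           # placeholder
  execution_role_arn = "arn:aws:iam::123456789012:role/example-inference-role" # placeholder

  primary_container {
    image = "763104351884.dkr.ecr.eu-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4.0-gpu-py310-cu121-ubuntu20.04"

    model_data_source {
      s3_data_source {
        # Trailing slash matters: with S3Prefix the URI names a folder of artifacts
        s3_uri           = "s3://jumpstart-cache-prod-eu-west-2/example-model-prefix/" # placeholder
        s3_data_type     = "S3Prefix"
        compression_type = "None"
        model_access_config {
          accept_eula = true
        }
      }
    }
  }
}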
6 changes: 3 additions & 3 deletions infra/modules/sagemaker_deployment/variables.tf
@@ -18,12 +18,12 @@ variable "container_image" {
   description = "Container image for the model"
 }

-variable "model_data_url" {
+variable "uncompressed_model_uri" {
   type        = string
   description = "S3 URL where the model data is located"
 }

-variable "environment" {
+variable "environment_variables" {
   type        = map(string)
   description = "Environment variables for the container"
 }
@@ -116,4 +116,4 @@ variable "log_group_name" {
   type        = string
   description = "log group name, i.e. gpt-neo-125m..."
   default     = ""
-}
+}
26 changes: 22 additions & 4 deletions infra/modules/sagemaker_init/iam/main.tf
@@ -51,17 +51,36 @@ resource "aws_iam_role" "inference_role" {
 data "aws_iam_policy_document" "sagemaker_inference_policy_document" {
   statement {
     actions = [
-      "s3:ListBucket",
-      "s3:GetObject",
       "s3:PutObject",
       "s3:DeleteObject"
     ]
     resources = [
       "arn:aws:s3:::*sagemaker*",
-      "${var.aws_s3_bucket_notebook.arn}/*"
+      "${var.aws_s3_bucket_notebook.arn}/*",
     ]
   }
+  statement {
+    actions = [
+      "s3:GetObject",
+    ]
+    resources = [
+      "arn:aws:s3:::*sagemaker*",
+      "${var.aws_s3_bucket_notebook.arn}/*",
+      "arn:aws:s3:::jumpstart-cache-prod-eu-west-2/*"
+    ]
+  }
+  statement {
+    actions = [
+      "s3:ListBucket",
+      "s3:GetBucketLocation",
+    ]
+
+    resources = [
+      "arn:aws:s3:::jumpstart-cache-prod-eu-west-2",
+      "${var.aws_s3_bucket_notebook.arn}",
+      "arn:aws:s3:::*sagemaker*"
+    ]
+  }
   statement {
     actions = [
       "SNS:Publish",
@@ -239,4 +258,3 @@ resource "aws_iam_role_policy_attachment" "attach_cloudwatch_log_invoke_policy"
   role       = aws_iam_role.lambda_execution_role.name
   policy_arn = aws_iam_policy.cloudwatch_log_invoke_policy.arn
 }
-
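The reworked policy document follows the standard S3 IAM split: object-level actions (s3:GetObject, s3:PutObject, s3:DeleteObject) target object ARNs ending in /*, while bucket-level actions (s3:ListBucket, s3:GetBucketLocation) target the bare bucket ARN. The distinction in isolation, as a minimal sketch (the bucket name is illustrative):

data "aws_iam_policy_document" "example_split" {
  # Object-level actions: resources carry the /* object suffix
  statement {
    actions   = ["s3:GetObject"]
    resources = ["arn:aws:s3:::example-bucket/*"]
  }

  # Bucket-level actions: resources are the bucket ARN itself, no /*
  statement {
    actions   = ["s3:ListBucket", "s3:GetBucketLocation"]
    resources = ["arn:aws:s3:::example-bucket"]
  }
}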

88 changes: 48 additions & 40 deletions infra/sagemaker_llm_resources.tf
@@ -1,25 +1,37 @@
-##################################################################################################################
-# GPT Neo 125M parameter endpoint and associated alarms and policies
-#################################################################################################################
+locals {
+  all_endpoint_names = [
+    module.gpt_neo_125_deployment.endpoint_name,
+    module.llama_3_2_1b_deployment.endpoint_name
+  ]
+}
+
+################
+# GPT Neo 125m
+###############

 module "gpt_neo_125_deployment" {
-  source                = "./modules/sagemaker_deployment"
-  model_name            = "gpt-neo-125m"
-  sns_success_topic_arn = module.sagemaker_output_mover.sns_success_topic_arn
-  execution_role_arn    = module.iam.inference_role
-  container_image       = var.hugging_face_model_image
-  model_data_url        = "${var.sagemaker_models_folder}/gpt-neo-125m.tar.gz"
-  environment = {
-    "HF_MODEL_ID"      = "/opt/ml/model/"
-    "SM_NUM_GPUS"      = 1
-    "MAX_INPUT_LENGTH" = 1024
-    "MAX_TOTAL_TOKENS" = 2048
+  source                 = "./modules/sagemaker_deployment"
+  model_name             = "gpt-neo-125m"
+  sns_success_topic_arn  = module.sagemaker_output_mover.sns_success_topic_arn
+  execution_role_arn     = module.iam.inference_role
+  container_image        = "763104351884.dkr.ecr.eu-west-2.amazonaws.com/huggingface-pytorch-tgi-inference:2.1.1-tgi1.4.0-gpu-py310-cu121-ubuntu20.04"
+  uncompressed_model_uri = "s3://jumpstart-cache-prod-eu-west-2/huggingface-textgeneration1/huggingface-textgeneration1-gpt-neo-125m/artifacts/inference-prepack/v2.0.0/"
+  environment_variables = {
+    "ENDPOINT_SERVER_TIMEOUT" : "3600",
+    "HF_MODEL_ID" : "/opt/ml/model",
+    "MAX_INPUT_LENGTH" : "1024",
+    "MAX_TOTAL_TOKENS" : "2048",
+    "MODEL_CACHE_ROOT" : "/opt/ml/model",
+    "SAGEMAKER_ENV" : "1",
+    "SAGEMAKER_MODEL_SERVER_WORKERS" : "1",
+    "SAGEMAKER_PROGRAM" : "inference.py",
+    "SM_NUM_GPUS" : "1"
   }
   security_group_ids   = [aws_security_group.notebooks.id]
   subnets              = aws_subnet.private_without_egress.*.id
   endpoint_config_name = "sagemaker-endpoint-config-gpt-neo-125m"
   endpoint_name        = "gpt-neo-125-endpoint"
-  variant_name         = "gpt-neo-125m-endpoint-example"
+  variant_name         = "gpt-neo-125m-endpoint-dev"
   instance_type        = "ml.g5.2xlarge"
   s3_output_path       = "https://${module.iam.default_sagemaker_bucket.bucket_regional_domain_name}"
   initial_instance_count = 1
@@ -43,6 +55,7 @@ module "gpt_neo_125_deployment" {
       period         = 30
       statistic      = "Average"
       alarm_actions  = [module.gpt_neo_125_deployment.scale_up_policy_arn]
+      sns_topic_name = "backlog-alarm-${module.gpt_neo_125_deployment.endpoint_name}"
     },
     {
       alarm_name = "low-cpu-alarm-${module.gpt_neo_125_deployment.endpoint_name}"
@@ -56,6 +69,7 @@ module "gpt_neo_125_deployment" {
       period         = 60
       statistic      = "Average"
      alarm_actions  = [module.gpt_neo_125_deployment.scale_in_to_zero_policy_arn]
+      sns_topic_name = "low-cpu-alert-${module.gpt_neo_125_deployment.endpoint_name}"
     },
     {
       alarm_name = "no-query-in-backlog-alarm-${module.gpt_neo_125_deployment.endpoint_name}"
@@ -183,33 +197,34 @@ module "gpt_neo_125_deployment" {
       alarm_actions = [module.sns.unauthorised_access_sns_topic_arn]
     }
   ]

 }

-##################################################################################################################
-# Llama 3.2 1B parameter endpoint and associated alarms and policies
-#################################################################################################################
+###############
+# Llama 3.2 1B
+###############
 module "llama_3_2_1b_deployment" {
-  source                = "./modules/sagemaker_deployment"
-  model_name            = "Llama-3-2-1B"
-  sns_success_topic_arn = module.sagemaker_output_mover.sns_success_topic_arn
-  execution_role_arn    = module.iam.inference_role
-  container_image       = var.hugging_face_model_image
-  model_data_url        = "${var.sagemaker_models_folder}/Llama-3.2-1B.tar.gz"
-  environment = {
-    "HF_MODEL_ID"      = "/opt/ml/model/"
-    "SM_NUM_GPUS"      = 1
-    "MAX_INPUT_LENGTH" = 1024
-    "MAX_TOTAL_TOKENS" = 2048
+  source                 = "./modules/sagemaker_deployment"
+  model_name             = "llama-3-2-1b"
+  sns_success_topic_arn  = module.sagemaker_output_mover.sns_success_topic_arn
+  execution_role_arn     = module.iam.inference_role
+  container_image        = "763104351884.dkr.ecr.eu-west-2.amazonaws.com/djl-inference:0.29.0-lmi11.0.0-cu124"
+  uncompressed_model_uri = "s3://jumpstart-private-cache-prod-eu-west-2/meta-textgeneration/meta-textgeneration-llama-3-2-1b/artifacts/inference-prepack/v1.0.0/"
+  environment_variables = {
+    "ENDPOINT_SERVER_TIMEOUT" : "3600",
+    "HF_MODEL_ID" : "/opt/ml/model",
+    "MODEL_CACHE_ROOT" : "/opt/ml/model",
+    "OPTION_ENABLE_CHUNKED_PREFILL" : "true",
+    "SAGEMAKER_ENV" : "1",
+    "SAGEMAKER_MODEL_SERVER_WORKERS" : "1",
+    "SAGEMAKER_PROGRAM" : "inference.py"
   }
   security_group_ids   = [aws_security_group.notebooks.id]
   subnets              = aws_subnet.private_without_egress.*.id
   endpoint_config_name = "sagemaker-endpoint-config-llama-3-2-1B"
   endpoint_name        = "llama-3-2-1b-endpoint"
-  variant_name         = "llama-3-2-1B-endpoint-example"
-  instance_type        = "ml.g5.2xlarge"
+  variant_name         = "llama-3-2-1b-endpoint-dev"
+  instance_type        = "ml.g6.xlarge"
   initial_instance_count = 1
   s3_output_path       = "https://${module.iam.default_sagemaker_bucket.bucket_regional_domain_name}"
   max_capacity         = 2
@@ -373,10 +388,3 @@ module "llama_3_2_1b_deployment" {
     }
   ]
 }
-
-locals {
-  all_endpoint_names = [
-    module.gpt_neo_125_deployment.endpoint_name,
-    module.llama_3_2_1b_deployment.endpoint_name
-  ]
-}
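The locals block collecting the endpoint names now sits at the top of the file rather than the bottom; the move is organisational only, since Terraform resolves references regardless of declaration order. A hypothetical consumer of the list, purely for illustration:

output "llm_endpoint_names" {
  description = "Names of every SageMaker LLM endpoint managed here"
  value       = local.all_endpoint_names
}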
19 changes: 18 additions & 1 deletion infra/vpc.tf
@@ -537,14 +537,31 @@ data "aws_iam_policy_document" "datasets_s3_endpoint" {
       }

       actions = [
-        "s3:GetObject",
+        "s3:GetObject"
       ]

       resources = [
         "arn:aws:s3:::prod-${data.aws_region.aws_region.name}-starport-layer-bucket/*",
       ]
     }
   }
+  dynamic "statement" {
+    for_each = var.arango_on ? [0] : []
+    content {
+      principals {
+        type        = "AWS"
+        identifiers = ["*"]
+      }
+
+      actions = [
+        "s3:ListBucket",
+      ]
+
+      resources = [
+        "arn:aws:s3:::prod-${data.aws_region.aws_region.name}-starport-layer-bucket",
+      ]
+    }
+  }
 }

 resource "aws_vpc_endpoint" "datasets_ec2_endpoint" {
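The added dynamic "statement" block uses the usual Terraform idiom for conditionally emitting a nested block: for_each over a one-element list when the flag is on, over an empty list (emitting nothing) when it is off. The idiom in isolation, as a sketch (variable and bucket names assumed for illustration):

variable "feature_on" {
  type    = bool
  default = false
}

data "aws_iam_policy_document" "conditional_statement_example" {
  dynamic "statement" {
    # [0] yields one iteration (statement present); [] yields none (statement omitted)
    for_each = var.feature_on ? [0] : []
    content {
      actions   = ["s3:ListBucket"]
      resources = ["arn:aws:s3:::example-bucket"]
    }
  }
}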
