Skip to content

Commit

Permalink
Merge pull request #5106 from GeorgianaElena/2i2c-aws-us-split
Browse files Browse the repository at this point in the history
2i2c-aws-us: Nodegroup split and k8s update
  • Loading branch information
GeorgianaElena authored Nov 15, 2024
2 parents 5c1e6c6 + cd8c60e commit 85072ec
Show file tree
Hide file tree
Showing 7 changed files with 208 additions and 6 deletions.
2 changes: 2 additions & 0 deletions config/clusters/2i2c-aws-us/cosmicds.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ jupyterhub:
name: Cosmic DS, Harvard
url: https://www.cosmicds.cfa.harvard.edu/
singleuser:
nodeSelector:
2i2c/hub-name: cosmicds
# No persistent storage should be kept to reduce any potential data
# retention & privacy issues.
# Ref https://github.com/2i2c-org/infrastructure/issues/2128#issuecomment-1635107926
Expand Down
13 changes: 13 additions & 0 deletions config/clusters/2i2c-aws-us/dask-staging.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ basehub:
name: 2i2c
url: https://2i2c.org
singleuser:
nodeSelector:
2i2c/hub-name: dask-staging
image:
name: pangeo/pangeo-notebook
tag: "latest"
Expand All @@ -39,3 +41,14 @@ basehub:
authenticator_class: "github"
GitHubOAuthenticator:
oauth_callback_url: "https://dask-staging.aws.2i2c.cloud/hub/oauth_callback"

dask-gateway:
gateway:
scheduler:
extraPodConfig:
nodeSelector:
2i2c/hub-name: dask-staging
worker:
extraPodConfig:
nodeSelector:
2i2c/hub-name: dask-staging
2 changes: 2 additions & 0 deletions config/clusters/2i2c-aws-us/itcoocean.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ jupyterhub:
admin_users:
- eeholmes # Eli Holmes, Community representative
singleuser:
nodeSelector:
2i2c/hub-name: itcoocean
# Requested in https://2i2c.freshdesk.com/a/tickets/1320
defaultUrl: /lab
# shared-public for collaboration
Expand Down
13 changes: 13 additions & 0 deletions config/clusters/2i2c-aws-us/ncar-cisl.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ basehub:
- NicholasCote # Nicholas Cote, Initial administrator
- nwehrheim # Nick Wehrheim, Community representative
singleuser:
nodeSelector:
2i2c/hub-name: ncar-cisl
image:
# The image choice is preliminary and is expected to be set up via
# https://ncar-cisl.2i2c.cloud/services/configurator/ by the community
Expand Down Expand Up @@ -250,3 +252,14 @@ basehub:
node.kubernetes.io/instance-type: g4dn.xlarge
extra_resource_limits:
nvidia.com/gpu: "1"

dask-gateway:
gateway:
scheduler:
extraPodConfig:
nodeSelector:
2i2c/hub-name: ncar-cisl
worker:
extraPodConfig:
nodeSelector:
2i2c/hub-name: ncar-cisl
13 changes: 13 additions & 0 deletions config/clusters/2i2c-aws-us/showcase.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ basehub:
Authenticator:
enable_auth_state: true
singleuser:
nodeSelector:
2i2c/hub-name: showcase
extraEnv:
SCRATCH_BUCKET: s3://2i2c-aws-us-scratch-showcase/$(JUPYTERHUB_USER)
PERSISTENT_BUCKET: s3://2i2c-aws-us-persistent-showcase/$(JUPYTERHUB_USER)
Expand Down Expand Up @@ -246,3 +248,14 @@ basehub:
node.kubernetes.io/instance-type: g4dn.xlarge
extra_resource_limits:
nvidia.com/gpu: "1"

dask-gateway:
gateway:
scheduler:
extraPodConfig:
nodeSelector:
2i2c/hub-name: showcase
worker:
extraPodConfig:
nodeSelector:
2i2c/hub-name: showcase
2 changes: 2 additions & 0 deletions config/clusters/2i2c-aws-us/staging.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -33,5 +33,7 @@ jupyterhub:
GitHubOAuthenticator:
oauth_callback_url: "https://staging.aws.2i2c.cloud/hub/oauth_callback"
singleuser:
nodeSelector:
2i2c/hub-name: staging
extraEnv:
SCRATCH_BUCKET: s3://2i2c-aws-us-scratch-staging/$(JUPYTERHUB_USER)
169 changes: 163 additions & 6 deletions eksctl/2i2c-aws-us.jsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,106 @@ local nodeAz = "us-west-2a";
// A `node.kubernetes.io/instance-type` label is added, so pods
// can request a particular kind of node with a nodeSelector
local notebookNodes = [
{ instanceType: "r5.xlarge" },
{ instanceType: "r5.4xlarge" },
{ instanceType: "r5.16xlarge" },
// staging
{
instanceType: "r5.xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-staging",
labels+: { "2i2c/hub-name": "staging" },
tags+: { "2i2c:hub-name": "staging" }
},
// dask-staging
{
instanceType: "r5.xlarge",
namePrefix: "nb-dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" }
},
// showcase
{
instanceType: "r5.xlarge",
namePrefix: "nb-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" }
},
{
instanceType: "g4dn.xlarge",
namePrefix: "nb-showcase",
minSize: 0,
labels+: { "2i2c/hub-name": "showcase" },
tags+: {
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1",
"2i2c:hub-name": "showcase",
},
taints+: {
"nvidia.com/gpu": "present:NoSchedule"
},
// Allow provisioning GPUs across all AZs, to prevent a situation where all
// GPUs in a single AZ are in use and no new nodes can be spawned
availabilityZones: masterAzs,
},
// ncar-cisl
{
instanceType: "r5.xlarge",
namePrefix: "nb-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "nb-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "nb-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" }
},
{
instanceType: "g4dn.xlarge",
namePrefix: "nb-ncar-cisl",
minSize: 0,
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: {
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1"
"k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1",
"2i2c:hub-name": "ncar-cisl",
},
taints+: {
"nvidia.com/gpu": "present:NoSchedule"
Expand All @@ -40,6 +133,44 @@ local notebookNodes = [
// GPUs in a single AZ are in use and no new nodes can be spawned
availabilityZones: masterAzs,
},
// itcoocean
{
instanceType: "r5.xlarge",
namePrefix: "itcoocean",
labels+: { "2i2c/hub-name": "itcoocean" },
tags+: { "2i2c:hub-name": "itcoocean" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "itcoocean",
labels+: { "2i2c/hub-name": "itcoocean" },
tags+: { "2i2c:hub-name": "itcoocean" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "itcoocean",
labels+: { "2i2c/hub-name": "itcoocean" },
tags+: { "2i2c:hub-name": "itcoocean" }
},
// cosmicds
{
instanceType: "r5.xlarge",
namePrefix: "cosmicds",
labels+: { "2i2c/hub-name": "cosmicds" },
tags+: { "2i2c:hub-name": "cosmicds" }
},
{
instanceType: "r5.4xlarge",
namePrefix: "cosmicds",
labels+: { "2i2c/hub-name": "cosmicds" },
tags+: { "2i2c:hub-name": "cosmicds" }
},
{
instanceType: "r5.16xlarge",
namePrefix: "cosmicds",
labels+: { "2i2c/hub-name": "cosmicds" },
tags+: { "2i2c:hub-name": "cosmicds" }
},
];


Expand All @@ -54,7 +185,24 @@ local daskNodes = [
// A policy about using a single node pool is still being developed and is
// not yet fully established, see https://github.com/2i2c-org/infrastructure/issues/2687.
//
{ instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }},
{
namePrefix: "dask-staging",
labels+: { "2i2c/hub-name": "dask-staging" },
tags+: { "2i2c:hub-name": "dask-staging" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
{
namePrefix: "dask-showcase",
labels+: { "2i2c/hub-name": "showcase" },
tags+: { "2i2c:hub-name": "showcase" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
{
namePrefix: "dask-ncar-cisl",
labels+: { "2i2c/hub-name": "ncar-cisl" },
tags+: { "2i2c:hub-name": "ncar-cisl" },
instancesDistribution+: { instanceTypes: ["r5.4xlarge"] }
},
];


Expand All @@ -64,7 +212,7 @@ local daskNodes = [
metadata+: {
name: "2i2c-aws-us",
region: clusterRegion,
version: "1.29",
version: "1.30",
},
availabilityZones: masterAzs,
iam: {
Expand Down Expand Up @@ -108,6 +256,9 @@ local daskNodes = [
"hub.jupyter.org/node-purpose": "core",
"k8s.dask.org/node-purpose": "core"
},
tags+: {
"2i2c:node-purpose": "core"
},
},
] + [
ng + {
Expand All @@ -123,6 +274,9 @@ local daskNodes = [
"hub.jupyter.org/node-purpose": "user",
"k8s.dask.org/node-purpose": "scheduler"
},
tags+: {
"2i2c:node-purpose": "user"
},
taints+: {
"hub.jupyter.org_dedicated": "user:NoSchedule",
"hub.jupyter.org/dedicated": "user:NoSchedule"
Expand All @@ -145,6 +299,9 @@ local daskNodes = [
"k8s.dask.org_dedicated" : "worker:NoSchedule",
"k8s.dask.org/dedicated" : "worker:NoSchedule"
},
tags+: {
"2i2c:node-purpose": "worker"
},
instancesDistribution+: {
onDemandBaseCapacity: 0,
onDemandPercentageAboveBaseCapacity: 0,
Expand Down

0 comments on commit 85072ec

Please sign in to comment.