Skip to content

Commit

Permalink
Merge pull request #3131 from consideRatio/pr/2i2c-pilot-hubs-update-…
Browse files Browse the repository at this point in the history
…core-node-pool

gcp: k8s version updates, transitions to pd-balanced disks, towards n2 nodes
  • Loading branch information
consideRatio authored Sep 13, 2023
2 parents 1153728 + a862afe commit 31ba2d8
Show file tree
Hide file tree
Showing 11 changed files with 100 additions and 51 deletions.
4 changes: 2 additions & 2 deletions config/clusters/linked-earth/common.values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ basehub:
cpu_limit: null
mem_limit: null
node_selector:
node.kubernetes.io/instance-type: e2-highmem-4
node.kubernetes.io/instance-type: n2-highmem-4
- display_name: "Medium: up to 16 CPU / 128 GB RAM"
description: *profile_list_description
slug: medium
Expand Down Expand Up @@ -165,7 +165,7 @@ basehub:
cpu_limit: null
mem_limit: null
node_selector:
node.kubernetes.io/instance-type: e2-highmem-16
node.kubernetes.io/instance-type: n2-highmem-16
dask-gateway:
gateway:
backend:
Expand Down
2 changes: 1 addition & 1 deletion config/clusters/meom-ige/cluster.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: meom-ige
provider: gcp # https://console.cloud.google.com/kubernetes/clusters/details/us-central1-b/pangeo-hubs-cluster/nodes?project=columbia
provider: gcp # https://console.cloud.google.com/kubernetes/clusters/details/us-central1-b/meom-ige-cluster/details?authuser=3&project=meom-ige-cnrs
gcp:
key: enc-deployer-credentials.secret.json
project: meom-ige-cnrs
Expand Down
7 changes: 6 additions & 1 deletion terraform/gcp/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,6 @@ resource "google_container_node_pool" "core" {
location = google_container_cluster.cluster.location
version = var.k8s_versions.core_nodes_version


initial_node_count = 1
autoscaling {
min_node_count = 1
Expand Down Expand Up @@ -213,6 +212,12 @@ resource "google_container_node_pool" "core" {


node_config {
# Balanced disks are much faster than standard disks, and much cheaper
# than SSD disks. Disk speed contributes heavily to how fast new nodes spin
# up, because pulling images takes up a lot of a new node's startup time.
# Faster disks provide faster image pulls!
disk_type = "pd-balanced"

labels = {
"hub.jupyter.org/node-purpose" = "core",
"k8s.dask.org/node-purpose" = "core"
Expand Down
15 changes: 11 additions & 4 deletions terraform/gcp/projects/2i2c-uk.tfvars
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
prefix = "two-eye-two-see-uk"
project_id = "two-eye-two-see-uk"

zone = "europe-west2-b"
region = "europe-west2"
zone = "europe-west2-b"
region = "europe-west2"
regional_cluster = true

core_node_machine_type = "n1-highmem-4"
k8s_versions = {
min_master_version : "1.27.4-gke.900",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
}

core_node_machine_type = "n2-highmem-4"
enable_network_policy = true

# Setup a filestore for in-cluster NFS
Expand All @@ -15,7 +22,7 @@ notebook_nodes = {
"user" : {
min : 0,
max : 20,
machine_type : "n1-highmem-4"
machine_type : "n2-highmem-4"
},
}

Expand Down
11 changes: 6 additions & 5 deletions terraform/gcp/projects/callysto.tfvars
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
prefix = "callysto"
project_id = "callysto-202316"

zone = "northamerica-northeast1-b"
region = "northamerica-northeast1"
zone = "northamerica-northeast1-b"
region = "northamerica-northeast1"
regional_cluster = true

k8s_versions = {
min_master_version : "1.25.6-gke.1000",
core_nodes_version : "1.25.6-gke.1000",
notebook_nodes_version : "1.25.6-gke.1000",
min_master_version : "1.27.4-gke.900",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
}

core_node_machine_type = "n2-highmem-2"
Expand Down
21 changes: 14 additions & 7 deletions terraform/gcp/projects/cloudbank.tfvars
Original file line number Diff line number Diff line change
@@ -1,19 +1,26 @@
prefix = "cb"
project_id = "cb-1003-1696"

zone = "us-central1-b"
region = "us-central1"
zone = "us-central1-b"
region = "us-central1"
regional_cluster = false

k8s_versions = {
min_master_version : "1.26.5-gke.2100",
core_nodes_version : "1.26.5-gke.2100",
notebook_nodes_version : "1.26.4-gke.1400",
}

# FIXME: We have a temporary core node pool set up with n2-highmem-4 and
#        pd-balanced disks. This node pool still uses standard disks,
#        but it has been cordoned.
#
core_node_machine_type = "n1-highmem-4"
enable_network_policy = true

enable_filestore = true
filestore_capacity_gb = 1024

# Multi-tenant cluster, network policy is required to enforce separation between hubs
enable_network_policy = true

regional_cluster = false

notebook_nodes = {
"user" : {
min : 0,
Expand Down
25 changes: 17 additions & 8 deletions terraform/gcp/projects/linked-earth.tfvars
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
prefix = "linked-earth"
project_id = "linked-earth-hubs"
zone = "us-central1-c"
region = "us-central1"
core_node_machine_type = "e2-highmem-4"
prefix = "linked-earth"
project_id = "linked-earth-hubs"

zone = "us-central1-c"
region = "us-central1"
regional_cluster = true

k8s_versions = {
min_master_version : "1.27.4-gke.900",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
dask_nodes_version : "1.27.4-gke.900",
}

core_node_machine_type = "n2-highmem-4"
enable_network_policy = true

# Setup a filestore for in-cluster NFS
enable_filestore = true
filestore_capacity_gb = 1024

Expand All @@ -23,12 +32,12 @@ notebook_nodes = {
"small" : {
min : 0,
max : 100,
machine_type : "e2-highmem-4"
machine_type : "n2-highmem-4"
},
"medium" : {
min : 0,
max : 100,
machine_type : "e2-highmem-16"
machine_type : "n2-highmem-16"
},
}

Expand Down
18 changes: 13 additions & 5 deletions terraform/gcp/projects/m2lines.tfvars
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
prefix = "m2lines"
project_id = "m2lines-hub"
core_node_machine_type = "n1-highmem-4"

enable_network_policy = true
prefix = "m2lines"
project_id = "m2lines-hub"

# GPUs not available in us-central1-b
zone = "us-central1-c"
region = "us-central1"
regional_cluster = true

k8s_versions = {
min_master_version : "1.27.4-gke.900",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
dask_nodes_version : "1.27.4-gke.900",
}

core_node_machine_type = "n2-highmem-4"
enable_network_policy = true


# Setup a filestore for in-cluster NFS
enable_filestore = true
filestore_capacity_gb = 2048
Expand Down
18 changes: 10 additions & 8 deletions terraform/gcp/projects/meom-ige.tfvars
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
prefix = "meom-ige"
project_id = "meom-ige-cnrs"

zone = "us-central1-b"
region = "us-central1"
zone = "us-central1-b"
region = "us-central1"
regional_cluster = false

core_node_machine_type = "n1-highmem-2"
k8s_versions = {
min_master_version : "1.27.4-gke.900",
core_nodes_version : "1.27.4-gke.900",
notebook_nodes_version : "1.27.4-gke.900",
}

# Single-tenant cluster, network policy not needed
enable_network_policy = false

regional_cluster = false
core_node_machine_type = "n2-highmem-4"
enable_network_policy = false

notebook_nodes = {
"small" : {
Expand Down Expand Up @@ -37,7 +40,6 @@ notebook_nodes = {
max : 20,
machine_type : "n1-standard-64"
},

}

# Setup a single node pool for dask workers.
Expand Down
18 changes: 11 additions & 7 deletions terraform/gcp/projects/pilot-hubs.tfvars
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
prefix = "pilot-hubs"
project_id = "two-eye-two-see"

zone = "us-central1-b"
region = "us-central1"

core_node_machine_type = "n1-highmem-4"
zone = "us-central1-b"
region = "us-central1"
regional_cluster = false

# Multi-tenant cluster, network policy is required to enforce separation between hubs
enable_network_policy = true
k8s_versions = {
min_master_version : "1.26.5-gke.2100",
core_nodes_version : "1.26.5-gke.2100",
notebook_nodes_version : "1.26.4-gke.1400",
dask_nodes_version : "1.26.5-gke.1400",
}

regional_cluster = false
core_node_machine_type = "n2-highmem-4"
enable_network_policy = true

enable_filestore = true
filestore_capacity_gb = 5120
Expand Down
12 changes: 9 additions & 3 deletions terraform/gcp/projects/qcl.tfvars
Original file line number Diff line number Diff line change
@@ -1,13 +1,19 @@
prefix = "qcl"
project_id = "qcl-hub"

zone = "europe-west1-d"
region = "europe-west1"
zone = "europe-west1-d"
region = "europe-west1"
regional_cluster = true

k8s_versions = {
min_master_version : "1.25.10-gke.2700",
core_nodes_version : "1.24.9-gke.3200",
notebook_nodes_version : "1.24.9-gke.3200",
}

core_node_machine_type = "n2-highmem-2"
enable_network_policy = true

# Setup a filestore for in-cluster NFS
enable_filestore = true
filestore_capacity_gb = 2048

Expand Down

0 comments on commit 31ba2d8

Please sign in to comment.