diff --git a/config/clusters/linked-earth/common.values.yaml b/config/clusters/linked-earth/common.values.yaml index f6c906830..1354a071e 100644 --- a/config/clusters/linked-earth/common.values.yaml +++ b/config/clusters/linked-earth/common.values.yaml @@ -109,7 +109,7 @@ basehub: cpu_limit: null mem_limit: null node_selector: - node.kubernetes.io/instance-type: e2-highmem-4 + node.kubernetes.io/instance-type: n2-highmem-4 - display_name: "Medium: up to 16 CPU / 128 GB RAM" description: *profile_list_description slug: medium @@ -165,7 +165,7 @@ basehub: cpu_limit: null mem_limit: null node_selector: - node.kubernetes.io/instance-type: e2-highmem-16 + node.kubernetes.io/instance-type: n2-highmem-16 dask-gateway: gateway: backend: diff --git a/config/clusters/meom-ige/cluster.yaml b/config/clusters/meom-ige/cluster.yaml index aa3de0739..0641eb08f 100644 --- a/config/clusters/meom-ige/cluster.yaml +++ b/config/clusters/meom-ige/cluster.yaml @@ -1,5 +1,5 @@ name: meom-ige -provider: gcp # https://console.cloud.google.com/kubernetes/clusters/details/us-central1-b/pangeo-hubs-cluster/nodes?project=columbia +provider: gcp # https://console.cloud.google.com/kubernetes/clusters/details/us-central1-b/meom-ige-cluster/details?authuser=3&project=meom-ige-cnrs gcp: key: enc-deployer-credentials.secret.json project: meom-ige-cnrs diff --git a/terraform/gcp/cluster.tf b/terraform/gcp/cluster.tf index 5fd1a8e68..8cc052c33 100644 --- a/terraform/gcp/cluster.tf +++ b/terraform/gcp/cluster.tf @@ -185,7 +185,6 @@ resource "google_container_node_pool" "core" { location = google_container_cluster.cluster.location version = var.k8s_versions.core_nodes_version - initial_node_count = 1 autoscaling { min_node_count = 1 @@ -213,6 +212,12 @@ resource "google_container_node_pool" "core" { node_config { + # Balanced disks are much faster than standard disks, and much cheaper + # than SSD disks. 
Disk speed contributes heavily to how fast new nodes spin up, + # as pulling images takes up much of a new node's spin-up time. + # Faster disks provide faster image pulls! + disk_type = "pd-balanced" + labels = { "hub.jupyter.org/node-purpose" = "core", "k8s.dask.org/node-purpose" = "core" diff --git a/terraform/gcp/projects/2i2c-uk.tfvars b/terraform/gcp/projects/2i2c-uk.tfvars index df9336af6..39d346459 100644 --- a/terraform/gcp/projects/2i2c-uk.tfvars +++ b/terraform/gcp/projects/2i2c-uk.tfvars @@ -1,10 +1,17 @@ prefix = "two-eye-two-see-uk" project_id = "two-eye-two-see-uk" -zone = "europe-west2-b" -region = "europe-west2" +zone = "europe-west2-b" +region = "europe-west2" +regional_cluster = true -core_node_machine_type = "n1-highmem-4" +k8s_versions = { + min_master_version : "1.27.4-gke.900", + core_nodes_version : "1.27.4-gke.900", + notebook_nodes_version : "1.27.4-gke.900", +} + +core_node_machine_type = "n2-highmem-4" enable_network_policy = true # Setup a filestore for in-cluster NFS @@ -15,7 +22,7 @@ notebook_nodes = { "user" : { min : 0, max : 20, - machine_type : "n1-highmem-4" + machine_type : "n2-highmem-4" }, } diff --git a/terraform/gcp/projects/callysto.tfvars b/terraform/gcp/projects/callysto.tfvars index fb45e5c3f..431d4a645 100644 --- a/terraform/gcp/projects/callysto.tfvars +++ b/terraform/gcp/projects/callysto.tfvars @@ -1,13 +1,14 @@ prefix = "callysto" project_id = "callysto-202316" -zone = "northamerica-northeast1-b" -region = "northamerica-northeast1" +zone = "northamerica-northeast1-b" +region = "northamerica-northeast1" +regional_cluster = true k8s_versions = { - min_master_version : "1.25.6-gke.1000", - core_nodes_version : "1.25.6-gke.1000", - notebook_nodes_version : "1.25.6-gke.1000", + min_master_version : "1.27.4-gke.900", + core_nodes_version : "1.27.4-gke.900", + notebook_nodes_version : "1.27.4-gke.900", } core_node_machine_type = "n2-highmem-2" diff --git a/terraform/gcp/projects/cloudbank.tfvars
b/terraform/gcp/projects/cloudbank.tfvars index 883976e3e..4acfafc83 100644 --- a/terraform/gcp/projects/cloudbank.tfvars +++ b/terraform/gcp/projects/cloudbank.tfvars @@ -1,19 +1,26 @@ prefix = "cb" project_id = "cb-1003-1696" -zone = "us-central1-b" -region = "us-central1" +zone = "us-central1-b" +region = "us-central1" +regional_cluster = false + +k8s_versions = { + min_master_version : "1.26.5-gke.2100", + core_nodes_version : "1.26.5-gke.2100", + notebook_nodes_version : "1.26.4-gke.1400", +} +# FIXME: We have a temporary core node pool set up with n2-highmem-4 and +# pd-balanced. The core node pool configured below still uses standard disks, +# but has been cordoned. +# core_node_machine_type = "n1-highmem-4" +enable_network_policy = true enable_filestore = true filestore_capacity_gb = 1024 -# Multi-tenant cluster, network policy is required to enforce separation between hubs -enable_network_policy = true - -regional_cluster = false - notebook_nodes = { "user" : { min : 0, diff --git a/terraform/gcp/projects/linked-earth.tfvars b/terraform/gcp/projects/linked-earth.tfvars index 170f7c00f..ee3fb5ecc 100644 --- a/terraform/gcp/projects/linked-earth.tfvars +++ b/terraform/gcp/projects/linked-earth.tfvars @@ -1,11 +1,20 @@ -prefix = "linked-earth" -project_id = "linked-earth-hubs" -zone = "us-central1-c" -region = "us-central1" -core_node_machine_type = "e2-highmem-4" +prefix = "linked-earth" +project_id = "linked-earth-hubs" + +zone = "us-central1-c" +region = "us-central1" +regional_cluster = true + +k8s_versions = { + min_master_version : "1.27.4-gke.900", + core_nodes_version : "1.27.4-gke.900", + notebook_nodes_version : "1.27.4-gke.900", + dask_nodes_version : "1.27.4-gke.900", +} + +core_node_machine_type = "n2-highmem-4" enable_network_policy = true -# Setup a filestore for in-cluster NFS enable_filestore = true filestore_capacity_gb = 1024 @@ -23,12 +32,12 @@ notebook_nodes = { "small" : { min : 0, max : 100, - machine_type : "e2-highmem-4" + machine_type : "n2-highmem-4" },
"medium" : { min : 0, max : 100, - machine_type : "e2-highmem-16" + machine_type : "n2-highmem-16" }, } diff --git a/terraform/gcp/projects/m2lines.tfvars b/terraform/gcp/projects/m2lines.tfvars index 501db1770..902d5baeb 100644 --- a/terraform/gcp/projects/m2lines.tfvars +++ b/terraform/gcp/projects/m2lines.tfvars @@ -1,14 +1,22 @@ -prefix = "m2lines" -project_id = "m2lines-hub" -core_node_machine_type = "n1-highmem-4" - -enable_network_policy = true +prefix = "m2lines" +project_id = "m2lines-hub" # GPUs not available in us-central1-b zone = "us-central1-c" region = "us-central1" regional_cluster = true +k8s_versions = { + min_master_version : "1.27.4-gke.900", + core_nodes_version : "1.27.4-gke.900", + notebook_nodes_version : "1.27.4-gke.900", + dask_nodes_version : "1.27.4-gke.900", +} + +core_node_machine_type = "n2-highmem-4" +enable_network_policy = true + + # Setup a filestore for in-cluster NFS enable_filestore = true filestore_capacity_gb = 2048 diff --git a/terraform/gcp/projects/meom-ige.tfvars b/terraform/gcp/projects/meom-ige.tfvars index 442e8c85c..144d4e461 100644 --- a/terraform/gcp/projects/meom-ige.tfvars +++ b/terraform/gcp/projects/meom-ige.tfvars @@ -1,15 +1,18 @@ prefix = "meom-ige" project_id = "meom-ige-cnrs" -zone = "us-central1-b" -region = "us-central1" +zone = "us-central1-b" +region = "us-central1" +regional_cluster = false -core_node_machine_type = "n1-highmem-2" +k8s_versions = { + min_master_version : "1.27.4-gke.900", + core_nodes_version : "1.27.4-gke.900", + notebook_nodes_version : "1.27.4-gke.900", +} -# Single-tenant cluster, network policy not needed -enable_network_policy = false - -regional_cluster = false +core_node_machine_type = "n2-highmem-4" +enable_network_policy = false notebook_nodes = { "small" : { @@ -37,7 +40,6 @@ notebook_nodes = { max : 20, machine_type : "n1-standard-64" }, - } # Setup a single node pool for dask workers. 
diff --git a/terraform/gcp/projects/pilot-hubs.tfvars b/terraform/gcp/projects/pilot-hubs.tfvars index 4f5028cc4..865a53c24 100644 --- a/terraform/gcp/projects/pilot-hubs.tfvars +++ b/terraform/gcp/projects/pilot-hubs.tfvars @@ -1,15 +1,19 @@ prefix = "pilot-hubs" project_id = "two-eye-two-see" -zone = "us-central1-b" -region = "us-central1" - -core_node_machine_type = "n1-highmem-4" +zone = "us-central1-b" +region = "us-central1" +regional_cluster = false -# Multi-tenant cluster, network policy is required to enforce separation between hubs -enable_network_policy = true +k8s_versions = { + min_master_version : "1.26.5-gke.2100", + core_nodes_version : "1.26.5-gke.2100", + notebook_nodes_version : "1.26.4-gke.1400", + dask_nodes_version : "1.26.5-gke.1400", +} -regional_cluster = false +core_node_machine_type = "n2-highmem-4" +enable_network_policy = true enable_filestore = true filestore_capacity_gb = 5120 diff --git a/terraform/gcp/projects/qcl.tfvars b/terraform/gcp/projects/qcl.tfvars index 5ce11208c..4d5473fb7 100644 --- a/terraform/gcp/projects/qcl.tfvars +++ b/terraform/gcp/projects/qcl.tfvars @@ -1,13 +1,19 @@ prefix = "qcl" project_id = "qcl-hub" -zone = "europe-west1-d" -region = "europe-west1" +zone = "europe-west1-d" +region = "europe-west1" +regional_cluster = true + +k8s_versions = { + min_master_version : "1.25.10-gke.2700", + core_nodes_version : "1.24.9-gke.3200", + notebook_nodes_version : "1.24.9-gke.3200", +} core_node_machine_type = "n2-highmem-2" enable_network_policy = true -# Setup a filestore for in-cluster NFS enable_filestore = true filestore_capacity_gb = 2048