diff --git a/gcp/project_setup/alert_policies/request_latency.yml b/gcp/project_setup/alert_policies/request_latency.yml
new file mode 100644
index 00000000..8af88c30
--- /dev/null
+++ b/gcp/project_setup/alert_policies/request_latency.yml
@@ -0,0 +1,23 @@
+{
+  "displayName": "${CLOUD_RUN_NAME}-maximum-request-latency",
+  "documentation": {
+    "content": "Alerts if the slowest 1% of requests take longer than 15 seconds",
+    "mimeType": "text/markdown"
+  },
+  "userLabels": {},
+  "conditions": [
+    {
+      "displayName": "Cloud Run Revision - Request Latency",
+      "conditionPrometheusQueryLanguage": {
+        "duration": "300s",
+        "evaluationInterval": "30s",
+        "query": "histogram_quantile(0.99, sum(rate(run_googleapis_com:request_latencies_bucket{monitored_resource=\"cloud_run_revision\", service_name=\"${CLOUD_RUN_NAME}\"}[5m])) by (le)) > 15000\n"
+      }
+    }
+  ],
+  "alertStrategy": {},
+  "combiner": "OR",
+  "enabled": true,
+  "notificationChannels": [],
+  "severity": "WARNING"
+}
diff --git a/gcp/project_setup/alert_policies/response_error_counts.yml b/gcp/project_setup/alert_policies/response_error_counts.yml
new file mode 100644
index 00000000..70241d1f
--- /dev/null
+++ b/gcp/project_setup/alert_policies/response_error_counts.yml
@@ -0,0 +1,23 @@
+{
+  "displayName": "${CLOUD_RUN_NAME}-error-response",
+  "documentation": {
+    "content": "Rate of 4xx/5xx request responses exceeds 3 per minute",
+    "mimeType": "text/markdown"
+  },
+  "userLabels": {},
+  "conditions": [
+    {
+      "displayName": "Cloud Run Revision - Error Response Rate",
+      "conditionPrometheusQueryLanguage": {
+        "duration": "0s",
+        "evaluationInterval": "30s",
+        "query": "sum by (service_name) (rate(run_googleapis_com:request_count{monitored_resource=\"cloud_run_revision\", service_name=\"${CLOUD_RUN_NAME}\", response_code_class=~\"4..|5..\"}[1m]) * 60) > 3\n"
+      }
+    }
+  ],
+  "alertStrategy": {},
+  "combiner": "OR",
+  "enabled": true,
+  "notificationChannels": [],
+  "severity": "ERROR"
+}
diff --git a/gcp/project_setup/alert_policies/startup_latency.yml b/gcp/project_setup/alert_policies/startup_latency.yml
new file mode 100644
index 00000000..9be369bd
--- /dev/null
+++ b/gcp/project_setup/alert_policies/startup_latency.yml
@@ -0,0 +1,23 @@
+{
+  "displayName": "${CLOUD_RUN_NAME}-startup-latency",
+  "documentation": {
+    "content": "Alerts if the 99th percentile container startup latency exceeds 5 seconds",
+    "mimeType": "text/markdown"
+  },
+  "userLabels": {},
+  "conditions": [
+    {
+      "displayName": "Cloud Run Revision - Container Startup Latency",
+      "conditionPrometheusQueryLanguage": {
+        "duration": "0s",
+        "evaluationInterval": "30s",
+        "query": "histogram_quantile(0.99, sum(rate(run_googleapis_com:container_startup_latencies_bucket{monitored_resource=\"cloud_run_revision\", service_name=\"${CLOUD_RUN_NAME}\"}[5m])) by (le)) > 5000\n"
+      }
+    }
+  ],
+  "alertStrategy": {},
+  "combiner": "OR",
+  "enabled": true,
+  "notificationChannels": [],
+  "severity": "ERROR"
+}
diff --git a/gcp/project_setup/docs/README.md b/gcp/project_setup/docs/README.md
new file mode 100644
index 00000000..d65ee3dd
--- /dev/null
+++ b/gcp/project_setup/docs/README.md
@@ -0,0 +1,59 @@
+***Projects in Google Cloud need the following items set up:***
+
+- network
+- logging sink
+- alert
+- cloud run service cpu/memory
+- product db replica/backup/zones
+
+
+***CloudRun Template***
+
+(cloud run service cpu/memory)
+
+When exploring CloudRun view in Google Cloud Console you can examine a yaml file representing the CloudRun resource.
+
+![cloudrun.yaml](./images/cloudrun_yaml.png)
+
+Current CD (Continuous Delivery) pipelines take advantage of CloudRun Templates, which are used to generate CloudRun resource at deployment.
+
+![servicetemplate.yaml](./images/servicetemplate_yaml.png)
+
+
+The values for template parameters are defined in devops/gcp/clouddeploy.yaml file.
+
+![clouddeploy.yaml](./images/clouddeploy_yaml.png)
+
+(network)
+
+Another mechanism through which some parameters might enter the template is via bash script in cloudbuild.yaml file that reads 1password variables and inserts them into the CloudRun Template.
+
+![cloudbuild.yaml](./images/cloudbuild_yaml.png)
+
+For example, VPC_CONNECTOR variable can be defined in 1password and devops/vaults.gcp.env file.
+
+![vaults.gcp.env](./images/vaultsgcpenv.png)
+
+VPC Connector enables CloudRun to use Shared Virtual Private Cloud networks defined in Registries' Common-... projects. You can read more about the function of VPC Connector in [GCP docs](https://cloud.google.com/vpc/docs/serverless-vpc-access).
+
+
+***gcloud scripts***
+
+There are a number of scripts that can be used as a starting point to create GCP resources.
+You will need to populate some variables in order to run these scripts, i.e. TARGET_PROJECT_ID (your project nameplate), ENV (e.g., dev/test/prod/tools), TAG (e.g., dev/test/prod/sandbox)
+
+- (logging sink / alerts)
+
+  ../gcp_infra.sh
+
+  The script creates an 'error' log sink and loads alert policies from alert_policies directory (which contains sample alerting policies).
+
+- (product db replica/backup/zones)
+
+  ../gcp_cloudsql_instance.sh
+
+- (network)
+
+  ../gcp_permissions.sh
+
+  The script contains a few permissions/settings that need to be configured before CloudRun can use VPC Connector.
diff --git a/gcp/project_setup/docs/images/cloudbuild_yaml.png b/gcp/project_setup/docs/images/cloudbuild_yaml.png
new file mode 100644
index 00000000..031e0e69
Binary files /dev/null and b/gcp/project_setup/docs/images/cloudbuild_yaml.png differ
diff --git a/gcp/project_setup/docs/images/clouddeploy_yaml.png b/gcp/project_setup/docs/images/clouddeploy_yaml.png
new file mode 100644
index 00000000..eeeede38
Binary files /dev/null and b/gcp/project_setup/docs/images/clouddeploy_yaml.png differ
diff --git a/gcp/project_setup/docs/images/cloudrun_yaml.png b/gcp/project_setup/docs/images/cloudrun_yaml.png
new file mode 100644
index 00000000..fd6a65d8
Binary files /dev/null and b/gcp/project_setup/docs/images/cloudrun_yaml.png differ
diff --git a/gcp/project_setup/docs/images/servicetemplate_yaml.png b/gcp/project_setup/docs/images/servicetemplate_yaml.png
new file mode 100644
index 00000000..a83a9a5f
Binary files /dev/null and b/gcp/project_setup/docs/images/servicetemplate_yaml.png differ
diff --git a/gcp/project_setup/docs/images/vaultsgcpenv.png b/gcp/project_setup/docs/images/vaultsgcpenv.png
new file mode 100644
index 00000000..d1f25a49
Binary files /dev/null and b/gcp/project_setup/docs/images/vaultsgcpenv.png differ
diff --git a/gcp/project_setup/gcp_cloudsql_instance.sh b/gcp/project_setup/gcp_cloudsql_instance.sh
new file mode 100755
index 00000000..8583a6ca
--- /dev/null
+++ b/gcp/project_setup/gcp_cloudsql_instance.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Creates a regional Cloud SQL PostgreSQL instance on the Shared VPC network of the
+# host project. Populate TARGET_PROJECT_ID and INSTANCE_NAME (and adjust ENV / TAG)
+# before running.
+
+# Stop at the first failing command so a later step never runs against a half-prepared project.
+set -euo pipefail
+
+# Variables
+TARGET_PROJECT_ID=""
+INSTANCE_NAME=""
+ENV="prod"
+TAG="prod" # this might be different from env, e.g. sandbox vs tools
+REGION="northamerica-northeast1"
+HOST_PROJECT_ID="c4hnrd"
+POSTGRES_VERSION="POSTGRES_15"
+SHARED_VPC_NAME="bcr-vpc"
+
+# The placeholders above ship empty; refuse to run until they are filled in, otherwise
+# every command below would target a project named "-${ENV}".
+if [[ -z "${TARGET_PROJECT_ID}" || -z "${INSTANCE_NAME}" ]]; then
+  echo "TARGET_PROJECT_ID and INSTANCE_NAME must be set before running this script." >&2
+  exit 1
+fi
+
+PROJECT="${TARGET_PROJECT_ID}-${ENV}"
+
+gcloud config set project "${PROJECT}"
+
+gcloud services enable servicenetworking.googleapis.com --project="${PROJECT}"
+
+# --backup-start-time may be passed only once (gcloud documents it as HH:MM in UTC).
+gcloud sql instances create "${INSTANCE_NAME}-${TAG}" \
+  --database-version="${POSTGRES_VERSION}" \
+  --region="${REGION}" \
+  --storage-type=SSD \
+  --storage-auto-increase \
+  --enable-point-in-time-recovery \
+  --retained-backups-count=7 \
+  --retained-transaction-log-days=7 \
+  --availability-type=regional \
+  --tier=db-custom-4-16384 \
+  --storage-size=100GB \
+  --project="${PROJECT}" \
+  --maintenance-window-day=MON \
+  --maintenance-window-hour=4 \
+  --backup-start-time=08:00 \
+  --network="projects/${HOST_PROJECT_ID}-${ENV}/global/networks/${SHARED_VPC_NAME}-${TAG}"
diff --git a/gcp/project_setup/gcp_infra.sh b/gcp/project_setup/gcp_infra.sh
new file mode 100755
index 00000000..68ff61ad
--- /dev/null
+++ b/gcp/project_setup/gcp_infra.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+#
+# Creates the Cloud Run 'error' log sink and loads every alert policy template found in
+# the alert_policies directory next to this script. Populate TARGET_PROJECT_ID and
+# CLOUD_RUN_NAME (and adjust ENV / TAG) before running.
+ENV="prod"
+TAG="prod" # this might be different from env, e.g. sandbox vs tools
+HOST_PROJECT_ID="c4hnrd"
+TARGET_PROJECT_ID=""
+# Cloud Run service the alert policies watch; may also be supplied through the environment.
+CLOUD_RUN_NAME="${CLOUD_RUN_NAME:-}"
+
+# Refuse to run with empty placeholders: the project would resolve to "-${ENV}" and the
+# policies would be created with an empty service_name filter.
+if [[ -z "${TARGET_PROJECT_ID}" || -z "${CLOUD_RUN_NAME}" ]]; then
+  echo "TARGET_PROJECT_ID and CLOUD_RUN_NAME must be set before running this script." >&2
+  exit 1
+fi
+
+HOST_PROJECT_ID="${HOST_PROJECT_ID}-${ENV}"
+TARGET_PROJECT_ID="${TARGET_PROJECT_ID}-${ENV}"
+
+gcloud config set project "${TARGET_PROJECT_ID}"
+
+# create log sink
+gcloud logging sinks create "cloud_run_errors_${TAG}" \
+  "bigquery.googleapis.com/projects/${HOST_PROJECT_ID}/datasets/cloud_run_logs_${TAG}" \
+  --log-filter='resource.type="cloud_run_revision" AND severity="ERROR"' \
+  --use-partitioned-tables
+
+
+# create alerts
+# Resolve the templates relative to this script so it works from any working directory.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+ALERT_POLICIES_DIR="${SCRIPT_DIR}/alert_policies"
+
+# envsubst only substitutes variables that are present in its environment.
+export CLOUD_RUN_NAME
+
+# Render each policy into a private temp directory that is removed however the script exits.
+RENDER_DIR="$(mktemp -d)"
+trap 'rm -rf "${RENDER_DIR}"' EXIT
+RENDERED_POLICY="${RENDER_DIR}/alert_policy.yml"
+
+FAILED=0
+for policy_file in "${ALERT_POLICIES_DIR}"/*.yml; do
+  # An unmatched glob stays literal; skip it instead of feeding a missing file to envsubst.
+  [[ -e "${policy_file}" ]] || continue
+
+  policy_name=$(basename "${policy_file}")
+
+  echo "Processing $policy_name..."
+
+  # Substitute only ${CLOUD_RUN_NAME}; any other '$' in a PromQL query is left untouched.
+  envsubst '${CLOUD_RUN_NAME}' < "${policy_file}" > "${RENDERED_POLICY}"
+
+  if gcloud alpha monitoring policies create --policy-from-file="${RENDERED_POLICY}"; then
+    echo "Successfully created alert policy from $policy_name."
+  else
+    echo "Failed to create alert policy from $policy_name."
+    FAILED=1
+  fi
+done
+
+# Report failure to the caller if any policy could not be created.
+exit "${FAILED}"
diff --git a/gcp/project_setup/gcp_permissions.sh b/gcp/project_setup/gcp_permissions.sh
new file mode 100755
index 00000000..e766e919
--- /dev/null
+++ b/gcp/project_setup/gcp_permissions.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# Configures the permissions/settings a project needs before its Cloud Run services can
+# use a VPC Connector on the host project's Shared VPC. Populate TARGET_PROJECT_ID (and
+# adjust ENV) before running.
+ENV="prod"
+HOST_PROJECT_ID="c4hnrd"
+TARGET_PROJECT_ID=""
+
+# Refuse to run with the placeholder left empty; the project would resolve to "-${ENV}".
+if [[ -z "${TARGET_PROJECT_ID}" ]]; then
+  echo "TARGET_PROJECT_ID must be set before running this script." >&2
+  exit 1
+fi
+
+HOST_PROJECT_ID="${HOST_PROJECT_ID}-${ENV}"
+TARGET_PROJECT_ID="${TARGET_PROJECT_ID}-${ENV}"
+
+gcloud config set project "${TARGET_PROJECT_ID}"
+
+# TARGET_PROJECT_ID already carries the -${ENV} suffix here; do not append it again.
+PROJECT_NUMBER=$(gcloud projects describe "${TARGET_PROJECT_ID}" --format="get(projectNumber)")
+if [[ -z "${PROJECT_NUMBER}" ]]; then
+  echo "Could not resolve the project number of ${TARGET_PROJECT_ID}." >&2
+  exit 1
+fi
+TARGET_PROJECT_CLOUD_RUN_SERVICE_AGENT="service-${PROJECT_NUMBER}@serverless-robot-prod.iam.gserviceaccount.com"
+
+# attach project to VPC
+gcloud compute shared-vpc associated-projects add "${TARGET_PROJECT_ID}" \
+  --host-project="${HOST_PROJECT_ID}"
+
+# enable attached Service APIs
+gcloud services enable compute.googleapis.com --project="${TARGET_PROJECT_ID}"
+gcloud services enable networkservices.googleapis.com --project="${TARGET_PROJECT_ID}"
+
+# cloudrun permissions to access Shared VPC
+gcloud projects add-iam-policy-binding "${HOST_PROJECT_ID}" \
+  --member="serviceAccount:${TARGET_PROJECT_CLOUD_RUN_SERVICE_AGENT}" \
+  --role="roles/vpcaccess.user" --condition=None
+gcloud projects add-iam-policy-binding "${HOST_PROJECT_ID}" \
+  --member="serviceAccount:${TARGET_PROJECT_CLOUD_RUN_SERVICE_AGENT}" \
+  --role="roles/compute.viewer" --condition=None