From 9b14799c1a057b0cc33d9547ddd6c6816c589214 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Fri, 9 Apr 2021 13:04:32 -0400 Subject: [PATCH 01/27] The test README file has been updated to include commands to download dependencies with the new modular structure, as well as instructions on how to run tests without building an AMI each time. --- test/README.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/test/README.md b/test/README.md index 874818e6..d5fc424d 100644 --- a/test/README.md +++ b/test/README.md @@ -26,7 +26,6 @@ clean up. ### Prerequisites - Install the latest version of [Go](https://golang.org/). -- Install [dep](https://github.com/golang/dep) for Go dependency management. - Install [Terraform](https://www.terraform.io/downloads.html). - Configure your AWS credentials using one of the [options supported by the AWS SDK](http://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html). Usually, the easiest option is to @@ -35,11 +34,11 @@ clean up. ### One-time setup -Download Go dependencies using dep: +Download Go dependencies: ``` cd test -dep ensure +go get . ``` @@ -60,5 +59,15 @@ cd test go test -v -timeout 60m -run TestFoo ``` +### Running tests without building the AMI every time + +In certain cases you may want to run the deploy / validate tests without building an AMI every time. + +```bash +# Build the AMIs +SKIP_deploy="true" SKIP_validate="true" SKIP_teardown="true" go test -v -timeout 60m + +# Run the tests, which you can do over and over again +SKIP_setup_ami="true" go test -v -timeout 60m \ No newline at end of file From d2334af1044a1732865f25c18a63568f718151f9 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 12 Apr 2021 14:05:28 -0400 Subject: [PATCH 02/27] The methods for testing the Consul cluster have been updated to allow passing in an ACL token. This will allow the methods to be used for testing an ACL enabled cluster as well as the default setup. 
--- test/consul_helpers.go | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/test/consul_helpers.go b/test/consul_helpers.go index 15381128..f48c8585 100644 --- a/test/consul_helpers.go +++ b/test/consul_helpers.go @@ -133,7 +133,19 @@ func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examples func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terratestOptions *terraform.Options, awsRegion string) { asgName := terraform.OutputRequired(t, terratestOptions, asgNameOutputVar) nodeIpAddress := getIpAddressOfAsgInstance(t, asgName, awsRegion) - testConsulCluster(t, nodeIpAddress) + + token := "" + if terratestOptions.Vars["enable_acl"] == true { + // TODO: Actually retrieve the token here + token = "token" + } + + clientArgs := CreateConsulClientArgs{ + ipAddress: nodeIpAddress, + token: token, + } + + testConsulCluster(t, &clientArgs) } // Use a Consul client to connect to the given node and use it to verify that: @@ -141,8 +153,8 @@ func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terrates // 1. The Consul cluster has deployed // 2. The cluster has the expected number of members // 3. 
The cluster has elected a leader -func testConsulCluster(t *testing.T, nodeIpAddress string) { - consulClient := createConsulClient(t, nodeIpAddress) +func testConsulCluster(t *testing.T, clientArgs *CreateConsulClientArgs) { + consulClient := createConsulClient(t, clientArgs) maxRetries := 60 sleepBetweenRetries := 10 * time.Second expectedMembers := CONSUL_CLUSTER_EXAMPLE_DEFAULT_NUM_CLIENTS + CONSUL_CLUSTER_EXAMPLE_DEFAULT_NUM_SERVERS @@ -172,11 +184,18 @@ func testConsulCluster(t *testing.T, nodeIpAddress string) { logger.Logf(t, "Consul cluster is properly deployed and has elected leader %s", leader) } +type CreateConsulClientArgs struct { + ipAddress string + token string +} + // Create a Consul client -func createConsulClient(t *testing.T, ipAddress string) *api.Client { +func createConsulClient(t *testing.T, clientArgs *CreateConsulClientArgs) *api.Client { config := api.DefaultConfig() - config.Address = fmt.Sprintf("%s:8500", ipAddress) - + config.Address = fmt.Sprintf("%s:8500", clientArgs.ipAddress) + if clientArgs.token != "" { + config.Token = clientArgs.token + } client, err := api.NewClient(config) if err != nil { t.Fatalf("Failed to create Consul client due to error: %v", err) From 23bb6d805a6b33b3eff0246d89edfc5f4d7f22e7 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Fri, 16 Apr 2021 10:42:13 -0400 Subject: [PATCH 03/27] The consul-iam-policies module has been updated to create a policy for SSM parameter read / write access if that option is selected at deployment time. 
--- modules/consul-iam-policies/main.tf | 21 +++++++++++++++++++++ modules/consul-iam-policies/variables.tf | 20 ++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/modules/consul-iam-policies/main.tf b/modules/consul-iam-policies/main.tf index 7bb6aee5..4c07f840 100644 --- a/modules/consul-iam-policies/main.tf +++ b/modules/consul-iam-policies/main.tf @@ -34,3 +34,24 @@ data "aws_iam_policy_document" "auto_discover_cluster" { } } +data "aws_iam_policy_document" "acl_token_cluster_ssm" { + + count = var.acl_store_type == "ssm" ? 1 : 0 + + statement { + effect = "Allow" + + actions = [ "ssm:PutParameter", "ssm:GetParameters" ] + + resources = [ "arn:aws:ssm:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:parameter/${var.cluster_name}/*" ] + } +} + +data "aws_caller_identity" "current" { + +} + +data "aws_region" "current" { + +} + diff --git a/modules/consul-iam-policies/variables.tf b/modules/consul-iam-policies/variables.tf index 1bda7959..5bd91ff1 100644 --- a/modules/consul-iam-policies/variables.tf +++ b/modules/consul-iam-policies/variables.tf @@ -14,3 +14,23 @@ variable "enabled" { default = true } +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# You may provide a value for each of these parameters; in some cases they may be required if certain other options are turned on. +# --------------------------------------------------------------------------------------------------------------------- + +variable "cluster_name" { + description = "The name of the cluster that is being created. This is only required if you set 'acl_store_type' to 'ssm', so that the instances can write to / read from SSM parameters under the cluster name root path." + type = string + default = "" +} + +variable "acl_store_type" { + description = "The type of cloud store where the cluster will write / read ACL tokens. 
If left at the default then no related policies will be created." + type = string + default = "" + validation { + condition = contains(["ssm",""],var.acl_store_type) + error_message = "You must specify a supported store type for ACL tokens. Currently the only allowed value is 'ssm'." + } +} \ No newline at end of file From bcffdeb516ba769460bf4d2aa70eff798eb01cef Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Fri, 16 Apr 2021 10:46:53 -0400 Subject: [PATCH 04/27] The cosul-cluster module now accepts an optional variable indicating if the cluster should be aset up to allow reading/writing ACL tokens to permitted stores (currently only SSM). --- modules/consul-cluster/main.tf | 5 +++-- modules/consul-cluster/variables.tf | 10 ++++++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/modules/consul-cluster/main.tf b/modules/consul-cluster/main.tf index 72f357be..3bfe8354 100644 --- a/modules/consul-cluster/main.tf +++ b/modules/consul-cluster/main.tf @@ -245,7 +245,8 @@ data "aws_iam_policy_document" "instance_role" { module "iam_policies" { source = "../consul-iam-policies" - enabled = var.enable_iam_setup - iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) + enabled = var.enable_iam_setup + iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) + acl_store_type = var.acl_store_type } diff --git a/modules/consul-cluster/variables.tf b/modules/consul-cluster/variables.tf index eb7a941b..3e5793c1 100644 --- a/modules/consul-cluster/variables.tf +++ b/modules/consul-cluster/variables.tf @@ -289,3 +289,13 @@ variable "protect_from_scale_in" { type = bool default = false } + +variable "acl_store_type" { + description = "The type of cloud store where the cluster will be able to write / read ACL tokens. If left at the default then no related policies will be created." 
+ type = string + default = "" + validation { + condition = contains(["ssm",""],var.acl_store_type) + error_message = "You must specify a supported store type for ACL tokens. Currently the only allowed value is 'ssm'." + } +} From 1b0a462fe4b5471f294e2df44fda1b09bcf9f017 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Fri, 16 Apr 2021 16:09:15 -0400 Subject: [PATCH 05/27] Multiple test methods have been refactored to support testing a cluster with ACL enabled, and an example added with appropriate options set. Tests still fail since this functionality is not implemented. --- examples/example-with-acl/README.md | 23 +++ examples/example-with-acl/main.tf | 158 ++++++++++++++++++ examples/example-with-acl/outputs.tf | 59 +++++++ examples/example-with-acl/user-data-client.sh | 28 ++++ examples/example-with-acl/user-data-server.sh | 26 +++ examples/example-with-acl/variables.tf | 107 ++++++++++++ test/consul_cluster_test.go | 6 +- test/consul_cluster_with_acl_ssm_test.go | 23 +++ ...onsul_cluster_with_custom_asg_role_test.go | 6 +- test/consul_cluster_with_encryption_test.go | 6 +- test/consul_enterprise_test.go | 6 +- test/consul_helpers.go | 28 +++- 12 files changed, 456 insertions(+), 20 deletions(-) create mode 100644 examples/example-with-acl/README.md create mode 100644 examples/example-with-acl/main.tf create mode 100644 examples/example-with-acl/outputs.tf create mode 100644 examples/example-with-acl/user-data-client.sh create mode 100755 examples/example-with-acl/user-data-server.sh create mode 100644 examples/example-with-acl/variables.tf create mode 100644 test/consul_cluster_with_acl_ssm_test.go diff --git a/examples/example-with-acl/README.md b/examples/example-with-acl/README.md new file mode 100644 index 00000000..0f2de737 --- /dev/null +++ b/examples/example-with-acl/README.md @@ -0,0 +1,23 @@ +# Consul cluster with encryption example + +This folder contains a set of Terraform manifest for deploying a Consul cluster in AWS, including a Packer manifest 
that creates an AMI with a set of insecured certs for TLS validation, as well as installing an updated version of the `run-consul` script that accepts parameters for enabling RPC and gossip encryption. + +The resulting AMI id can then be passed as a parameter to `variables.tf`. The `enable_gossip_encryption` and `enable_rpc_encryption` variables are set to `true` by default in this example, but they don't have to be in your implementation. In this example they're passed as parameters to the `user_data` template to generate the flags passed to `run-consul` but you can use a different strategy. + +The end result of this example should be a cluster of 3 Consul servers and 3 Consul clients, all running on individual EC2 instances. If the default variables are used, both gossip and RPC encryption will be enabled. You can validate this by trying to bring up another Consul node or cluster NOT running with encryption and attempt to join the existing cluster. + +Running this example with encryption turned off and then attempt to upgrade it to use encryption is a good exercise to validate that a production cluster can be upgraded with minimal impact. + +To understand more about how Consul handles encryption or how you can upgrade to use encryption without downtime, check out the [Consul encryption documentation](https://www.consul.io/docs/agent/encryption.html). **IMPORTANT:** The certs included in this repo are **NOT** meant to be used in production. You should generate your own certs if you're running this for anything other than experimenting or testing. + +## Quick start + +To deploy a Consul cluster with encryption enabled: + +1. Create a new AMI using the Packer manifest and the certificates in the `packer` directory. +1. Modify `main.tf` to add your provider credentials, VPC/subnet ids if you need to, etc. +1. Modify `variables.tf` to customize the cluster. 
**NOTE:** the `gossip_encryption_key` variable must be a 16-byte key that can be generated offline with `consul keygen`. It's **NOT** a good idea to keep this key **in plain text** in source control. It should be encrypted beforehand (with something like KMS) and decrypted by Consul during boot. +1. Run `terraform init`. +1. Run `terraform apply`. +1. `ssh` into one of the boxes and make sure all nodes correctly discover each other (by running `consul members` for example). +1. You can also validate that encryption is turned on by looking at `/opt/consul/log/consul-stdout.log` and verifying you see `Encrypt: Gossip: true, TLS-Outgoing: true, TLS-Incoming: true`. \ No newline at end of file diff --git a/examples/example-with-acl/main.tf b/examples/example-with-acl/main.tf new file mode 100644 index 00000000..48f43ca0 --- /dev/null +++ b/examples/example-with-acl/main.tf @@ -0,0 +1,158 @@ +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY A CONSUL CLUSTER IN AWS +# These templates show an example of how to use the consul-cluster module to deploy Consul in AWS. We deploy two Auto +# Scaling Groups (ASGs): one with a small number of Consul server nodes and one with a larger number of Consul client +# nodes. Note that these templates assume that the AMI you provide via the ami_id input variable is built from +# the examples/example-with-encryption/packer/consul-with-certs.json Packer template. +# --------------------------------------------------------------------------------------------------------------------- + +# ---------------------------------------------------------------------------------------------------------------------- +# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER +# ---------------------------------------------------------------------------------------------------------------------- +terraform { + # This module is now only being tested with Terraform 0.14.x. 
However, to make upgrading easier, we are setting + # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it + # forwards compatible with 0.14.x code. + required_version = ">= 0.12.26" +} + +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY THE CONSUL SERVER NODES +# --------------------------------------------------------------------------------------------------------------------- + +module "consul_servers" { + # When using these modules in your own templates, you will need to use a Git URL with a ref attribute that pins you + # to a specific version of the modules, such as the following example: + # source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-cluster?ref=v0.0.1" + source = "../../modules/consul-cluster" + + cluster_name = "${var.cluster_name}-server" + cluster_size = var.num_servers + instance_type = "t2.micro" + spot_price = var.spot_price + + # The EC2 Instances will use these tags to automatically discover each other and form a cluster + cluster_tag_key = var.cluster_tag_key + cluster_tag_value = var.cluster_name + + ami_id = var.ami_id + user_data = data.template_file.user_data_server.rendered + + vpc_id = data.aws_vpc.default.id + subnet_ids = data.aws_subnet_ids.default.ids + + # TODO: Add variable enable_acl + + # To make testing easier, we allow Consul and SSH requests from any IP address here but in a production + # deployment, we strongly recommend you limit this to the IP address ranges of known, trusted servers inside your VPC. 
+ allowed_ssh_cidr_blocks = ["0.0.0.0/0"] + + allowed_inbound_cidr_blocks = ["0.0.0.0/0"] + ssh_key_name = var.ssh_key_name + acl_store_type = var.acl_store_type + + tags = [ + { + key = "Environment" + value = "development" + propagate_at_launch = true + } + ] +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE USER DATA SCRIPT THAT WILL RUN ON EACH CONSUL SERVER EC2 INSTANCE WHEN IT'S BOOTING +# This script will configure and start Consul +# --------------------------------------------------------------------------------------------------------------------- + +data "template_file" "user_data_server" { + template = file("${path.module}/user-data-server.sh") + + vars = { + cluster_tag_key = var.cluster_tag_key + cluster_tag_value = var.cluster_name + enable_gossip_encryption = var.enable_gossip_encryption + gossip_encryption_key = var.gossip_encryption_key + enable_rpc_encryption = var.enable_rpc_encryption + ca_path = var.ca_path + cert_file_path = var.cert_file_path + key_file_path = var.key_file_path + # TODO Add enable_acl + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY THE CONSUL CLIENT NODES +# Note that you do not have to use the consul-cluster module to deploy your clients. We do so simply because it +# provides a convenient way to deploy an Auto Scaling Group with the necessary IAM and security group permissions for +# Consul, but feel free to deploy those clients however you choose (e.g. a single EC2 Instance, a Docker cluster, etc). 
+# --------------------------------------------------------------------------------------------------------------------- + +module "consul_clients" { + # When using these modules in your own templates, you will need to use a Git URL with a ref attribute that pins you + # to a specific version of the modules, such as the following example: + # source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-cluster?ref=v0.0.1" + source = "../../modules/consul-cluster" + + cluster_name = "${var.cluster_name}-client" + cluster_size = var.num_clients + instance_type = "t2.micro" + spot_price = var.spot_price + + cluster_tag_key = "consul-clients" + cluster_tag_value = var.cluster_name + + ami_id = var.ami_id + user_data = data.template_file.user_data_client.rendered + + vpc_id = data.aws_vpc.default.id + subnet_ids = data.aws_subnet_ids.default.ids + + # To make testing easier, we allow Consul and SSH requests from any IP address here but in a production + # deployment, we strongly recommend you limit this to the IP address ranges of known, trusted servers inside your VPC. 
+ allowed_ssh_cidr_blocks = ["0.0.0.0/0"] + + allowed_inbound_cidr_blocks = ["0.0.0.0/0"] + ssh_key_name = var.ssh_key_name + + acl_store_type = var.acl_store_type +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE USER DATA SCRIPT THAT WILL RUN ON EACH CONSUL CLIENT EC2 INSTANCE WHEN IT'S BOOTING +# This script will configure and start Consul +# --------------------------------------------------------------------------------------------------------------------- + +data "template_file" "user_data_client" { + template = file("${path.module}/user-data-client.sh") + + vars = { + cluster_tag_key = var.cluster_tag_key + cluster_tag_value = var.cluster_name + enable_gossip_encryption = var.enable_gossip_encryption + gossip_encryption_key = var.gossip_encryption_key + enable_rpc_encryption = var.enable_rpc_encryption + ca_path = var.ca_path + cert_file_path = var.cert_file_path + key_file_path = var.key_file_path + # TODO Add enable_acl variable + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY CONSUL IN THE DEFAULT VPC AND SUBNETS +# Using the default VPC and subnets makes this example easy to run and test, but it means Consul is accessible from the +# public Internet. For a production deployment, we strongly recommend deploying into a custom VPC with private subnets. +# --------------------------------------------------------------------------------------------------------------------- + +data "aws_vpc" "default" { + default = var.vpc_id == null ? 
true : false + id = var.vpc_id +} + +data "aws_subnet_ids" "default" { + vpc_id = data.aws_vpc.default.id +} + +data "aws_region" "current" { +} diff --git a/examples/example-with-acl/outputs.tf b/examples/example-with-acl/outputs.tf new file mode 100644 index 00000000..493b36c2 --- /dev/null +++ b/examples/example-with-acl/outputs.tf @@ -0,0 +1,59 @@ +output "num_servers" { + value = module.consul_servers.cluster_size +} + +output "asg_name_servers" { + value = module.consul_servers.asg_name +} + +output "launch_config_name_servers" { + value = module.consul_servers.launch_config_name +} + +output "iam_role_arn_servers" { + value = module.consul_servers.iam_role_arn +} + +output "iam_role_id_servers" { + value = module.consul_servers.iam_role_id +} + +output "security_group_id_servers" { + value = module.consul_servers.security_group_id +} + +output "num_clients" { + value = module.consul_clients.cluster_size +} + +output "asg_name_clients" { + value = module.consul_clients.asg_name +} + +output "launch_config_name_clients" { + value = module.consul_clients.launch_config_name +} + +output "iam_role_arn_clients" { + value = module.consul_clients.iam_role_arn +} + +output "iam_role_id_clients" { + value = module.consul_clients.iam_role_id +} + +output "security_group_id_clients" { + value = module.consul_clients.security_group_id +} + +output "aws_region" { + value = data.aws_region.current.name +} + +output "consul_servers_cluster_tag_key" { + value = module.consul_servers.cluster_tag_key +} + +output "consul_servers_cluster_tag_value" { + value = module.consul_servers.cluster_tag_value +} diff --git a/examples/example-with-acl/user-data-client.sh b/examples/example-with-acl/user-data-client.sh new file mode 100644 index 00000000..9b3404bf --- /dev/null +++ b/examples/example-with-acl/user-data-client.sh @@ -0,0 +1,28 @@ +#!/bin/bash +# This script is meant to be run in the User Data of each EC2 Instance while it's booting. 
The script uses the +# run-consul script to configure and start Consul in client mode. Note that this script assumes it's running in an AMI +# built from the Packer template in examples/consul-ami/consul.json. + +set -e + +# Send the log output from this script to user-data.log, syslog, and the console +# From: https://alestic.com/2010/12/ec2-user-data-output/ +exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 + +# These variables are passed in via Terraform template interplation +if [[ "${enable_gossip_encryption}" == "true" && ! -z "${gossip_encryption_key}" ]]; then + # Note that setting the encryption key in plain text here means that it will be readable from the Terraform state file + # and/or the EC2 API/console. We're doing this for simplicity, but in a real production environment you should pass an + # encrypted key to Terraform and decrypt it before passing it to run-consul with something like KMS. + gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}" +fi + +if [[ "${enable_rpc_encryption}" == "true" && ! -z "${ca_path}" && ! -z "${cert_file_path}" && ! -z "${key_file_path}" ]]; then + rpc_encryption_configuration="--enable-rpc-encryption --ca-path ${ca_path} --cert-file-path ${cert_file_path} --key-file-path ${key_file_path}" +fi + +# TODO: Add option for enabling ACL + +/opt/consul/bin/run-consul --client --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration + +# You could add commands to boot your other apps here \ No newline at end of file diff --git a/examples/example-with-acl/user-data-server.sh b/examples/example-with-acl/user-data-server.sh new file mode 100755 index 00000000..90d37893 --- /dev/null +++ b/examples/example-with-acl/user-data-server.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# This script is meant to be run in the User Data of each EC2 Instance while it's booting. 
The script uses the +# run-consul script to configure and start Consul in server mode. Note that this script assumes it's running in an AMI +# built from the Packer template in examples/consul-ami/consul.json. + +set -e + +# Send the log output from this script to user-data.log, syslog, and the console +# From: https://alestic.com/2010/12/ec2-user-data-output/ +exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1 + +# These variables are passed in via Terraform template interplation +if [[ "${enable_gossip_encryption}" == "true" && ! -z "${gossip_encryption_key}" ]]; then + # Note that setting the encryption key in plain text here means that it will be readable from the Terraform state file + # and/or the EC2 API/console. We're doing this for simplicity, but in a real production environment you should pass an + # encrypted key to Terraform and decrypt it before passing it to run-consul with something like KMS. + gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}" +fi + +if [[ "${enable_rpc_encryption}" == "true" && ! -z "${ca_path}" && ! -z "${cert_file_path}" && ! 
-z "${key_file_path}" ]]; then + rpc_encryption_configuration="--enable-rpc-encryption --ca-path ${ca_path} --cert-file-path ${cert_file_path} --key-file-path ${key_file_path}" +fi + +# TODO: Add option for enabling ACL + +/opt/consul/bin/run-consul --server --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration \ No newline at end of file diff --git a/examples/example-with-acl/variables.tf b/examples/example-with-acl/variables.tf new file mode 100644 index 00000000..861c0312 --- /dev/null +++ b/examples/example-with-acl/variables.tf @@ -0,0 +1,107 @@ +# --------------------------------------------------------------------------------------------------------------------- +# ENVIRONMENT VARIABLES +# Define these secrets as environment variables +# --------------------------------------------------------------------------------------------------------------------- + +# AWS_ACCESS_KEY_ID +# AWS_SECRET_ACCESS_KEY +# AWS_DEFAULT_REGION + +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# These parameters have reasonable defaults. +# --------------------------------------------------------------------------------------------------------------------- + +variable "ami_id" { + description = "The ID of the AMI to run in the cluster. This should be an AMI built from the Packer template under examples/example-with-encryption/packer/consul-with-certs.json. To keep this example simple, we run the same AMI on both server and client nodes, but in real-world usage, your client nodes would also run your apps. If the default value is used, Terraform will look up the latest AMI build automatically." 
+ type = string + default = null +} + +variable "cluster_name" { + description = "What to name the Consul cluster and all of its associated resources" + type = string + default = "consul-example" +} + +variable "num_servers" { + description = "The number of Consul server nodes to deploy. We strongly recommend using 3 or 5." + type = number + default = 3 +} + +variable "num_clients" { + description = "The number of Consul client nodes to deploy. You typically run the Consul client alongside your apps, so set this value to however many Instances make sense for your app code." + type = number + default = 3 +} + +variable "cluster_tag_key" { + description = "The tag the EC2 Instances will look for to automatically discover each other and form a cluster." + type = string + default = "consul-servers" +} + +variable "ssh_key_name" { + description = "The name of an EC2 Key Pair that can be used to SSH to the EC2 Instances in this cluster. Set to an empty string to not associate a Key Pair." + type = string + default = null +} + +variable "vpc_id" { + description = "The ID of the VPC in which the nodes will be deployed. Uses default VPC if not supplied." + type = string + default = null +} + +variable "spot_price" { + description = "The maximum hourly price to pay for EC2 Spot Instances." + type = string + default = null +} + +variable "enable_gossip_encryption" { + description = "Encrypt gossip traffic between nodes. Must also specify encryption key." + type = bool + default = false +} + +variable "enable_rpc_encryption" { + description = "Encrypt RPC traffic between nodes. Must also specify TLS certificates and keys." + type = bool + default = false +} + +variable "gossip_encryption_key" { + description = "16 byte cryptographic key to encrypt gossip traffic between nodes. Must set 'enable_gossip_encryption' to true for this to take effect. WARNING: Setting the encryption key here means it will be stored in plain text. 
We're doing this here to keep the example simple, but in production you should inject it more securely, e.g. retrieving it from KMS." + type = string + default = "" +} + +variable "ca_path" { + description = "Path to the directory of CA files used to verify outgoing connections." + type = string + default = "/opt/consul/tls/ca" +} + +variable "cert_file_path" { + description = "Path to the certificate file used to verify incoming connections." + type = string + default = "/opt/consul/tls/consul.crt.pem" +} + +variable "key_file_path" { + description = "Path to the certificate key used to verify incoming connections." + type = string + default = "/opt/consul/tls/consul.key.pem" +} + +variable "acl_store_type" { + description = "The type of cloud store where the cluster will be able to write / read ACL tokens. If left at the default then no related policies will be created." + type = string + default = "ssm" + validation { + condition = contains(["ssm",""],var.acl_store_type) + error_message = "You must specify a supported store type for ACL tokens. Currently the only allowed value is 'ssm'." 
+ } +} diff --git a/test/consul_cluster_test.go b/test/consul_cluster_test.go index 179f09e0..ad7dfed1 100644 --- a/test/consul_cluster_test.go +++ b/test/consul_cluster_test.go @@ -7,17 +7,17 @@ import ( // Test the example in the root folder func TestConsulClusterWithUbuntu16Ami(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "") + runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "",false) } // Test the example in the root folder func TestConsulClusterWithUbuntu18Ami(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "") + runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "",false) } // Test the example in the root folder func TestConsulClusterWithAmazonLinuxAmi(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", "") + runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", "",false) } diff --git a/test/consul_cluster_with_acl_ssm_test.go b/test/consul_cluster_with_acl_ssm_test.go new file mode 100644 index 00000000..82a7576c --- /dev/null +++ b/test/consul_cluster_with_acl_ssm_test.go @@ -0,0 +1,23 @@ +package test + +import ( + "testing" +) + +// Test the example in the example-with-acl folder +func TestConsulClusterWithAclSsmUbuntu16Ami(t *testing.T) { + t.Parallel() + runConsulClusterTest(t, "ubuntu16-ami", "examples/example-with-acl", "../examples/consul-ami/consul.json", "ubuntu", "",true) +} + +// Test the example in the example-with-acl folder +func TestConsulClusterWithAclSsmUbuntu18Ami(t *testing.T) { + t.Parallel() + runConsulClusterTest(t, "ubuntu18-ami", "examples/example-with-acl", "../examples/consul-ami/consul.json", "ubuntu", "",true) +} + +// Test the example in the example-with-acl folder 
+func TestConsulClusterWithAclSsmAmazonLinuxAmi(t *testing.T) { + t.Parallel() + runConsulClusterTest(t, "amazon-linux-2-ami", "examples/example-with-acl", "../examples/consul-ami/consul.json", "ec2-user", "",true) +} diff --git a/test/consul_cluster_with_custom_asg_role_test.go b/test/consul_cluster_with_custom_asg_role_test.go index 96595f03..a2269875 100644 --- a/test/consul_cluster_with_custom_asg_role_test.go +++ b/test/consul_cluster_with_custom_asg_role_test.go @@ -11,7 +11,7 @@ func TestConsulClusterWithCustomASGRoleUbuntu16Ami(t *testing.T) { terraformVars := map[string]interface{}{ "consul_service_linked_role_suffix": random.UniqueId(), } - runConsulClusterTestWithVars(t, "ubuntu16-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "") + runConsulClusterTestWithVars(t, "ubuntu16-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "",false) } func TestConsulClusterWithCustomASGRoleUbuntu18Ami(t *testing.T) { @@ -19,7 +19,7 @@ func TestConsulClusterWithCustomASGRoleUbuntu18Ami(t *testing.T) { terraformVars := map[string]interface{}{ "consul_service_linked_role_suffix": random.UniqueId(), } - runConsulClusterTestWithVars(t, "ubuntu18-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "") + runConsulClusterTestWithVars(t, "ubuntu18-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "",false) } func TestConsulClusterWithCustomASGRoleAmazonLinuxAmi(t *testing.T) { @@ -27,5 +27,5 @@ func TestConsulClusterWithCustomASGRoleAmazonLinuxAmi(t *testing.T) { terraformVars := map[string]interface{}{ "consul_service_linked_role_suffix": random.UniqueId(), } - runConsulClusterTestWithVars(t, "amazon-linux-2-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ec2-user", terraformVars, "") + 
runConsulClusterTestWithVars(t, "amazon-linux-2-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ec2-user", terraformVars, "",false) } diff --git a/test/consul_cluster_with_encryption_test.go b/test/consul_cluster_with_encryption_test.go index 2149dd3a..7a3eb8be 100644 --- a/test/consul_cluster_with_encryption_test.go +++ b/test/consul_cluster_with_encryption_test.go @@ -4,15 +4,15 @@ import "testing" func TestConsulClusterWithEncryptionUbuntu16Ami(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "ubuntu16-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "") + runConsulClusterTest(t, "ubuntu16-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "",false) } func TestConsulClusterWithEncryptionUbuntu18Ami(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "ubuntu18-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "") + runConsulClusterTest(t, "ubuntu18-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "",false) } func TestConsulClusterWithEncryptionAmazonLinuxAmi(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "amazon-linux-2-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ec2-user", "") + runConsulClusterTest(t, "amazon-linux-2-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ec2-user", "",false) } diff --git a/test/consul_enterprise_test.go b/test/consul_enterprise_test.go index 6632f052..4bbada7e 100644 --- a/test/consul_enterprise_test.go +++ b/test/consul_enterprise_test.go @@ -8,17 +8,17 @@ import ( // Test the example in the root folder func TestConsulInstallFromURLWithUbuntu16Ami(t *testing.T) { 
t.Parallel() - runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t)) + runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t),false) } func TestConsulInstallFromURLWithUbuntu18Ami(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t)) + runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t),false) } func TestConsulInstallFromURLWithAmazonLinuxAmi(t *testing.T) { t.Parallel() - runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", getUrlFromEnv(t)) + runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", getUrlFromEnv(t),false) } // To test this on circle ci you need a url set as an environment variable, CONSUL_AMI_TEMPLATE_VAR_DOWNLOAD_URL diff --git a/test/consul_helpers.go b/test/consul_helpers.go index f48c8585..2e9a8060 100644 --- a/test/consul_helpers.go +++ b/test/consul_helpers.go @@ -41,17 +41,18 @@ const AWS_DEFAULT_REGION_ENV_VAR = "AWS_DEFAULT_REGION" // 2. Building the AMI in the consul-ami example with the given build name // 3. Deploying that AMI using the consul-cluster Terraform code // 4. 
Checking that the Consul cluster comes up within a reasonable time period and can respond to requests -func runConsulClusterTest(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, enterpriseUrl string) { +func runConsulClusterTest(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, enterpriseUrl string, enableAcl bool) { runConsulClusterTestWithVars(t, packerBuildName, examplesFolder, packerTemplatePath, sshUser, map[string]interface{}{}, - enterpriseUrl) + enterpriseUrl, + enableAcl,) } -func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, terraformVarsMerge map[string]interface{}, enterpriseUrl string) { +func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, terraformVarsMerge map[string]interface{}, enterpriseUrl string, enableAcl bool) { // Uncomment any of the following to skip that section during the test // os.Setenv("SKIP_setup_ami", "true") // os.Setenv("SKIP_deploy", "true") @@ -122,22 +123,33 @@ func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examples } // Check the Consul servers - checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_SERVER_ASG_NAME, terraformOptions, awsRegion) + checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_SERVER_ASG_NAME, terraformOptions, awsRegion, enableAcl) // Check the Consul clients - checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion) + checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion, enableAcl) }) } // Check that the Consul cluster comes up within a reasonable time period and can respond to requests -func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terratestOptions 
*terraform.Options, awsRegion string) { +func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terratestOptions *terraform.Options, awsRegion string, enableAcl bool) { asgName := terraform.OutputRequired(t, terratestOptions, asgNameOutputVar) nodeIpAddress := getIpAddressOfAsgInstance(t, asgName, awsRegion) + maxRetries := 60 + sleepBetweenRetries := 10 * time.Second + token := "" - if terratestOptions.Vars["enable_acl"] == true { + + if enableAcl { // TODO: Actually retrieve the token here - token = "token" + token = retry.DoWithRetry(t, "Check for SSM token", maxRetries, sleepBetweenRetries, func() (string,error) { + parameterName := fmt.Sprintf("/%s/token/bootstrap",terratestOptions.Vars["cluster_name"]) + token, err := aws.GetParameterE(t, awsRegion, parameterName) + if err != nil { + return "", err + } + return token, nil + }) } clientArgs := CreateConsulClientArgs{ From 54b68d16846214530734be9f390becdfecf6a595 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Fri, 16 Apr 2021 16:25:11 -0400 Subject: [PATCH 06/27] The example Packer file now includes the installation of the 'bash-commons' Gruntwork library. 
--- examples/consul-ami/consul.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/examples/consul-ami/consul.json b/examples/consul-ami/consul.json index 423ca71b..67787477 100644 --- a/examples/consul-ami/consul.json +++ b/examples/consul-ami/consul.json @@ -96,5 +96,12 @@ "/tmp/terraform-aws-consul/modules/setup-systemd-resolved/setup-systemd-resolved" ], "pause_before": "30s" + },{ + "type": "shell", + "inline": [ + "sudo mkdir -p /opt/gruntwork", + "git clone --branch v0.1git .4 https://github.com/gruntwork-io/bash-commons.git /tmp/bash-commons", + "sudo cp -r /tmp/bash-commons/modules/bash-commons/src /opt/gruntwork/bash-commons" + ] }] } From d11727eabfbaecaf8991611a01601475e917a7a1 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 19 Apr 2021 10:48:21 -0400 Subject: [PATCH 07/27] Functions to read and write ACL tokens have been added to the script to run Consul, as well as a function to calculate the name of the SSM parameter to store the bootstrap token in. --- modules/run-consul/consul-common.sh | 15 +++++++ modules/run-consul/run-consul | 64 +++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 modules/run-consul/consul-common.sh diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh new file mode 100644 index 00000000..246dccf8 --- /dev/null +++ b/modules/run-consul/consul-common.sh @@ -0,0 +1,15 @@ +#!/bin/bash + +set -e + +source "/opt/gruntwork/bash-commons/log.sh" +source "/opt/gruntwork/bash-commons/string.sh" +source "/opt/gruntwork/bash-commons/assert.sh" +source "/opt/gruntwork/bash-commons/aws-wrapper.sh" + +function bootstrap_acl { + + + local token + +} \ No newline at end of file diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index ffb5dece..265b87d5 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -53,6 +53,7 @@ function print_usage { echo -e " --environment\t\tA single environment variable in the 
key/value pair form 'KEY=\"val\"' to pass to Consul as environment variable when starting it up. Repeat this option for additional variables. Optional." echo -e " --skip-consul-config\tIf this flag is set, don't generate a Consul configuration file. Optional. Default is false." echo -e " --recursor\tThis flag provides address of upstream DNS server that is used to recursively resolve queries if they are not inside the service domain for Consul. Repeat this option for additional variables. Optional." + echo -e " --enable-acl\tThis flag will cause the cluster to bootstrap a root ACL token upon first provision, which is then stored in AWS SSM for other cluster members to retrieve." echo echo "Options for Consul Autopilot:" echo @@ -312,6 +313,8 @@ EOF ) fi + # TODO: Add step to add ACL section if --enable-acl is set, including token section if client == true + log_info "Creating default Consul configuration" local default_config_json default_config_json=$(cat < Date: Mon, 19 Apr 2021 10:51:19 -0400 Subject: [PATCH 08/27] The newly created functions have been moved to a common.sh file, which the run-consul file now sources. This makes testing functions easier and also follows the pattern of other repositories, such as the terraform-aws-couchbase one. 
--- modules/run-consul/consul-common.sh | 56 ++++++++++++++++++++++++++--- modules/run-consul/run-consul | 55 ++-------------------------- 2 files changed, 53 insertions(+), 58 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index 246dccf8..fea0c676 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -7,9 +7,55 @@ source "/opt/gruntwork/bash-commons/string.sh" source "/opt/gruntwork/bash-commons/assert.sh" source "/opt/gruntwork/bash-commons/aws-wrapper.sh" -function bootstrap_acl { - - - local token - +function get_acl_token_ssm_parameter_name { + local -r cluster_name="$1" + echo "/$cluster_name/token/bootstrap" +} + +function read_acl_token { + local -r cluster_name="$1" + local -r storage_type="$2" + local -r max_retries="${3:-60}" + local -r sleep_between_retries="${4:-5}" + + if [[ $storage_type == "ssm" ]]; then + local parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) + local parameters + local parameter_exists + local token + + for (( i=0; i<"$max_retries"; i++ )); do + parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption) + parameter_exists=$(echo $parameters | jq '[.Parameters[]] | length') + if [[ $parameter_exists -eq 1 ]]; then + token=$(echo $parameters | jq '.Parameters[0].Value' -r) + echo $token + return + else + log_info "Parameter $parameter_name does not yet exist." + sleep "$sleep_between_retries" + fi + done + log_error "Parameter $parameter_name still does not exist after exceeding maximum number of retries." + exit 1 + else + log_error "ACL storage type '${storage_type}' is not supported." 
+ exit 1 + fi +} + +function write_acl_token { + local -r token="$1" + local -r cluster_name="$2" + local -r storage_type="$3" + + if [$storage_type == "ssm"]; then + local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) + aws ssm put-parameter --name $parameter_name --value $token + fi + else + log_error "ACL storage type '${storage_type}' is not supported." + exit 1 + fi + } \ No newline at end of file diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 265b87d5..2fbb0df6 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -24,6 +24,8 @@ readonly DEFAULT_AUTOPILOT_SERVER_STABILIZATION_TIME="10s" readonly DEFAULT_AUTOPILOT_REDUNDANCY_ZONE_TAG="az" readonly DEFAULT_AUTOPILOT_DISABLE_UPGRADE_MIGRATION="false" +source ${SCRIPT_DIR}/consul-common.sh + function print_usage { echo echo "Usage: run-consul [OPTIONS]" @@ -420,59 +422,6 @@ function get_owner_of_path { ls -ld "$path" | awk '{print $3}' } -function get_acl_token_ssm_parameter_name { - local -r cluster_name="$1" - echo "/$cluster_name/token/bootstrap" -} - -function read_acl_token { - local -r cluster_name="$1" - local -r storage_type="$2" - local -r max_retries="${3:-60}" - local -r sleep_between_retries="${4:-5}" - - if [[ $storage_type == "ssm" ]]; then - local parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) - local parameters - local parameter_exists - local token - - for (( i=0; i<"$max_retries"; i++ )); do - parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption) - parameter_exists=$(echo $parameters | jq '[.Parameters[]] | length') - if [[ $parameter_exists -eq 1 ]]; then - token=$(echo $parameters | jq '.Parameters[0].Value' -r) - echo $token - return - else - log_info "Parameter $parameter_name does not yet exist." - sleep "$sleep_between_retries" - fi - done - log_error "Parameter $parameter_name still does not exist after exceeding maximum number of retries." 
- exit 1 - else - log_error "ACL storage type '${storage_type}' is not supported." - exit 1 - fi -} - -function write_acl_token { - local -r token="$1" - local -r cluster_name="$2" - local -r storage_type="$3" - - if [$storage_type == "ssm"]; then - local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) - aws ssm put-parameter --name $parameter_name --value $token - fi - else - log_error "ACL storage type '${storage_type}' is not supported." - exit 1 - fi - -} - function run { local server="false" local client="false" From 92ec301ece6462760fbfd1d464a80d37d5cec113 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 19 Apr 2021 11:03:42 -0400 Subject: [PATCH 09/27] A typo in the Packer configuration file has been corrected. --- examples/consul-ami/consul.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/consul-ami/consul.json b/examples/consul-ami/consul.json index 67787477..2a784e87 100644 --- a/examples/consul-ami/consul.json +++ b/examples/consul-ami/consul.json @@ -100,7 +100,7 @@ "type": "shell", "inline": [ "sudo mkdir -p /opt/gruntwork", - "git clone --branch v0.1git .4 https://github.com/gruntwork-io/bash-commons.git /tmp/bash-commons", + "git clone --branch v0.1.4 https://github.com/gruntwork-io/bash-commons.git /tmp/bash-commons", "sudo cp -r /tmp/bash-commons/modules/bash-commons/src /opt/gruntwork/bash-commons" ] }] From 3c53eade07557636531fc8eeb8781d2856583b8e Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 19 Apr 2021 12:07:17 -0400 Subject: [PATCH 10/27] The consul-cluster module now passes the cluster name to the IAM Policy module. 
--- modules/consul-cluster/main.tf | 1 + modules/run-consul/consul-common.sh | 146 +++++++++++++++++++++++++++- modules/run-consul/run-consul | 138 +------------------------- 3 files changed, 148 insertions(+), 137 deletions(-) diff --git a/modules/consul-cluster/main.tf b/modules/consul-cluster/main.tf index 3bfe8354..0eaf47b8 100644 --- a/modules/consul-cluster/main.tf +++ b/modules/consul-cluster/main.tf @@ -248,5 +248,6 @@ module "iam_policies" { enabled = var.enable_iam_setup iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) acl_store_type = var.acl_store_type + cluster_name = var.cluster_name } diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index fea0c676..4593c15e 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -52,10 +52,154 @@ function write_acl_token { if [$storage_type == "ssm"]; then local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) aws ssm put-parameter --name $parameter_name --value $token - fi else log_error "ACL storage type '${storage_type}' is not supported." 
exit 1 fi +} + +function generate_consul_config { + local -r server="${1}" + local -r config_dir="${2}" + local -r user="${3}" + local -r cluster_tag_key="${4}" + local -r cluster_tag_value="${5}" + local -r datacenter="${6}" + local -r enable_gossip_encryption="${7}" + local -r gossip_encryption_key="${8}" + local -r enable_rpc_encryption="${9}" + local -r verify_server_hostname="${10}" + local -r ca_path="${11}" + local -r cert_file_path="${12}" + local -r key_file_path="${13}" + local -r cleanup_dead_servers="${14}" + local -r last_contact_threshold="${15}" + local -r max_trailing_logs="${16}" + local -r server_stabilization_time="${17}" + local -r redundancy_zone_tag="${18}" + local -r disable_upgrade_migration="${19}" + local -r upgrade_version_tag=${20} + local -r config_path="$config_dir/$CONSUL_CONFIG_FILE" + local -r enable_acl="${21}" + + shift 20 + local -r recursors=("$@") + + local instance_id="" + local instance_ip_address="" + local instance_region="" + # https://www.consul.io/docs/agent/options#ui-1 + local ui_config_enabled="false" + + instance_id=$(get_instance_id) + instance_ip_address=$(get_instance_ip_address) + instance_region=$(get_instance_region) + + local retry_join_json="" + if [[ -z "$cluster_tag_key" || -z "$cluster_tag_value" ]]; then + log_warn "Either the cluster tag key ($cluster_tag_key) or value ($cluster_tag_value) is empty. Will not automatically try to form a cluster based on EC2 tags." 
+ else + retry_join_json=$(cat < "$config_path" + chown "$user:$user" "$config_path" } \ No newline at end of file diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 2fbb0df6..4c0001d7 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -213,140 +213,6 @@ function split_by_lines { done } -function generate_consul_config { - local -r server="${1}" - local -r config_dir="${2}" - local -r user="${3}" - local -r cluster_tag_key="${4}" - local -r cluster_tag_value="${5}" - local -r datacenter="${6}" - local -r enable_gossip_encryption="${7}" - local -r gossip_encryption_key="${8}" - local -r enable_rpc_encryption="${9}" - local -r verify_server_hostname="${10}" - local -r ca_path="${11}" - local -r cert_file_path="${12}" - local -r key_file_path="${13}" - local -r cleanup_dead_servers="${14}" - local -r last_contact_threshold="${15}" - local -r max_trailing_logs="${16}" - local -r server_stabilization_time="${17}" - local -r redundancy_zone_tag="${18}" - local -r disable_upgrade_migration="${19}" - local -r upgrade_version_tag=${20} - local -r config_path="$config_dir/$CONSUL_CONFIG_FILE" - - shift 20 - local -r recursors=("$@") - - local instance_id="" - local instance_ip_address="" - local instance_region="" - # https://www.consul.io/docs/agent/options#ui-1 - local ui_config_enabled="false" - - instance_id=$(get_instance_id) - instance_ip_address=$(get_instance_ip_address) - instance_region=$(get_instance_region) - - local retry_join_json="" - if [[ -z "$cluster_tag_key" || -z "$cluster_tag_value" ]]; then - log_warn "Either the cluster tag key ($cluster_tag_key) or value ($cluster_tag_value) is empty. Will not automatically try to form a cluster based on EC2 tags." 
- else - retry_join_json=$(cat < "$config_path" - chown "$user:$user" "$config_path" -} - function generate_systemd_config { local -r systemd_config_path="$1" local -r consul_config_dir="$2" @@ -648,7 +514,6 @@ function run { assert_not_empty "--key_file_path" "$key_file_path" fi - # Add logic to include acl section if --enable-acl is entered generate_consul_config "$server" \ "$config_dir" \ "$user" \ @@ -669,7 +534,8 @@ function run { "$redundancy_zone_tag" \ "$disable_upgrade_migration" \ "$upgrade_version_tag" \ - "${recursors[@]}" + "${recursors[@]}" \ + "$enable_acl" fi generate_systemd_config "$SYSTEMD_CONFIG_PATH" "$config_dir" "$data_dir" "$systemd_stdout" "$systemd_stderr" "$bin_dir" "$user" "${environment[@]}" From 7aa990c17b0adafc7bbbf025ad7016e64bdba86b Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 19 Apr 2021 12:07:53 -0400 Subject: [PATCH 11/27] The IAM Policy module now correctly creates the policy allowing cluster nodes to read / write appropriate SSM parameters. --- modules/consul-iam-policies/main.tf | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/modules/consul-iam-policies/main.tf b/modules/consul-iam-policies/main.tf index 4c07f840..f8501db1 100644 --- a/modules/consul-iam-policies/main.tf +++ b/modules/consul-iam-policies/main.tf @@ -47,6 +47,13 @@ data "aws_iam_policy_document" "acl_token_cluster_ssm" { } } +resource "aws_iam_role_policy" "ssm" { + count = var.acl_store_type == "ssm" ? 1 : 0 + name = "ssm-cluster" + role = var.iam_role_id + policy = data.aws_iam_policy_document.acl_token_cluster_ssm[0].json +} + data "aws_caller_identity" "current" { } From 815376a2befb4b63a1d5ca192bda5d23c4be7c29 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 19 Apr 2021 16:10:32 +0000 Subject: [PATCH 12/27] The function to write ACL tokens has been fixed to provider the '--type' parameter when writing the token to SSM by way of the 'aws ssm put-parameter' call. 
--- modules/run-consul/consul-common.sh | 221 +++++++++++++++++++++++++++- 1 file changed, 218 insertions(+), 3 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index 4593c15e..bbb4f7ab 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -1,12 +1,227 @@ #!/bin/bash -set -e +# set -e source "/opt/gruntwork/bash-commons/log.sh" source "/opt/gruntwork/bash-commons/string.sh" source "/opt/gruntwork/bash-commons/assert.sh" source "/opt/gruntwork/bash-commons/aws-wrapper.sh" +function log { + local -r level="$1" + local -r message="$2" + local -r timestamp=$(date +"%Y-%m-%d %H:%M:%S") + >&2 echo -e "${timestamp} [${level}] [$SCRIPT_NAME] ${message}" +} + +function log_info { + local -r message="$1" + log "INFO" "$message" +} + +function log_warn { + local -r message="$1" + log "WARN" "$message" +} + +function log_error { + local -r message="$1" + log "ERROR" "$message" +} + +# Based on code from: http://stackoverflow.com/a/16623897/483528 +function strip_prefix { + local -r str="$1" + local -r prefix="$2" + echo "${str#$prefix}" +} + +function assert_not_empty { + local -r arg_name="$1" + local -r arg_value="$2" + + if [[ -z "$arg_value" ]]; then + log_error "The value for '$arg_name' cannot be empty" + print_usage + exit 1 + fi +} + +function lookup_path_in_instance_metadata { + local -r path="$1" + curl --silent --show-error --location "$EC2_INSTANCE_METADATA_URL/$path/" +} + +function lookup_path_in_instance_dynamic_data { + local -r path="$1" + curl --silent --show-error --location "$EC2_INSTANCE_DYNAMIC_DATA_URL/$path/" +} + +function get_instance_ip_address { + lookup_path_in_instance_metadata "local-ipv4" +} + +function get_instance_id { + lookup_path_in_instance_metadata "instance-id" +} + +function get_instance_region { + lookup_path_in_instance_dynamic_data "instance-identity/document" | jq -r ".region" +} + +function get_instance_tags { + local -r instance_id="$1" + 
local -r instance_region="$2" + local tags="" + local count_tags="" + + log_info "Looking up tags for Instance $instance_id in $instance_region" + for (( i=1; i<="$MAX_RETRIES"; i++ )); do + tags=$(aws ec2 describe-tags \ + --region "$instance_region" \ + --filters "Name=resource-type,Values=instance" "Name=resource-id,Values=${instance_id}") + count_tags=$(echo $tags | jq -r ".Tags? | length") + if [[ "$count_tags" -gt 0 ]]; then + log_info "This Instance $instance_id in $instance_region has Tags." + echo "$tags" + return + else + log_warn "This Instance $instance_id in $instance_region does not have any Tags." + log_warn "Will sleep for $SLEEP_BETWEEN_RETRIES_SEC seconds and try again." + sleep "$SLEEP_BETWEEN_RETRIES_SEC" + fi + done + + log_error "Could not find Instance Tags for $instance_id in $instance_region after $MAX_RETRIES retries." + exit 1 +} + +function get_asg_size { + local -r asg_name="$1" + local -r aws_region="$2" + local asg_json="" + + log_info "Looking up the size of the Auto Scaling Group $asg_name in $aws_region" + asg_json=$(aws autoscaling describe-auto-scaling-groups --region "$aws_region" --auto-scaling-group-names "$asg_name") + echo "$asg_json" | jq -r '.AutoScalingGroups[0].DesiredCapacity' +} + +function get_cluster_size { + local -r instance_tags="$1" + local -r aws_region="$2" + + local asg_name="" + asg_name=$(get_tag_value "$instance_tags" "$AWS_ASG_TAG_KEY") + if [[ -z "$asg_name" ]]; then + log_warn "This EC2 Instance does not appear to be part of an Auto Scaling Group, so cannot determine cluster size. Setting cluster size to 1." 
+ echo 1 + else + get_asg_size "$asg_name" "$aws_region" + fi +} + +# Get the value for a specific tag from the tags JSON returned by the AWS describe-tags: +# https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-tags.html +function get_tag_value { + local -r tags="$1" + local -r tag_key="$2" + + echo "$tags" | jq -r ".Tags[] | select(.Key == \"$tag_key\") | .Value" +} + +function assert_is_installed { + local -r name="$1" + + if [[ ! $(command -v ${name}) ]]; then + log_error "The binary '$name' is required by this script but is not installed or in the system's PATH." + exit 1 + fi +} + +function split_by_lines { + local prefix="$1" + shift + + for var in "$@"; do + echo "${prefix}${var}" + done +} + +function generate_systemd_config { + local -r systemd_config_path="$1" + local -r consul_config_dir="$2" + local -r consul_data_dir="$3" + local -r consul_systemd_stdout="$4" + local -r consul_systemd_stderr="$5" + local -r consul_bin_dir="$6" + local -r consul_user="$7" + shift 7 + local -r environment=("$@") + local -r config_path="$consul_config_dir/$CONSUL_CONFIG_FILE" + + log_info "Creating systemd config file to run Consul in $systemd_config_path" + + local -r unit_config=$(cat < "$systemd_config_path" + echo -e "$service_config" >> "$systemd_config_path" + echo -e "$log_config" >> "$systemd_config_path" + echo -e "$install_config" >> "$systemd_config_path" +} + +function start_consul { + log_info "Reloading systemd config and starting Consul" + + sudo systemctl daemon-reload + sudo systemctl enable consul.service + sudo systemctl restart consul.service +} + +# Based on: http://unix.stackexchange.com/a/7732/215969 +function get_owner_of_path { + local -r path="$1" + ls -ld "$path" | awk '{print $3}' +} + function get_acl_token_ssm_parameter_name { local -r cluster_name="$1" echo "/$cluster_name/token/bootstrap" @@ -49,9 +264,9 @@ function write_acl_token { local -r cluster_name="$2" local -r storage_type="$3" - if [$storage_type == "ssm"]; then + if [[ 
$storage_type == "ssm" ]]; then local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) - aws ssm put-parameter --name $parameter_name --value $token + aws ssm put-parameter --name $parameter_name --value $token --type SecureString else log_error "ACL storage type '${storage_type}' is not supported." exit 1 From 60234ee044a30c33075f718925622268e8272837 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 19 Apr 2021 16:39:47 +0000 Subject: [PATCH 13/27] The run-consul script now correctly inserts the ACL configuration when '--enable-acl' is set. --- modules/run-consul/consul-common.sh | 3 +- modules/run-consul/run-consul | 228 +--------------------------- 2 files changed, 9 insertions(+), 222 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index bbb4f7ab..099e9b3a 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -298,7 +298,7 @@ function generate_consul_config { local -r config_path="$config_dir/$CONSUL_CONFIG_FILE" local -r enable_acl="${21}" - shift 20 + shift 21 local -r recursors=("$@") local instance_id="" @@ -401,6 +401,7 @@ EOF "node_name": "$instance_id", $recursors_config $retry_join_json + $acl_configuration "server": $server, $gossip_encryption_configuration $rpc_encryption_configuration diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 4c0001d7..62b50202 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -73,221 +73,6 @@ function print_usage { echo " run-consul --server --config-dir /custom/path/to/consul/config" } -function log { - local -r level="$1" - local -r message="$2" - local -r timestamp=$(date +"%Y-%m-%d %H:%M:%S") - >&2 echo -e "${timestamp} [${level}] [$SCRIPT_NAME] ${message}" -} - -function log_info { - local -r message="$1" - log "INFO" "$message" -} - -function log_warn { - local -r message="$1" - log "WARN" "$message" -} - -function log_error { - local -r 
message="$1" - log "ERROR" "$message" -} - -# Based on code from: http://stackoverflow.com/a/16623897/483528 -function strip_prefix { - local -r str="$1" - local -r prefix="$2" - echo "${str#$prefix}" -} - -function assert_not_empty { - local -r arg_name="$1" - local -r arg_value="$2" - - if [[ -z "$arg_value" ]]; then - log_error "The value for '$arg_name' cannot be empty" - print_usage - exit 1 - fi -} - -function lookup_path_in_instance_metadata { - local -r path="$1" - curl --silent --show-error --location "$EC2_INSTANCE_METADATA_URL/$path/" -} - -function lookup_path_in_instance_dynamic_data { - local -r path="$1" - curl --silent --show-error --location "$EC2_INSTANCE_DYNAMIC_DATA_URL/$path/" -} - -function get_instance_ip_address { - lookup_path_in_instance_metadata "local-ipv4" -} - -function get_instance_id { - lookup_path_in_instance_metadata "instance-id" -} - -function get_instance_region { - lookup_path_in_instance_dynamic_data "instance-identity/document" | jq -r ".region" -} - -function get_instance_tags { - local -r instance_id="$1" - local -r instance_region="$2" - local tags="" - local count_tags="" - - log_info "Looking up tags for Instance $instance_id in $instance_region" - for (( i=1; i<="$MAX_RETRIES"; i++ )); do - tags=$(aws ec2 describe-tags \ - --region "$instance_region" \ - --filters "Name=resource-type,Values=instance" "Name=resource-id,Values=${instance_id}") - count_tags=$(echo $tags | jq -r ".Tags? | length") - if [[ "$count_tags" -gt 0 ]]; then - log_info "This Instance $instance_id in $instance_region has Tags." - echo "$tags" - return - else - log_warn "This Instance $instance_id in $instance_region does not have any Tags." - log_warn "Will sleep for $SLEEP_BETWEEN_RETRIES_SEC seconds and try again." - sleep "$SLEEP_BETWEEN_RETRIES_SEC" - fi - done - - log_error "Could not find Instance Tags for $instance_id in $instance_region after $MAX_RETRIES retries." 
- exit 1 -} - -function get_asg_size { - local -r asg_name="$1" - local -r aws_region="$2" - local asg_json="" - - log_info "Looking up the size of the Auto Scaling Group $asg_name in $aws_region" - asg_json=$(aws autoscaling describe-auto-scaling-groups --region "$aws_region" --auto-scaling-group-names "$asg_name") - echo "$asg_json" | jq -r '.AutoScalingGroups[0].DesiredCapacity' -} - -function get_cluster_size { - local -r instance_tags="$1" - local -r aws_region="$2" - - local asg_name="" - asg_name=$(get_tag_value "$instance_tags" "$AWS_ASG_TAG_KEY") - if [[ -z "$asg_name" ]]; then - log_warn "This EC2 Instance does not appear to be part of an Auto Scaling Group, so cannot determine cluster size. Setting cluster size to 1." - echo 1 - else - get_asg_size "$asg_name" "$aws_region" - fi -} - -# Get the value for a specific tag from the tags JSON returned by the AWS describe-tags: -# https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-tags.html -function get_tag_value { - local -r tags="$1" - local -r tag_key="$2" - - echo "$tags" | jq -r ".Tags[] | select(.Key == \"$tag_key\") | .Value" -} - -function assert_is_installed { - local -r name="$1" - - if [[ ! $(command -v ${name}) ]]; then - log_error "The binary '$name' is required by this script but is not installed or in the system's PATH." 
- exit 1 - fi -} - -function split_by_lines { - local prefix="$1" - shift - - for var in "$@"; do - echo "${prefix}${var}" - done -} - -function generate_systemd_config { - local -r systemd_config_path="$1" - local -r consul_config_dir="$2" - local -r consul_data_dir="$3" - local -r consul_systemd_stdout="$4" - local -r consul_systemd_stderr="$5" - local -r consul_bin_dir="$6" - local -r consul_user="$7" - shift 7 - local -r environment=("$@") - local -r config_path="$consul_config_dir/$CONSUL_CONFIG_FILE" - - log_info "Creating systemd config file to run Consul in $systemd_config_path" - - local -r unit_config=$(cat < "$systemd_config_path" - echo -e "$service_config" >> "$systemd_config_path" - echo -e "$log_config" >> "$systemd_config_path" - echo -e "$install_config" >> "$systemd_config_path" -} - -function start_consul { - log_info "Reloading systemd config and starting Consul" - - sudo systemctl daemon-reload - sudo systemctl enable consul.service - sudo systemctl restart consul.service -} - -# Based on: http://unix.stackexchange.com/a/7732/215969 -function get_owner_of_path { - local -r path="$1" - ls -ld "$path" | awk '{print $3}' -} - function run { local server="false" local client="false" @@ -444,6 +229,9 @@ function run { --skip-consul-config) skip_consul_config="true" ;; + --enable-acl) + enable_acl="true" + ;; --recursor) assert_not_empty "$key" "$2" recursors+=("$2") @@ -453,11 +241,6 @@ function run { print_usage exit ;; - --enable-acl) - assert_not_empty "$key" "$2" - enable_acl="true" - shift - ;; *) log_error "Unrecognized argument: $key" print_usage @@ -514,6 +297,7 @@ function run { assert_not_empty "--key_file_path" "$key_file_path" fi + log_info "Creating Consul configuration" generate_consul_config "$server" \ "$config_dir" \ "$user" \ @@ -534,11 +318,13 @@ function run { "$redundancy_zone_tag" \ "$disable_upgrade_migration" \ "$upgrade_version_tag" \ - "${recursors[@]}" \ "$enable_acl" + "${recursors[@]}" fi + log_info "Creating Consul 
systemd file" generate_systemd_config "$SYSTEMD_CONFIG_PATH" "$config_dir" "$data_dir" "$systemd_stdout" "$systemd_stderr" "$bin_dir" "$user" "${environment[@]}" + log_info "Starting Consul" start_consul } From 580e035418c597762b984e76856d1bb2a60e7a29 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 19 Apr 2021 16:58:31 +0000 Subject: [PATCH 14/27] The run-consul script now calculates a rally point instance if 'enable_acl' is set. --- modules/run-consul/run-consul | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 62b50202..f400dae3 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -326,6 +326,18 @@ function run { generate_systemd_config "$SYSTEMD_CONFIG_PATH" "$config_dir" "$data_dir" "$systemd_stdout" "$systemd_stderr" "$bin_dir" "$user" "${environment[@]}" log_info "Starting Consul" start_consul + + if [[ "$enable_acl" == "true" ]]; then + + local -r asg_name=$(aws_wrapper_get_asg_name $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC) + local -r aws_region=$(aws_get_instance_region) + + # Calculate the rally point server for the ASG + local -r rally_point_hostname=$(aws_wrapper_get_asg_rally_point $asg_name $aws_region "false") + log_info "Calculated rally point instance is $rally_point_hostname." + local -r local_hostname=$(aws_wrapper_get_hostname) + log_info "Local hostname is $local_hostname" + fi } run "$@" From 6d4ac745994ad180ecfe0512acfc059c11e5e240 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 19 Apr 2021 18:02:38 +0000 Subject: [PATCH 15/27] The ACL example has been updated so the user-data scripts call the run-consul script with the '--enable-acl' flag set. 
---
 examples/example-with-acl/user-data-client.sh | 2 +-
 examples/example-with-acl/user-data-server.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/examples/example-with-acl/user-data-client.sh b/examples/example-with-acl/user-data-client.sh
index 9b3404bf..e4f0f7c4 100644
--- a/examples/example-with-acl/user-data-client.sh
+++ b/examples/example-with-acl/user-data-client.sh
@@ -14,7 +14,7 @@ if [[ "${enable_gossip_encryption}" == "true" && ! -z "${gossip_encryption_key}"
   # Note that setting the encryption key in plain text here means that it will be readable from the Terraform state file
   # and/or the EC2 API/console. We're doing this for simplicity, but in a real production environment you should pass an
   # encrypted key to Terraform and decrypt it before passing it to run-consul with something like KMS.
-  gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}"
+  gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key} --enable-acl"
 fi

 if [[ "${enable_rpc_encryption}" == "true" && ! -z "${ca_path}" && ! -z "${cert_file_path}" && !
-z "${key_file_path}" ]]; then
diff --git a/examples/example-with-acl/user-data-server.sh b/examples/example-with-acl/user-data-server.sh
index 90d37893..bbfdffd6 100755
--- a/examples/example-with-acl/user-data-server.sh
+++ b/examples/example-with-acl/user-data-server.sh
@@ -23,4 +23,4 @@ fi

 # TODO: Add option for enabling ACL

-/opt/consul/bin/run-consul --server --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration
\ No newline at end of file
+/opt/consul/bin/run-consul --server --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration --enable-acl
\ No newline at end of file

From cf287370a6217e405b41f38b9581280ece9d742c Mon Sep 17 00:00:00 2001
From: Ubuntu
Date: Mon, 19 Apr 2021 18:04:01 +0000
Subject: [PATCH 16/27] The install-consul script has been updated to include
 copying the 'consul-common.sh' file when installing Consul.
--- modules/install-consul/install-consul | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/install-consul/install-consul b/modules/install-consul/install-consul index 789ebbb4..cd720eff 100755 --- a/modules/install-consul/install-consul +++ b/modules/install-consul/install-consul @@ -17,6 +17,8 @@ readonly SYSTEM_BIN_DIR="/usr/local/bin" readonly SCRIPT_NAME="$(basename "$0")" +# TODO: Add step to check if bash-commons is installed + function print_usage { echo echo "Usage: install-consul [OPTIONS]" @@ -200,6 +202,8 @@ function install_binary { log_info "Copying Consul run script to $run_consul_dest_path" sudo cp "$SCRIPT_DIR/../run-consul/run-consul" "$run_consul_dest_path" + log_info "Copying common scripts to $run_consul_dest_path" + sudo cp "$SCRIPT_DIR/../run-consul/consul-common.sh" "$run_consul_dest_path" sudo chown "$username:$username" "$run_consul_dest_path" sudo chmod a+x "$run_consul_dest_path" } From 7d387c2ce210112d35edb10762f37fa82c0a8e5b Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Tue, 20 Apr 2021 12:44:18 -0400 Subject: [PATCH 17/27] The run-consul script now generates a root ACL token upon start-up and persists it to SSM storage. 
--- modules/consul-cluster/main.tf | 8 +++---- modules/consul-iam-policies/main.tf | 2 +- modules/consul-iam-policies/variables.tf | 4 ++-- modules/install-consul/install-consul | 2 +- modules/run-consul/consul-common.sh | 29 ++++++++++++++++++++---- modules/run-consul/run-consul | 9 +++++++- 6 files changed, 41 insertions(+), 13 deletions(-) diff --git a/modules/consul-cluster/main.tf b/modules/consul-cluster/main.tf index 0eaf47b8..e2fc1248 100644 --- a/modules/consul-cluster/main.tf +++ b/modules/consul-cluster/main.tf @@ -245,9 +245,9 @@ data "aws_iam_policy_document" "instance_role" { module "iam_policies" { source = "../consul-iam-policies" - enabled = var.enable_iam_setup - iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) - acl_store_type = var.acl_store_type - cluster_name = var.cluster_name + enabled = var.enable_iam_setup + iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) + acl_store_type = var.acl_store_type + cluster_tag_value = var.cluster_tag_value } diff --git a/modules/consul-iam-policies/main.tf b/modules/consul-iam-policies/main.tf index f8501db1..419cb1b3 100644 --- a/modules/consul-iam-policies/main.tf +++ b/modules/consul-iam-policies/main.tf @@ -43,7 +43,7 @@ data "aws_iam_policy_document" "acl_token_cluster_ssm" { actions = [ "ssm:PutParameter", "ssm:GetParameters" ] - resources = [ "arn:aws:ssm:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:parameter/${var.cluster_name}/*" ] + resources = [ "arn:aws:ssm:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:parameter/${var.cluster_tag_value}/*" ] } } diff --git a/modules/consul-iam-policies/variables.tf b/modules/consul-iam-policies/variables.tf index 5bd91ff1..f4695b39 100644 --- a/modules/consul-iam-policies/variables.tf +++ b/modules/consul-iam-policies/variables.tf @@ -19,8 +19,8 @@ variable "enabled" { # You may provide a value for each of these parameters; in some cases they may be 
required if certain other options are turned on. # --------------------------------------------------------------------------------------------------------------------- -variable "cluster_name" { - description = "The name of the cluster that is being created. This is only required if you set 'acl_store_type' to 'ssm', so that the instances can write to / read from SSM parameters under the cluster name root path." +variable "cluster_tag_value" { + description = "The EC2 tag value used to identify cluster members. This is only required if you set 'acl_store_type' to 'ssm', so that the instances can write to / read from SSM parameters under the correct path." type = string default = "" } diff --git a/modules/install-consul/install-consul b/modules/install-consul/install-consul index cd720eff..14878bc3 100755 --- a/modules/install-consul/install-consul +++ b/modules/install-consul/install-consul @@ -203,7 +203,7 @@ function install_binary { log_info "Copying Consul run script to $run_consul_dest_path" sudo cp "$SCRIPT_DIR/../run-consul/run-consul" "$run_consul_dest_path" log_info "Copying common scripts to $run_consul_dest_path" - sudo cp "$SCRIPT_DIR/../run-consul/consul-common.sh" "$run_consul_dest_path" + sudo cp "$SCRIPT_DIR/../run-consul/consul-common.sh" "$bin_dir/consul-common.sh" sudo chown "$username:$username" "$run_consul_dest_path" sudo chmod a+x "$run_consul_dest_path" } diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index 099e9b3a..92177931 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -1,6 +1,6 @@ #!/bin/bash -# set -e +set -e source "/opt/gruntwork/bash-commons/log.sh" source "/opt/gruntwork/bash-commons/string.sh" @@ -262,11 +262,12 @@ function read_acl_token { function write_acl_token { local -r token="$1" local -r cluster_name="$2" - local -r storage_type="$3" + local -r aws_region="$3" + local -r storage_type="$4" if [[ $storage_type == "ssm" ]]; then local -r 
parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) - aws ssm put-parameter --name $parameter_name --value $token --type SecureString + aws ssm put-parameter --name $parameter_name --value $token --type SecureString --region $aws_region else log_error "ACL storage type '${storage_type}' is not supported." exit 1 @@ -377,7 +378,6 @@ EOF ) fi - # INPROGRESS: Add step to add ACL section if --enable-acl is set, including token section if client == true local acl_configuration="" if [[ "$enable_acl" == "true" ]]; then log_info "Creating ACL configuration" @@ -418,4 +418,25 @@ EOF log_info "Installing Consul config file in $config_path" echo "$default_config_json" | jq '.' > "$config_path" chown "$user:$user" "$config_path" +} + +function generate_bootstrap_acl_token { + local -r max_retries="$1" + local -r sleep_between_retries="$2" + + local token + + for (( i=0; i<"$max_retries"; i++ )); do + token=$(consul acl bootstrap -format=json | jq '.SecretID' -r) + if [[ "$token" == "" ]]; then + log_info "Token could not be obtained, retrying." + sleep $sleep_between_retries + else + echo $token + return + fi + done + + log_error "Unable to obtain ACL token. Aborting." + exit 1 } \ No newline at end of file diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index f400dae3..4087dd14 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -328,7 +328,6 @@ function run { start_consul if [[ "$enable_acl" == "true" ]]; then - local -r asg_name=$(aws_wrapper_get_asg_name $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC) local -r aws_region=$(aws_get_instance_region) @@ -337,6 +336,14 @@ function run { log_info "Calculated rally point instance is $rally_point_hostname." 
local -r local_hostname=$(aws_wrapper_get_hostname) log_info "Local hostname is $local_hostname" + + if [[ "$rally_point_hostname" == "$local_hostname" ]]; then + log_info "Generating bootstrap ACL token" + local -r bootstrap_token=$(generate_bootstrap_acl_token $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC) + log_info "Persisting bootstrap token to SSM parameter" + write_acl_token $bootstrap_token $cluster_tag_value $aws_region "ssm" + # TODO: Add step to create agent token and store it + fi fi } From 254313fb9e9e756fa323801c0c4bc6bbdac3884b Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Tue, 20 Apr 2021 14:33:24 -0400 Subject: [PATCH 18/27] The run-consul command now checks whether the bootstrap token already exists before attempting to create it. --- modules/run-consul/consul-common.sh | 27 +++++++++++++++++---------- modules/run-consul/run-consul | 15 +++++++++++---- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index 92177931..bcb5923d 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -224,23 +224,27 @@ function get_owner_of_path { function get_acl_token_ssm_parameter_name { local -r cluster_name="$1" - echo "/$cluster_name/token/bootstrap" + local -r token_name="${2:-bootstrap}" + echo "/$cluster_name/token/$token_name" } function read_acl_token { local -r cluster_name="$1" - local -r storage_type="$2" - local -r max_retries="${3:-60}" - local -r sleep_between_retries="${4:-5}" + local -r token_name="${2:-bootstrap}" + local -r aws_region="$3" + local -r storage_type="$4" + local -r max_retries="${5:-60}" + local -r sleep_between_retries="${6:-5}" + local -r ignore_error="${7:-false}" if [[ $storage_type == "ssm" ]]; then - local parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) + local parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name $token_name) local parameters local parameter_exists local token 
for (( i=0; i<"$max_retries"; i++ )); do - parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption) + parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption --region $aws_region) parameter_exists=$(echo $parameters | jq '[.Parameters[]] | length') if [[ $parameter_exists -eq 1 ]]; then token=$(echo $parameters | jq '.Parameters[0].Value' -r) @@ -252,7 +256,9 @@ function read_acl_token { fi done log_error "Parameter $parameter_name still does not exist after exceeding maximum number of retries." - exit 1 + if [[ "$ignore_error" == "false" ]]; then + exit 1 + fi else log_error "ACL storage type '${storage_type}' is not supported." exit 1 @@ -262,11 +268,12 @@ function read_acl_token { function write_acl_token { local -r token="$1" local -r cluster_name="$2" - local -r aws_region="$3" - local -r storage_type="$4" + local -r token_name="${3:-bootstrap}" + local -r aws_region="$4" + local -r storage_type="$5" if [[ $storage_type == "ssm" ]]; then - local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name) + local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name $token_name) aws ssm put-parameter --name $parameter_name --value $token --type SecureString --region $aws_region else log_error "ACL storage type '${storage_type}' is not supported." 
diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 4087dd14..29da4851 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -338,10 +338,17 @@ function run { log_info "Local hostname is $local_hostname" if [[ "$rally_point_hostname" == "$local_hostname" ]]; then - log_info "Generating bootstrap ACL token" - local -r bootstrap_token=$(generate_bootstrap_acl_token $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC) - log_info "Persisting bootstrap token to SSM parameter" - write_acl_token $bootstrap_token $cluster_tag_value $aws_region "ssm" + log_info "Checking if bootstrap token already exists" + local -r existing_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region "ssm" 1 0 "true") + + if [[ -z "$existing_token" ]]; then + log_info "Generating bootstrap ACL token" + local -r bootstrap_token=$(generate_bootstrap_acl_token $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC) + log_info "Persisting bootstrap token to SSM parameter" + write_acl_token $bootstrap_token $cluster_tag_value "bootstrap" $aws_region "ssm" + else + log_info "Bootstrap token already exists, skipping" + fi # TODO: Add step to create agent token and store it fi fi From 53a4fd5974780f506fc8a744d63dde36f9a6adf0 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Wed, 21 Apr 2021 10:35:23 -0400 Subject: [PATCH 19/27] The run-consul script now creates agent tokens and sets the local agent to use them upon start-up. 
--- modules/run-consul/consul-common.sh | 67 ++++++++++++++++++++++++++++- modules/run-consul/run-consul | 23 +++++++++- 2 files changed, 87 insertions(+), 3 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index bcb5923d..ec5f1be2 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -390,7 +390,9 @@ EOF log_info "Creating ACL configuration" acl_configuration=$(cat < Date: Tue, 27 Apr 2021 12:14:11 -0400 Subject: [PATCH 20/27] The run-consul script will now only perform ACL bootstrap activities if the '--server' flag is set. --- modules/run-consul/run-consul | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 365d3d17..3b2e98de 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -344,7 +344,7 @@ function run { log_info "Checking if bootstrap token already exists" local -r existing_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region "ssm" 1 0 "true") - if [[ -z "$existing_token" ]]; then + if [[ "$existing_token" == "" ]] && [ "${server}" == "true" ]; then log_info "Generating bootstrap ACL token" bootstrap_token=$(generate_bootstrap_acl_token $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC) log_info "Persisting bootstrap token to SSM parameter" From 5561dd27194c79111d585bafe3d0523eb1b65937 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Tue, 27 Apr 2021 20:27:35 -0400 Subject: [PATCH 21/27] The tests for Consul clients have been updated to ignore ACL configuration, since agents currently pass through their token uand so will fail the test for empty members. 
--- test/consul_helpers.go | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/test/consul_helpers.go b/test/consul_helpers.go index 2e9a8060..fd30864e 100644 --- a/test/consul_helpers.go +++ b/test/consul_helpers.go @@ -126,7 +126,7 @@ func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examples checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_SERVER_ASG_NAME, terraformOptions, awsRegion, enableAcl) // Check the Consul clients - checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion, enableAcl) + checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion, false) }) } @@ -193,6 +193,25 @@ func testConsulCluster(t *testing.T, clientArgs *CreateConsulClientArgs) { return leader, nil }) + if clientArgs.token != "" { + logger.Logf(t, "Attempting to retrieve members without token") + consulClient = createConsulClient(t, &CreateConsulClientArgs{ + ipAddress: clientArgs.ipAddress, + token: "", + }) + leader = retry.DoWithRetry(t, "Check for empty members with no token", maxRetries, sleepBetweenRetries, func() (string, error) { + members, err := consulClient.Agent().Members(false) + if err != nil { + return "", err + } + if len(members) != 0 { + return "",fmt.Errorf("expected an empty member list when not using token, found %d members instead", len(members)) + } + + return "",nil + }) + } + logger.Logf(t, "Consul cluster is properly deployed and has elected leader %s", leader) } From 4a93734ae33c11cd544b5b45e6d9aed5e7308cdd Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Wed, 28 Apr 2021 11:10:07 -0400 Subject: [PATCH 22/27] Documentation for the ACL enabled example and the run-consul script have been updated. 
--- examples/example-with-acl/README.md | 21 +++++++-------------- examples/example-with-acl/variables.tf | 2 +- modules/run-consul/README.md | 2 ++ 3 files changed, 10 insertions(+), 15 deletions(-) diff --git a/examples/example-with-acl/README.md b/examples/example-with-acl/README.md index 0f2de737..298e67bf 100644 --- a/examples/example-with-acl/README.md +++ b/examples/example-with-acl/README.md @@ -1,23 +1,16 @@ -# Consul cluster with encryption example +# Consul cluster with ACL example -This folder contains a set of Terraform manifest for deploying a Consul cluster in AWS, including a Packer manifest that creates an AMI with a set of insecured certs for TLS validation, as well as installing an updated version of the `run-consul` script that accepts parameters for enabling RPC and gossip encryption. +This folder contains a set of Terraform manifest for deploying a Consul cluster in AWS which has [ACL](https://www.consul.io/docs/security/acl) enabled. The root bootstrap token is stored in an [AWS Systems Manager Parameter](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) so that other nodes can retrieve it and create agent tokens for themselves. -The resulting AMI id can then be passed as a parameter to `variables.tf`. The `enable_gossip_encryption` and `enable_rpc_encryption` variables are set to `true` by default in this example, but they don't have to be in your implementation. In this example they're passed as parameters to the `user_data` template to generate the flags passed to `run-consul` but you can use a different strategy. - -The end result of this example should be a cluster of 3 Consul servers and 3 Consul clients, all running on individual EC2 instances. If the default variables are used, both gossip and RPC encryption will be enabled. You can validate this by trying to bring up another Consul node or cluster NOT running with encryption and attempt to join the existing cluster. 
- -Running this example with encryption turned off and then attempt to upgrade it to use encryption is a good exercise to validate that a production cluster can be upgraded with minimal impact. - -To understand more about how Consul handles encryption or how you can upgrade to use encryption without downtime, check out the [Consul encryption documentation](https://www.consul.io/docs/agent/encryption.html). **IMPORTANT:** The certs included in this repo are **NOT** meant to be used in production. You should generate your own certs if you're running this for anything other than experimenting or testing. +The end result of this example should be a cluster of 3 Consul servers and 3 Consul clients, all running on individual EC2 instances. ## Quick start -To deploy a Consul cluster with encryption enabled: +To deploy a Consul cluster with ACL enabled: -1. Create a new AMI using the Packer manifest and the certificates in the `packer` directory. +1. Create a new AMI using the Packer manifest in the [`examples/consul-ami`](../consul-ami) directory. Make note of the resulting AMI ID as you will need that for step 3. 1. Modify `main.tf` to add your provider credentials, VPC/subnet ids if you need to, etc. -1. Modify `variables.tf` to customize the cluster. **NOTE:** the `gossip_encryption_key` variable must be a 16-byte key that can be generated offline with `consul keygen`. It's **NOT** a good idea to keep this key **in plain text** in source control. It should be encrypted beforehand (with something like KMS) and decrypted by Consul during boot. +1. Modify `variables.tf` to customize the cluster. At a minimum you will want to supply the AMI ID from the image built in step 1. 1. Run `terraform init`. 1. Run `terraform apply`. -1. `ssh` into one of the boxes and make sure all nodes correctly discover each other (by running `consul members` for example). -1. 
You can also validate that encryption is turned on by looking at `/opt/consul/log/consul-stdout.log` and verifying you see `Encrypt: Gossip: true, TLS-Outgoing: true, TLS-Incoming: true`. \ No newline at end of file +1. `ssh` into one of the boxes and make sure all nodes correctly discover each other (by running `consul members` for example). \ No newline at end of file diff --git a/examples/example-with-acl/variables.tf b/examples/example-with-acl/variables.tf index 861c0312..f7a04a39 100644 --- a/examples/example-with-acl/variables.tf +++ b/examples/example-with-acl/variables.tf @@ -13,7 +13,7 @@ # --------------------------------------------------------------------------------------------------------------------- variable "ami_id" { - description = "The ID of the AMI to run in the cluster. This should be an AMI built from the Packer template under examples/example-with-encryption/packer/consul-with-certs.json. To keep this example simple, we run the same AMI on both server and client nodes, but in real-world usage, your client nodes would also run your apps. If the default value is used, Terraform will look up the latest AMI build automatically." + description = "The ID of the AMI to run in the cluster. This should be an AMI built from the Packer template under examples/consul-ami. To keep this example simple, we run the same AMI on both server and client nodes, but in real-world usage, your client nodes would also run your apps. If the default value is used, Terraform will look up the latest AMI build automatically." 
type = string default = null } diff --git a/modules/run-consul/README.md b/modules/run-consul/README.md index 0b990a78..237e8e81 100644 --- a/modules/run-consul/README.md +++ b/modules/run-consul/README.md @@ -93,7 +93,9 @@ Options for Consul Autopilot: * `--autopilot-disable-upgrade-migration` (optional)(enterprise-only): If this flag is set, this will disable Autopilot's upgrade migration strategy in Consul Enterprise of waiting until enough newer-versioned servers have been added to the cluster before promoting any of them to voters. Defaults to false. * `--autopilot-upgrade-version-tag` (optional)(enterprise-only): That tag to be used to override the version information used during a migration. +Options for ACL enabling: +* `--enable-acl` (optional): If set, the script will perform the steps required to bootstrap the ACL system in Consul, and populate an [AWS Systems Manager Parameter](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) with the root bootstrap token so that other nodes (include client nodes) can retrieve it and create agent tokens for themselves. **This requires that the IAM Role used by the instances includes a policy that allows for the reading and writing of SSM parameters under the path `/*`. An example of how to do this with the existing Terraform modules present in this repository is located [here](../../examples/example-with-acl).** Example: From c95876846432fca4142e792a77cbd036e825064c Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Wed, 28 Apr 2021 11:25:54 -0400 Subject: [PATCH 23/27] The install-consul script now also installs Git as a required dependency. 
--- modules/install-consul/install-consul | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/install-consul/install-consul b/modules/install-consul/install-consul index 14878bc3..3e9b69df 100755 --- a/modules/install-consul/install-consul +++ b/modules/install-consul/install-consul @@ -123,10 +123,10 @@ function install_dependencies { if has_apt_get; then sudo apt-get update -y - sudo apt-get install -y awscli curl unzip jq + sudo apt-get install -y awscli curl unzip jq git elif has_yum; then sudo yum update -y - sudo yum install -y aws curl unzip jq + sudo yum install -y aws curl unzip jq git else log_error "Could not find apt-get or yum. Cannot install dependencies on this OS." exit 1 From 4656f310995a82ca24df970cfd37e4152218a09b Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Wed, 28 Apr 2021 14:14:45 -0400 Subject: [PATCH 24/27] Moved install of bash-commons to install-consul The steps to download and install the 'bash-commons' library have been moved to the 'install-consul' script so that it is not dependent upon the Packer configuration to include this required step. 
--- examples/consul-ami/consul.json | 7 ------- modules/install-consul/install-consul | 5 +++++ 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/examples/consul-ami/consul.json b/examples/consul-ami/consul.json index 2a784e87..423ca71b 100644 --- a/examples/consul-ami/consul.json +++ b/examples/consul-ami/consul.json @@ -96,12 +96,5 @@ "/tmp/terraform-aws-consul/modules/setup-systemd-resolved/setup-systemd-resolved" ], "pause_before": "30s" - },{ - "type": "shell", - "inline": [ - "sudo mkdir -p /opt/gruntwork", - "git clone --branch v0.1.4 https://github.com/gruntwork-io/bash-commons.git /tmp/bash-commons", - "sudo cp -r /tmp/bash-commons/modules/bash-commons/src /opt/gruntwork/bash-commons" - ] }] } diff --git a/modules/install-consul/install-consul b/modules/install-consul/install-consul index 3e9b69df..8191cd71 100755 --- a/modules/install-consul/install-consul +++ b/modules/install-consul/install-consul @@ -131,6 +131,11 @@ function install_dependencies { log_error "Could not find apt-get or yum. Cannot install dependencies on this OS." exit 1 fi + + sudo mkdir -p /opt/gruntwork + git clone --branch v0.1.4 https://github.com/gruntwork-io/bash-commons.git /tmp/bash-commons + sudo cp -r /tmp/bash-commons/modules/bash-commons/src /opt/gruntwork/bash-commons + } function user_exists { From e7dfaaeb99b0ea0f3796155578a147f13760a3c8 Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Fri, 7 May 2021 16:35:40 -0400 Subject: [PATCH 25/27] Tweak of token generation for agents to use newer method The method used by the run-consul script to generate the agent token has been enhanced to use a newer method when the version of Consul is compatible with it. 
--- modules/run-consul/consul-common.sh | 46 +++++++++++++++++++++++++++++ modules/run-consul/run-consul | 26 ++++++++++++---- 2 files changed, 66 insertions(+), 6 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index ec5f1be2..a721f916 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -498,6 +498,23 @@ function generate_token { echo $generated_token } +function generate_agent_token { + local -r node_name="$1" + local -r datacenter_name="$2" + local -r token="$3" + + if [[ ! "$token" == "" ]]; then + token_arg="-token $token" + else + token_arg="" + fi + + local -r generated_token=$(consul acl token create -node-identity="$node_name:$datacenter_name" -format=json $token_arg | jq '.SecretID' -r) + + echo $generated_token + +} + function set_agent_token { local -r agent_token="$1" local -r token="$2" @@ -511,4 +528,33 @@ function set_agent_token { fi consul acl set-agent-token $token_arg agent "$token" +} + +function get_consul_version { + local consul_version + + consul_version=$(consul -version | grep 'v[0-9]' | sed 's/Consul v//g') + + echo $consul_version +} + +function get_consul_major_version { + local major_version + + major_version=$(get_consul_version | cut -d'.' -f1) + echo $major_version +} + +function get_consul_minor_version { + local minor_version + + minor_version=$(get_consul_version | cut -d'.' -f2) + echo $minor_version +} + +function get_consul_build_version { + local build_version + + build_version=$(get_consul_version | cut -d'.' 
-f3) + echo $build_version } \ No newline at end of file diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index 3b2e98de..d20e30fd 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -361,13 +361,27 @@ function run { bootstrap_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region "ssm") fi - # Generate agent policy dynamically and write it - log_info "Creating agent policy and token" - local -r agent_policy=$(generate_node_acl_policy $local_hostname) - write_acl_policy "$instance_id" "$agent_policy" "$bootstrap_token" + local -r consul_major_version=$(get_consul_major_version) + local -r consul_minor_version=$(get_consul_minor_version) + local -r consul_build_version=$(get_consul_build_version) + + + local agent_token + + if ([ $consul_major_version -ge 1 ] && ( [ $consul_minor_version -ge 9 ] || ([ $consul_minor_version -eq 8 ] && [ $consul_build_version -ge 1 ]))) || [ $consul_major_version -gt 1 ]; then + # Generate the agent token using the newer method + log_info "Creating agent token using -node-idenity" + agent_token=$(generate_agent_token $local_hostname $datacenter $bootstrap_token) + else + # Generate agent policy dynamically and write it + log_info "Creating agent policy and token" + local -r agent_policy=$(generate_node_acl_policy $local_hostname) + write_acl_policy "$instance_id" "$agent_policy" "$bootstrap_token" + + # Generate agent token and persist it + agent_token=$(generate_token "$instance_id" "$instance_id agent policy" $bootstrap_token) + fi - # Generate agent token and persist it - local -r agent_token=$(generate_token "$instance_id" "$instance_id agent policy" $bootstrap_token) set_agent_token "$agent_token" "$bootstrap_token" fi From 9e3c70b94756c95f90fb50b51868ba893bfeb66a Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 10 May 2021 12:46:35 -0400 Subject: [PATCH 26/27] Refactor of ACL storage logic - The run-consul script will now source a specific script file 
based on the value given by the new '--acl-storage-type' command line argument. This allows for easier addition of new methods of storing ACL tokens based on a set of common interface functions (write_acl_token, read_acl_token). - An additional check to ensure that the token generated for the agent is not empty has been added, so that the script will properly throw an error and exit if the token cannot be generated. - The install-consul script has been updated to include copying all '*.sh' files in the 'modules/run-consul' directory, so as to include any new common script files added from here on out. - The example deployment with ACLs has been updated with the newly added command line arguments for the user data scripts. - The test for ACL enabled clusters has been corrected so that it expects an empty response to 'consul members' when not passing in an ACL token. --- examples/example-with-acl/user-data-client.sh | 4 +- examples/example-with-acl/user-data-server.sh | 2 +- modules/install-consul/install-consul | 2 +- modules/run-consul/consul-bootstrap-ssm.sh | 52 ++++++++++++++++ modules/run-consul/consul-common.sh | 60 ------------------- modules/run-consul/run-consul | 47 +++++++++++++-- test/consul_helpers.go | 2 +- 7 files changed, 98 insertions(+), 71 deletions(-) create mode 100644 modules/run-consul/consul-bootstrap-ssm.sh diff --git a/examples/example-with-acl/user-data-client.sh b/examples/example-with-acl/user-data-client.sh index e4f0f7c4..20a4866d 100644 --- a/examples/example-with-acl/user-data-client.sh +++ b/examples/example-with-acl/user-data-client.sh @@ -14,7 +14,7 @@ if [[ "${enable_gossip_encryption}" == "true" && ! -z "${gossip_encryption_key}" # Note that setting the encryption key in plain text here means that it will be readable from the Terraform state file # and/or the EC2 API/console. 
We're doing this for simplicity, but in a real production environment you should pass an # encrypted key to Terraform and decrypt it before passing it to run-consul with something like KMS. - gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}" --enable-acl + gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}" fi if [[ "${enable_rpc_encryption}" == "true" && ! -z "${ca_path}" && ! -z "${cert_file_path}" && ! -z "${key_file_path}" ]]; then @@ -23,6 +23,6 @@ fi # TODO: Add option for enabling ACL -/opt/consul/bin/run-consul --client --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration +/opt/consul/bin/run-consul --client --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration --enable-acl --acl-storage-type ssm # You could add commands to boot your other apps here \ No newline at end of file diff --git a/examples/example-with-acl/user-data-server.sh b/examples/example-with-acl/user-data-server.sh index bbfdffd6..7ad11539 100755 --- a/examples/example-with-acl/user-data-server.sh +++ b/examples/example-with-acl/user-data-server.sh @@ -23,4 +23,4 @@ fi # TODO: Add option for enabling ACL -/opt/consul/bin/run-consul --server --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration --enable-acl \ No newline at end of file +/opt/consul/bin/run-consul --server --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration --enable-acl --acl-storage-type ssm \ No newline at end of file diff --git a/modules/install-consul/install-consul b/modules/install-consul/install-consul index 8191cd71..72b080bd 100755 --- 
a/modules/install-consul/install-consul +++ b/modules/install-consul/install-consul @@ -208,7 +208,7 @@ function install_binary { log_info "Copying Consul run script to $run_consul_dest_path" sudo cp "$SCRIPT_DIR/../run-consul/run-consul" "$run_consul_dest_path" log_info "Copying common scripts to $run_consul_dest_path" - sudo cp "$SCRIPT_DIR/../run-consul/consul-common.sh" "$bin_dir/consul-common.sh" + sudo cp "$SCRIPT_DIR/../run-consul/"*.sh "$bin_dir/" sudo chown "$username:$username" "$run_consul_dest_path" sudo chmod a+x "$run_consul_dest_path" } diff --git a/modules/run-consul/consul-bootstrap-ssm.sh b/modules/run-consul/consul-bootstrap-ssm.sh new file mode 100644 index 00000000..8d2b5d13 --- /dev/null +++ b/modules/run-consul/consul-bootstrap-ssm.sh @@ -0,0 +1,52 @@ +#!/bin/bash + +set -e + +function get_acl_token_parameter_name { + local -r cluster_name="$1" + local -r token_name="${2:-bootstrap}" + echo "/$cluster_name/token/$token_name" +} + +function read_acl_token { + local -r cluster_name="$1" + local -r token_name="${2:-bootstrap}" + local -r aws_region="$3" + local -r max_retries="${4:-60}" + local -r sleep_between_retries="${5:-5}" + local -r ignore_error="${6:-false}" + + local parameter_name=$(get_acl_token_parameter_name $cluster_name $token_name) + local parameters + local parameter_exists + local token + + for (( i=0; i<"$max_retries"; i++ )); do + parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption --region $aws_region) + parameter_exists=$(echo $parameters | jq '[.Parameters[]] | length') + if [[ $parameter_exists -eq 1 ]]; then + token=$(echo $parameters | jq '.Parameters[0].Value' -r) + echo $token + return + else + log_info "Parameter $parameter_name does not yet exist." + sleep "$sleep_between_retries" + fi + done + log_error "Parameter $parameter_name still does not exist after exceeding maximum number of retries." 
+ if [[ "$ignore_error" == "false" ]]; then + exit 1 + fi +} + +function write_acl_token { + local -r token="$1" + local -r cluster_name="$2" + local -r token_name="${3:-bootstrap}" + local -r aws_region="$4" + local -r storage_type="$5" + + local -r parameter_name=$(get_acl_token_parameter_name $cluster_name $token_name) + aws ssm put-parameter --name $parameter_name --value $token --type SecureString --region $aws_region + +} \ No newline at end of file diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index a721f916..15235b7b 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -222,66 +222,6 @@ function get_owner_of_path { ls -ld "$path" | awk '{print $3}' } -function get_acl_token_ssm_parameter_name { - local -r cluster_name="$1" - local -r token_name="${2:-bootstrap}" - echo "/$cluster_name/token/$token_name" -} - -function read_acl_token { - local -r cluster_name="$1" - local -r token_name="${2:-bootstrap}" - local -r aws_region="$3" - local -r storage_type="$4" - local -r max_retries="${5:-60}" - local -r sleep_between_retries="${6:-5}" - local -r ignore_error="${7:-false}" - - if [[ $storage_type == "ssm" ]]; then - local parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name $token_name) - local parameters - local parameter_exists - local token - - for (( i=0; i<"$max_retries"; i++ )); do - parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption --region $aws_region) - parameter_exists=$(echo $parameters | jq '[.Parameters[]] | length') - if [[ $parameter_exists -eq 1 ]]; then - token=$(echo $parameters | jq '.Parameters[0].Value' -r) - echo $token - return - else - log_info "Parameter $parameter_name does not yet exist." - sleep "$sleep_between_retries" - fi - done - log_error "Parameter $parameter_name still does not exist after exceeding maximum number of retries." 
- if [[ "$ignore_error" == "false" ]]; then - exit 1 - fi - else - log_error "ACL storage type '${storage_type}' is not supported." - exit 1 - fi -} - -function write_acl_token { - local -r token="$1" - local -r cluster_name="$2" - local -r token_name="${3:-bootstrap}" - local -r aws_region="$4" - local -r storage_type="$5" - - if [[ $storage_type == "ssm" ]]; then - local -r parameter_name=$(get_acl_token_ssm_parameter_name $cluster_name $token_name) - aws ssm put-parameter --name $parameter_name --value $token --type SecureString --region $aws_region - else - log_error "ACL storage type '${storage_type}' is not supported." - exit 1 - fi - -} - function generate_consul_config { local -r server="${1}" local -r config_dir="${2}" diff --git a/modules/run-consul/run-consul b/modules/run-consul/run-consul index d20e30fd..cac82506 100755 --- a/modules/run-consul/run-consul +++ b/modules/run-consul/run-consul @@ -55,7 +55,8 @@ function print_usage { echo -e " --environment\t\tA single environment variable in the key/value pair form 'KEY=\"val\"' to pass to Consul as environment variable when starting it up. Repeat this option for additional variables. Optional." echo -e " --skip-consul-config\tIf this flag is set, don't generate a Consul configuration file. Optional. Default is false." echo -e " --recursor\tThis flag provides address of upstream DNS server that is used to recursively resolve queries if they are not inside the service domain for Consul. Repeat this option for additional variables. Optional." - echo -e " --enable-acl\tThis flag will cause the cluster to bootstrap a root ACL token upon first provision, which is then stored in AWS SSM for other cluster members to retrieve." + echo -e " --enable-acl\tThis flag will cause the cluster to bootstrap a root ACL token upon first provision, which is then stored in a user chosen store for other cluster members to retrieve." + echo -e " --acl-store-type\tThe type of storage to use when persisting the root ACL token. 
Currently the only supported value is 'ssm'." echo echo "Options for Consul Autopilot:" echo @@ -102,6 +103,8 @@ function run { local server_stabilization_time="$DEFAULT_AUTOPILOT_SERVER_STABILIZATION_TIME" local redundancy_zone_tag="$DEFAULT_AUTOPILOT_REDUNDANCY_ZONE_TAG" local disable_upgrade_migration="$DEFAULT_AUTOPILOT_DISABLE_UPGRADE_MIGRATION" + local enable_acl + local acl_storage_type while [[ $# -gt 0 ]]; do local key="$1" @@ -232,6 +235,11 @@ --enable-acl) enable_acl="true" ;; + --acl-storage-type) + assert_not_empty "$key" "$2" + acl_storage_type="$2" + shift + ;; --recursor) assert_not_empty "$key" "$2" recursors+=("$2") @@ -251,6 +259,29 @@ shift done + if [[ "$enable_acl" == "true" ]]; then + if [ -z "$acl_storage_type" ] || [ "$acl_storage_type" == "" ]; then + log_error "You must specify an option for the --acl-storage-type parameter when --enable-acl is specified." + exit 1 + fi + + local storage_type_matched="false" + + # Source appropriate storage provider script + case "$acl_storage_type" in + 'ssm' | 'SSM') + storage_type_matched="true" + source ${SCRIPT_DIR}/consul-bootstrap-ssm.sh;; + + *) + if [ "$storage_type_matched" = "false" ]; then + log_error "ACL storage type '${acl_storage_type}' is not supported." + exit 1 + fi;; + + esac + fi + if [[ ("$server" == "true" && "$client" == "true") || ("$server" == "false" && "$client" == "false") ]]; then log_error "Exactly one of --server or --client must be set." 
exit 1 @@ -342,7 +373,7 @@ function run { if [[ "$rally_point_hostname" == "$local_hostname" ]]; then log_info "Checking if bootstrap token already exists" - local -r existing_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region "ssm" 1 0 "true") + local -r existing_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region 1 0 "true") if [[ "$existing_token" == "" ]] && [ "${server}" == "true" ]; then log_info "Generating bootstrap ACL token" @@ -358,7 +389,7 @@ function run { # then we need to read it. if [[ -z "$bootstrap_token" ]]; then log_info "Acquiring bootstrap token" - bootstrap_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region "ssm") + bootstrap_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region) fi local -r consul_major_version=$(get_consul_major_version) @@ -371,7 +402,7 @@ function run { if ([ $consul_major_version -ge 1 ] && ( [ $consul_minor_version -ge 9 ] || ([ $consul_minor_version -eq 8 ] && [ $consul_build_version -ge 1 ]))) || [ $consul_major_version -gt 1 ]; then # Generate the agent token using the newer method log_info "Creating agent token using -node-idenity" - agent_token=$(generate_agent_token $local_hostname $datacenter $bootstrap_token) + agent_token=$(generate_agent_token $instance_id $datacenter $bootstrap_token) else # Generate agent policy dynamically and write it log_info "Creating agent policy and token" @@ -382,8 +413,12 @@ function run { agent_token=$(generate_token "$instance_id" "$instance_id agent policy" $bootstrap_token) fi - set_agent_token "$agent_token" "$bootstrap_token" - + if [ -z "$agent_token" ] || [ "$agent_token" == "" ]; then + log_error "Unable to acquire agent token. Aborting." 
+ exit 1 + else + set_agent_token "$agent_token" "$bootstrap_token" + fi fi } diff --git a/test/consul_helpers.go b/test/consul_helpers.go index fd30864e..8b098abb 100644 --- a/test/consul_helpers.go +++ b/test/consul_helpers.go @@ -126,7 +126,7 @@ func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examples checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_SERVER_ASG_NAME, terraformOptions, awsRegion, enableAcl) // Check the Consul clients - checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion, false) + checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion, enableAcl) }) } From 9c30efed6bc21d25c3a5f4b4fc1831a39f63e82e Mon Sep 17 00:00:00 2001 From: Josh Feierman Date: Mon, 10 May 2021 12:49:14 -0400 Subject: [PATCH 27/27] Correction of agent policy template - The function that generates the agent ACL token policy has been updated to remove un-necessary read permissions. It now matches the example given in the official Hashicorp tutorial (https://learn.hashicorp.com/tutorials/consul/access-control-setup-production\#create-the-agent-policy). --- modules/run-consul/consul-common.sh | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh index 15235b7b..2a87576a 100644 --- a/modules/run-consul/consul-common.sh +++ b/modules/run-consul/consul-common.sh @@ -394,10 +394,6 @@ function generate_node_acl_policy { local -r node_name="$1" local -r policy_hcl=$(cat <