diff --git a/examples/example-with-acl/README.md b/examples/example-with-acl/README.md
new file mode 100644
index 00000000..298e67bf
--- /dev/null
+++ b/examples/example-with-acl/README.md
@@ -0,0 +1,16 @@
+# Consul cluster with ACL example
+
+This folder contains a set of Terraform manifests for deploying a Consul cluster in AWS with [ACLs](https://www.consul.io/docs/security/acl) enabled. The root bootstrap token is stored in an [AWS Systems Manager Parameter](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) so that other nodes can retrieve it and create agent tokens for themselves.
+
+The end result of this example should be a cluster of 3 Consul servers and 3 Consul clients, all running on individual EC2 instances.
+
+## Quick start
+
+To deploy a Consul cluster with ACL enabled:
+
+1. Create a new AMI using the Packer manifest in the [`examples/consul-ami`](../consul-ami) directory. Make note of the resulting AMI ID, as you will need it for step 3.
+1. Modify `main.tf` to add your provider credentials, VPC/subnet IDs if you need to, etc.
+1. Modify `variables.tf` to customize the cluster. At a minimum you will want to supply the AMI ID from the image built in step 1.
+1. Run `terraform init`.
+1. Run `terraform apply`.
+1. `ssh` into one of the boxes and make sure all nodes correctly discover each other (by running `consul members`, for example).
\ No newline at end of file
diff --git a/examples/example-with-acl/main.tf b/examples/example-with-acl/main.tf
new file mode 100644
index 00000000..48f43ca0
--- /dev/null
+++ b/examples/example-with-acl/main.tf
@@ -0,0 +1,158 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# DEPLOY A CONSUL CLUSTER IN AWS
+# These templates show an example of how to use the consul-cluster module to deploy Consul in AWS. We deploy two Auto
+# Scaling Groups (ASGs): one with a small number of Consul server nodes and one with a larger number of Consul client
+# nodes. Note that these templates assume that the AMI you provide via the ami_id input variable is built from
+# the examples/consul-ami/consul.json Packer template.
+# ---------------------------------------------------------------------------------------------------------------------
+
+# ----------------------------------------------------------------------------------------------------------------------
+# REQUIRE A SPECIFIC TERRAFORM VERSION OR HIGHER
+# ----------------------------------------------------------------------------------------------------------------------
+terraform {
+  # This module is now only being tested with Terraform 0.14.x. However, to make upgrading easier, we are setting
+  # 0.12.26 as the minimum version, as that version added support for required_providers with source URLs, making it
+  # forwards compatible with 0.14.x code.
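+  #
+  # For example (illustrative only; this example does not pin any providers), such a
+  # required_providers block might look like:
+  #
+  #   required_providers {
+  #     aws = {
+  #       source = "hashicorp/aws"
+  #     }
+  #   }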
+ required_version = ">= 0.12.26" +} + +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY THE CONSUL SERVER NODES +# --------------------------------------------------------------------------------------------------------------------- + +module "consul_servers" { + # When using these modules in your own templates, you will need to use a Git URL with a ref attribute that pins you + # to a specific version of the modules, such as the following example: + # source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-cluster?ref=v0.0.1" + source = "../../modules/consul-cluster" + + cluster_name = "${var.cluster_name}-server" + cluster_size = var.num_servers + instance_type = "t2.micro" + spot_price = var.spot_price + + # The EC2 Instances will use these tags to automatically discover each other and form a cluster + cluster_tag_key = var.cluster_tag_key + cluster_tag_value = var.cluster_name + + ami_id = var.ami_id + user_data = data.template_file.user_data_server.rendered + + vpc_id = data.aws_vpc.default.id + subnet_ids = data.aws_subnet_ids.default.ids + + # TODO: Add variable enable_acl + + # To make testing easier, we allow Consul and SSH requests from any IP address here but in a production + # deployment, we strongly recommend you limit this to the IP address ranges of known, trusted servers inside your VPC. + allowed_ssh_cidr_blocks = ["0.0.0.0/0"] + + allowed_inbound_cidr_blocks = ["0.0.0.0/0"] + ssh_key_name = var.ssh_key_name + acl_store_type = var.acl_store_type + + tags = [ + { + key = "Environment" + value = "development" + propagate_at_launch = true + } + ] +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE USER DATA SCRIPT THAT WILL RUN ON EACH CONSUL SERVER EC2 INSTANCE WHEN IT'S BOOTING +# This script will configure and start Consul +# --------------------------------------------------------------------------------------------------------------------- + +data "template_file" "user_data_server" { + template = file("${path.module}/user-data-server.sh") + + vars = { + cluster_tag_key = var.cluster_tag_key + cluster_tag_value = var.cluster_name + enable_gossip_encryption = var.enable_gossip_encryption + gossip_encryption_key = var.gossip_encryption_key + enable_rpc_encryption = var.enable_rpc_encryption + ca_path = var.ca_path + cert_file_path = var.cert_file_path + key_file_path = var.key_file_path + # TODO Add enable_acl + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY THE CONSUL CLIENT NODES +# Note that you do not have to use the consul-cluster module to deploy your clients. We do so simply because it +# provides a convenient way to deploy an Auto Scaling Group with the necessary IAM and security group permissions for +# Consul, but feel free to deploy those clients however you choose (e.g. a single EC2 Instance, a Docker cluster, etc). 
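+# Clients deployed outside this module can still join the cluster by using Consul's cloud auto-join against the same
+# server tags, e.g. (illustrative values): retry_join = ["provider=aws tag_key=consul-servers tag_value=<cluster name>"]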
+# --------------------------------------------------------------------------------------------------------------------- + +module "consul_clients" { + # When using these modules in your own templates, you will need to use a Git URL with a ref attribute that pins you + # to a specific version of the modules, such as the following example: + # source = "git::git@github.com:hashicorp/terraform-aws-consul.git//modules/consul-cluster?ref=v0.0.1" + source = "../../modules/consul-cluster" + + cluster_name = "${var.cluster_name}-client" + cluster_size = var.num_clients + instance_type = "t2.micro" + spot_price = var.spot_price + + cluster_tag_key = "consul-clients" + cluster_tag_value = var.cluster_name + + ami_id = var.ami_id + user_data = data.template_file.user_data_client.rendered + + vpc_id = data.aws_vpc.default.id + subnet_ids = data.aws_subnet_ids.default.ids + + # To make testing easier, we allow Consul and SSH requests from any IP address here but in a production + # deployment, we strongly recommend you limit this to the IP address ranges of known, trusted servers inside your VPC. + allowed_ssh_cidr_blocks = ["0.0.0.0/0"] + + allowed_inbound_cidr_blocks = ["0.0.0.0/0"] + ssh_key_name = var.ssh_key_name + + acl_store_type = var.acl_store_type +} + +# --------------------------------------------------------------------------------------------------------------------- +# THE USER DATA SCRIPT THAT WILL RUN ON EACH CONSUL CLIENT EC2 INSTANCE WHEN IT'S BOOTING +# This script will configure and start Consul +# --------------------------------------------------------------------------------------------------------------------- + +data "template_file" "user_data_client" { + template = file("${path.module}/user-data-client.sh") + + vars = { + cluster_tag_key = var.cluster_tag_key + cluster_tag_value = var.cluster_name + enable_gossip_encryption = var.enable_gossip_encryption + gossip_encryption_key = var.gossip_encryption_key + enable_rpc_encryption = var.enable_rpc_encryption + ca_path = var.ca_path + cert_file_path = var.cert_file_path + key_file_path = var.key_file_path + # TODO Add enable_acl variable + } +} + +# --------------------------------------------------------------------------------------------------------------------- +# DEPLOY CONSUL IN THE DEFAULT VPC AND SUBNETS +# Using the default VPC and subnets makes this example easy to run and test, but it means Consul is accessible from the +# public Internet. For a production deployment, we strongly recommend deploying into a custom VPC with private subnets. +# --------------------------------------------------------------------------------------------------------------------- + +data "aws_vpc" "default" { + default = var.vpc_id == null ? 
true : false
+  id      = var.vpc_id
+}
+
+data "aws_subnet_ids" "default" {
+  vpc_id = data.aws_vpc.default.id
+}
+
+data "aws_region" "current" {
+}
diff --git a/examples/example-with-acl/outputs.tf b/examples/example-with-acl/outputs.tf
new file mode 100644
index 00000000..493b36c2
--- /dev/null
+++ b/examples/example-with-acl/outputs.tf
@@ -0,0 +1,59 @@
+output "num_servers" {
+  value = module.consul_servers.cluster_size
+}
+
+output "asg_name_servers" {
+  value = module.consul_servers.asg_name
+}
+
+output "launch_config_name_servers" {
+  value = module.consul_servers.launch_config_name
+}
+
+output "iam_role_arn_servers" {
+  value = module.consul_servers.iam_role_arn
+}
+
+output "iam_role_id_servers" {
+  value = module.consul_servers.iam_role_id
+}
+
+output "security_group_id_servers" {
+  value = module.consul_servers.security_group_id
+}
+
+output "num_clients" {
+  value = module.consul_clients.cluster_size
+}
+
+output "asg_name_clients" {
+  value = module.consul_clients.asg_name
+}
+
+output "launch_config_name_clients" {
+  value = module.consul_clients.launch_config_name
+}
+
+output "iam_role_arn_clients" {
+  value = module.consul_clients.iam_role_arn
+}
+
+output "iam_role_id_clients" {
+  value = module.consul_clients.iam_role_id
+}
+
+output "security_group_id_clients" {
+  value = module.consul_clients.security_group_id
+}
+
+output "aws_region" {
+  value = data.aws_region.current.name
+}
+
+output "consul_servers_cluster_tag_key" {
+  value = module.consul_servers.cluster_tag_key
+}
+
+output "consul_servers_cluster_tag_value" {
+  value = module.consul_servers.cluster_tag_value
+}
diff --git a/examples/example-with-acl/user-data-client.sh b/examples/example-with-acl/user-data-client.sh
new file mode 100644
index 00000000..20a4866d
--- /dev/null
+++ b/examples/example-with-acl/user-data-client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# This script is meant to be run in the User Data of each EC2 Instance while it's booting. The script uses the
+# run-consul script to configure and start Consul in client mode. Note that this script assumes it's running in an AMI
+# built from the Packer template in examples/consul-ami/consul.json.
+
+set -e
+
+# Send the log output from this script to user-data.log, syslog, and the console
+# From: https://alestic.com/2010/12/ec2-user-data-output/
+exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
+
+# These variables are passed in via Terraform template interpolation
+if [[ "${enable_gossip_encryption}" == "true" && ! -z "${gossip_encryption_key}" ]]; then
+  # Note that setting the encryption key in plain text here means that it will be readable from the Terraform state file
+  # and/or the EC2 API/console. We're doing this for simplicity, but in a real production environment you should pass an
+  # encrypted key to Terraform and decrypt it before passing it to run-consul with something like KMS.
+  gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}"
+fi
+
+if [[ "${enable_rpc_encryption}" == "true" && ! -z "${ca_path}" && ! -z "${cert_file_path}" && ! -z "${key_file_path}" ]]; then
+  rpc_encryption_configuration="--enable-rpc-encryption --ca-path ${ca_path} --cert-file-path ${cert_file_path} --key-file-path ${key_file_path}"
+fi
+
+# TODO: Make enabling ACLs configurable; for now run-consul is always invoked with --enable-acl below
+
+/opt/consul/bin/run-consul --client --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration --enable-acl --acl-storage-type ssm
+
+# You could add commands to boot your other apps here
\ No newline at end of file
diff --git a/examples/example-with-acl/user-data-server.sh b/examples/example-with-acl/user-data-server.sh
new file mode 100755
index 00000000..7ad11539
--- /dev/null
+++ b/examples/example-with-acl/user-data-server.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# This script is meant to be run in the User Data of each EC2 Instance while it's booting. The script uses the
+# run-consul script to configure and start Consul in server mode. Note that this script assumes it's running in an AMI
+# built from the Packer template in examples/consul-ami/consul.json.
+
+set -e
+
+# Send the log output from this script to user-data.log, syslog, and the console
+# From: https://alestic.com/2010/12/ec2-user-data-output/
+exec > >(tee /var/log/user-data.log|logger -t user-data -s 2>/dev/console) 2>&1
+
+# These variables are passed in via Terraform template interpolation
+if [[ "${enable_gossip_encryption}" == "true" && ! -z "${gossip_encryption_key}" ]]; then
+  # Note that setting the encryption key in plain text here means that it will be readable from the Terraform state file
+  # and/or the EC2 API/console. We're doing this for simplicity, but in a real production environment you should pass an
+  # encrypted key to Terraform and decrypt it before passing it to run-consul with something like KMS.
+  gossip_encryption_configuration="--enable-gossip-encryption --gossip-encryption-key ${gossip_encryption_key}"
+fi
+
+if [[ "${enable_rpc_encryption}" == "true" && ! -z "${ca_path}" && ! -z "${cert_file_path}" && ! -z "${key_file_path}" ]]; then
+  rpc_encryption_configuration="--enable-rpc-encryption --ca-path ${ca_path} --cert-file-path ${cert_file_path} --key-file-path ${key_file_path}"
+fi
+
+# TODO: Make enabling ACLs configurable; for now run-consul is always invoked with --enable-acl below
+
+/opt/consul/bin/run-consul --server --cluster-tag-key "${cluster_tag_key}" --cluster-tag-value "${cluster_tag_value}" $gossip_encryption_configuration $rpc_encryption_configuration --enable-acl --acl-storage-type ssm
\ No newline at end of file
diff --git a/examples/example-with-acl/variables.tf b/examples/example-with-acl/variables.tf
new file mode 100644
index 00000000..f7a04a39
--- /dev/null
+++ b/examples/example-with-acl/variables.tf
@@ -0,0 +1,107 @@
+# ---------------------------------------------------------------------------------------------------------------------
+# ENVIRONMENT VARIABLES
+# Define these secrets as environment variables
+# ---------------------------------------------------------------------------------------------------------------------
+
+# AWS_ACCESS_KEY_ID
+# AWS_SECRET_ACCESS_KEY
+# AWS_DEFAULT_REGION
+
+# ---------------------------------------------------------------------------------------------------------------------
+# OPTIONAL PARAMETERS
+# These parameters have reasonable defaults.
+# ---------------------------------------------------------------------------------------------------------------------
+
+variable "ami_id" {
+  description = "The ID of the AMI to run in the cluster. This should be an AMI built from the Packer template under examples/consul-ami. To keep this example simple, we run the same AMI on both server and client nodes, but in real-world usage, your client nodes would also run your apps. Note that this example does not look up an AMI automatically, so you must supply a valid AMI ID (see the Quick start section of the README)."
+  type        = string
+  default     = null
+}
+
+variable "cluster_name" {
+  description = "What to name the Consul cluster and all of its associated resources"
+  type        = string
+  default     = "consul-example"
+}
+
+variable "num_servers" {
+  description = "The number of Consul server nodes to deploy. We strongly recommend using 3 or 5."
+  type        = number
+  default     = 3
+}
+
+variable "num_clients" {
+  description = "The number of Consul client nodes to deploy. You typically run the Consul client alongside your apps, so set this value to however many Instances make sense for your app code."
+  type        = number
+  default     = 3
+}
+
+variable "cluster_tag_key" {
+  description = "The tag the EC2 Instances will look for to automatically discover each other and form a cluster."
+  type        = string
+  default     = "consul-servers"
+}
+
+variable "ssh_key_name" {
+  description = "The name of an EC2 Key Pair that can be used to SSH to the EC2 Instances in this cluster. Leave as null to not associate a Key Pair."
+  type        = string
+  default     = null
+}
+
+variable "vpc_id" {
+  description = "The ID of the VPC in which the nodes will be deployed. Uses default VPC if not supplied."
+  type        = string
+  default     = null
+}
+
+variable "spot_price" {
+  description = "The maximum hourly price to pay for EC2 Spot Instances."
+  type        = string
+  default     = null
+}
+
+variable "enable_gossip_encryption" {
+  description = "Encrypt gossip traffic between nodes. Must also specify encryption key."
+  type        = bool
+  default     = false
+}
+
+variable "enable_rpc_encryption" {
+  description = "Encrypt RPC traffic between nodes. Must also specify TLS certificates and keys."
+  type        = bool
+  default     = false
+}
+
+variable "gossip_encryption_key" {
+  description = "16 byte cryptographic key to encrypt gossip traffic between nodes. Must set 'enable_gossip_encryption' to true for this to take effect. WARNING: Setting the encryption key here means it will be stored in plain text. We're doing this here to keep the example simple, but in production you should inject it more securely, e.g. retrieving it from KMS."
+  type        = string
+  default     = ""
+}
+
+variable "ca_path" {
+  description = "Path to the directory of CA files used to verify outgoing connections."
+  type        = string
+  default     = "/opt/consul/tls/ca"
+}
+
+variable "cert_file_path" {
+  description = "Path to the certificate file used to verify incoming connections."
+  type        = string
+  default     = "/opt/consul/tls/consul.crt.pem"
+}
+
+variable "key_file_path" {
+  description = "Path to the certificate key used to verify incoming connections."
+  type        = string
+  default     = "/opt/consul/tls/consul.key.pem"
+}
+
+variable "acl_store_type" {
+  description = "The type of cloud store where the cluster will be able to write / read ACL tokens. If set to an empty string, no related IAM policies will be created."
+  type        = string
+  default     = "ssm"
+  validation {
+    condition     = contains(["ssm",""],var.acl_store_type)
+    error_message = "You must specify a supported store type for ACL tokens. Currently the only allowed value is 'ssm'."
+ } +} diff --git a/modules/consul-cluster/main.tf b/modules/consul-cluster/main.tf index 72f357be..e2fc1248 100644 --- a/modules/consul-cluster/main.tf +++ b/modules/consul-cluster/main.tf @@ -245,7 +245,9 @@ data "aws_iam_policy_document" "instance_role" { module "iam_policies" { source = "../consul-iam-policies" - enabled = var.enable_iam_setup - iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) + enabled = var.enable_iam_setup + iam_role_id = element(concat(aws_iam_role.instance_role.*.id, [""]), 0) + acl_store_type = var.acl_store_type + cluster_tag_value = var.cluster_tag_value } diff --git a/modules/consul-cluster/variables.tf b/modules/consul-cluster/variables.tf index eb7a941b..3e5793c1 100644 --- a/modules/consul-cluster/variables.tf +++ b/modules/consul-cluster/variables.tf @@ -289,3 +289,13 @@ variable "protect_from_scale_in" { type = bool default = false } + +variable "acl_store_type" { + description = "The type of cloud store where the cluster will be able to write / read ACL tokens. If left at the default then no related policies will be created." + type = string + default = "" + validation { + condition = contains(["ssm",""],var.acl_store_type) + error_message = "You must specify a supported store type for ACL tokens. Currently the only allowed value is 'ssm'." + } +} diff --git a/modules/consul-iam-policies/main.tf b/modules/consul-iam-policies/main.tf index 7bb6aee5..419cb1b3 100644 --- a/modules/consul-iam-policies/main.tf +++ b/modules/consul-iam-policies/main.tf @@ -34,3 +34,31 @@ data "aws_iam_policy_document" "auto_discover_cluster" { } } +data "aws_iam_policy_document" "acl_token_cluster_ssm" { + + count = var.acl_store_type == "ssm" ? 1 : 0 + + statement { + effect = "Allow" + + actions = [ "ssm:PutParameter", "ssm:GetParameters" ] + + resources = [ "arn:aws:ssm:${data.aws_region.current.name}:${data.aws_caller_identity.current.account_id}:parameter/${var.cluster_tag_value}/*" ] + } +} + +resource "aws_iam_role_policy" "ssm" { + count = var.acl_store_type == "ssm" ? 1 : 0 + name = "ssm-cluster" + role = var.iam_role_id + policy = data.aws_iam_policy_document.acl_token_cluster_ssm[0].json +} + +data "aws_caller_identity" "current" { + +} + +data "aws_region" "current" { + +} + diff --git a/modules/consul-iam-policies/variables.tf b/modules/consul-iam-policies/variables.tf index 1bda7959..f4695b39 100644 --- a/modules/consul-iam-policies/variables.tf +++ b/modules/consul-iam-policies/variables.tf @@ -14,3 +14,23 @@ variable "enabled" { default = true } +# --------------------------------------------------------------------------------------------------------------------- +# OPTIONAL PARAMETERS +# You may provide a value for each of these parameters; in some cases they may be required if certain other options are turned on. +# --------------------------------------------------------------------------------------------------------------------- + +variable "cluster_tag_value" { + description = "The EC2 tag value used to identify cluster members. This is only required if you set 'acl_store_type' to 'ssm', so that the instances can write to / read from SSM parameters under the correct path." + type = string + default = "" +} + +variable "acl_store_type" { + description = "The type of cloud store where the cluster will write / read ACL tokens. If left at the default then no related policies will be created." 
+  type        = string
+  default     = ""
+  validation {
+    condition     = contains(["ssm",""],var.acl_store_type)
+    error_message = "You must specify a supported store type for ACL tokens. Currently the only allowed value is 'ssm'."
+  }
+}
\ No newline at end of file
diff --git a/modules/install-consul/install-consul b/modules/install-consul/install-consul
index 789ebbb4..72b080bd 100755
--- a/modules/install-consul/install-consul
+++ b/modules/install-consul/install-consul
@@ -17,6 +17,8 @@
 readonly SYSTEM_BIN_DIR="/usr/local/bin"
 
 readonly SCRIPT_NAME="$(basename "$0")"
 
+# TODO: Add step to check if bash-commons is installed
+
 function print_usage {
   echo
   echo "Usage: install-consul [OPTIONS]"
@@ -121,14 +123,19 @@ function install_dependencies {
 
   if has_apt_get; then
     sudo apt-get update -y
-    sudo apt-get install -y awscli curl unzip jq
+    sudo apt-get install -y awscli curl unzip jq git
   elif has_yum; then
     sudo yum update -y
-    sudo yum install -y aws curl unzip jq
+    sudo yum install -y aws curl unzip jq git
   else
     log_error "Could not find apt-get or yum. Cannot install dependencies on this OS."
     exit 1
   fi
+
+  sudo mkdir -p /opt/gruntwork
+  git clone --branch v0.1.4 https://github.com/gruntwork-io/bash-commons.git /tmp/bash-commons
+  sudo cp -r /tmp/bash-commons/modules/bash-commons/src /opt/gruntwork/bash-commons
+
 }
 
 function user_exists {
@@ -200,6 +207,8 @@ function install_binary {
   log_info "Copying Consul run script to $run_consul_dest_path"
   sudo cp "$SCRIPT_DIR/../run-consul/run-consul" "$run_consul_dest_path"
+  log_info "Copying common scripts to $bin_dir"
+  sudo cp "$SCRIPT_DIR/../run-consul/"*.sh "$bin_dir/"
   sudo chown "$username:$username" "$run_consul_dest_path"
   sudo chmod a+x "$run_consul_dest_path"
 }
diff --git a/modules/run-consul/README.md b/modules/run-consul/README.md
index 0b990a78..237e8e81 100644
--- a/modules/run-consul/README.md
+++ b/modules/run-consul/README.md
@@ -93,7 +93,9 @@ Options for Consul Autopilot:
 
 * `--autopilot-disable-upgrade-migration` (optional)(enterprise-only): If this flag is set, this will disable Autopilot's upgrade migration strategy in Consul Enterprise of waiting until enough newer-versioned servers have been added to the cluster before promoting any of them to voters. Defaults to false.
 * `--autopilot-upgrade-version-tag` (optional)(enterprise-only): That tag to be used to override the version information used during a migration.
 
+Options for enabling ACLs:
+* `--enable-acl` (optional): If set, the script will perform the steps required to bootstrap the ACL system in Consul, and populate an [AWS Systems Manager Parameter](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html) with the root bootstrap token so that other nodes (including client nodes) can retrieve it and create agent tokens for themselves. **This requires that the IAM Role used by the instances includes a policy that allows reading and writing SSM parameters under the path `/<cluster name>/*`. An example of how to do this with the existing Terraform modules present in this repository is located [here](../../examples/example-with-acl).**
 
 Example:
diff --git a/modules/run-consul/consul-bootstrap-ssm.sh b/modules/run-consul/consul-bootstrap-ssm.sh
new file mode 100644
index 00000000..8d2b5d13
--- /dev/null
+++ b/modules/run-consul/consul-bootstrap-ssm.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+set -e
+
+function get_acl_token_parameter_name {
+  local -r cluster_name="$1"
+  local -r token_name="${2:-bootstrap}"
+  echo "/$cluster_name/token/$token_name"
+}
+
+function read_acl_token {
+  local -r cluster_name="$1"
+  local -r token_name="${2:-bootstrap}"
+  local -r aws_region="$3"
+  local -r max_retries="${4:-60}"
+  local -r sleep_between_retries="${5:-5}"
+  local -r ignore_error="${6:-false}"
+
+  local parameter_name=$(get_acl_token_parameter_name $cluster_name $token_name)
+  local parameters
+  local parameter_exists
+  local token
+
+  for (( i=0; i<"$max_retries"; i++ )); do
+    parameters=$(aws ssm get-parameters --names $parameter_name --with-decryption --region $aws_region)
+    parameter_exists=$(echo $parameters | jq '[.Parameters[]] | length')
+    if [[ $parameter_exists -eq 1 ]]; then
+      token=$(echo $parameters | jq '.Parameters[0].Value' -r)
+      echo $token
+      return
+    else
+      log_info "Parameter $parameter_name does not yet exist."
+      sleep "$sleep_between_retries"
+    fi
+  done
+  log_error "Parameter $parameter_name still does not exist after exceeding maximum number of retries."
+  if [[ "$ignore_error" == "false" ]]; then
+    exit 1
+  fi
+}
+
+function write_acl_token {
+  local -r token="$1"
+  local -r cluster_name="$2"
+  local -r token_name="${3:-bootstrap}"
+  local -r aws_region="$4"
+  # Currently unused; only the SSM storage type is supported
+  local -r storage_type="$5"
+
+  local -r parameter_name=$(get_acl_token_parameter_name $cluster_name $token_name)
+  aws ssm put-parameter --name $parameter_name --value $token --type SecureString --region $aws_region
+
+}
\ No newline at end of file
diff --git a/modules/run-consul/consul-common.sh b/modules/run-consul/consul-common.sh
new file mode 100644
index 00000000..2a87576a
--- /dev/null
+++ b/modules/run-consul/consul-common.sh
@@ -0,0 +1,496 @@
+#!/bin/bash
+
+set -e
+
+source "/opt/gruntwork/bash-commons/log.sh"
+source "/opt/gruntwork/bash-commons/string.sh"
+source "/opt/gruntwork/bash-commons/assert.sh"
+source "/opt/gruntwork/bash-commons/aws-wrapper.sh"
+
+function log {
+  local -r level="$1"
+  local -r message="$2"
+  local -r timestamp=$(date +"%Y-%m-%d %H:%M:%S")
+  >&2 echo -e "${timestamp} [${level}] [$SCRIPT_NAME] ${message}"
+}
+
+function log_info {
+  local -r message="$1"
+  log "INFO" "$message"
+}
+
+function log_warn {
+  local -r message="$1"
+  log "WARN" "$message"
+}
+
+function log_error {
+  local -r message="$1"
+  log "ERROR" "$message"
+}
+
+# Based on code from: http://stackoverflow.com/a/16623897/483528
+function strip_prefix {
+  local -r str="$1"
+  local -r prefix="$2"
+  echo "${str#$prefix}"
+}
+
+function assert_not_empty {
+  local -r arg_name="$1"
+  local -r arg_value="$2"
+
+  if [[ -z "$arg_value" ]]; then
+    log_error "The value for '$arg_name' cannot be empty"
+    print_usage
+    exit 1
+  fi
+}
+
+function lookup_path_in_instance_metadata {
+  local -r path="$1"
+  curl --silent --show-error --location "$EC2_INSTANCE_METADATA_URL/$path/"
+}
+
+function lookup_path_in_instance_dynamic_data {
+  local -r path="$1"
+  curl --silent --show-error --location "$EC2_INSTANCE_DYNAMIC_DATA_URL/$path/"
+}
+
+function get_instance_ip_address {
+  lookup_path_in_instance_metadata "local-ipv4"
+}
+
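+# For example, on an EC2 instance the metadata helpers above return values like
+# the following (hypothetical):
+#   get_instance_ip_address   # e.g. 10.0.1.25
+#   get_instance_id           # e.g. i-0abcd1234efgh5678
+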
+function get_instance_id { + lookup_path_in_instance_metadata "instance-id" +} + +function get_instance_region { + lookup_path_in_instance_dynamic_data "instance-identity/document" | jq -r ".region" +} + +function get_instance_tags { + local -r instance_id="$1" + local -r instance_region="$2" + local tags="" + local count_tags="" + + log_info "Looking up tags for Instance $instance_id in $instance_region" + for (( i=1; i<="$MAX_RETRIES"; i++ )); do + tags=$(aws ec2 describe-tags \ + --region "$instance_region" \ + --filters "Name=resource-type,Values=instance" "Name=resource-id,Values=${instance_id}") + count_tags=$(echo $tags | jq -r ".Tags? | length") + if [[ "$count_tags" -gt 0 ]]; then + log_info "This Instance $instance_id in $instance_region has Tags." + echo "$tags" + return + else + log_warn "This Instance $instance_id in $instance_region does not have any Tags." + log_warn "Will sleep for $SLEEP_BETWEEN_RETRIES_SEC seconds and try again." + sleep "$SLEEP_BETWEEN_RETRIES_SEC" + fi + done + + log_error "Could not find Instance Tags for $instance_id in $instance_region after $MAX_RETRIES retries." + exit 1 +} + +function get_asg_size { + local -r asg_name="$1" + local -r aws_region="$2" + local asg_json="" + + log_info "Looking up the size of the Auto Scaling Group $asg_name in $aws_region" + asg_json=$(aws autoscaling describe-auto-scaling-groups --region "$aws_region" --auto-scaling-group-names "$asg_name") + echo "$asg_json" | jq -r '.AutoScalingGroups[0].DesiredCapacity' +} + +function get_cluster_size { + local -r instance_tags="$1" + local -r aws_region="$2" + + local asg_name="" + asg_name=$(get_tag_value "$instance_tags" "$AWS_ASG_TAG_KEY") + if [[ -z "$asg_name" ]]; then + log_warn "This EC2 Instance does not appear to be part of an Auto Scaling Group, so cannot determine cluster size. Setting cluster size to 1." + echo 1 + else + get_asg_size "$asg_name" "$aws_region" + fi +} + +# Get the value for a specific tag from the tags JSON returned by the AWS describe-tags: +# https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-tags.html +function get_tag_value { + local -r tags="$1" + local -r tag_key="$2" + + echo "$tags" | jq -r ".Tags[] | select(.Key == \"$tag_key\") | .Value" +} + +function assert_is_installed { + local -r name="$1" + + if [[ ! $(command -v ${name}) ]]; then + log_error "The binary '$name' is required by this script but is not installed or in the system's PATH." 
+    exit 1
+  fi
+}
+
+function split_by_lines {
+  local prefix="$1"
+  shift
+
+  for var in "$@"; do
+    echo "${prefix}${var}"
+  done
+}
+
+function generate_systemd_config {
+  local -r systemd_config_path="$1"
+  local -r consul_config_dir="$2"
+  local -r consul_data_dir="$3"
+  local -r consul_systemd_stdout="$4"
+  local -r consul_systemd_stderr="$5"
+  local -r consul_bin_dir="$6"
+  local -r consul_user="$7"
+  shift 7
+  local -r environment=("$@")
+  local -r config_path="$consul_config_dir/$CONSUL_CONFIG_FILE"
+
+  log_info "Creating systemd config file to run Consul in $systemd_config_path"
+
+  local -r unit_config=$(cat <<EOF
+[Unit]
+Description="HashiCorp Consul - A service mesh solution"
+Documentation=https://www.consul.io/
+Requires=network-online.target
+After=network-online.target
+ConditionFileNotEmpty=$config_path
+EOF
+)
+
+  local -r service_config=$(cat <<EOF
+[Service]
+Type=notify
+User=$consul_user
+Group=$consul_user
+ExecStart=$consul_bin_dir/consul agent -config-dir $consul_config_dir -data-dir $consul_data_dir
+ExecReload=$consul_bin_dir/consul reload
+KillMode=process
+Restart=on-failure
+TimeoutSec=300s
+LimitNOFILE=65536
+$(split_by_lines "Environment=" "${environment[@]}")
+EOF
+)
+
+  local -r log_config=$(cat <<EOF
+StandardOutput=$consul_systemd_stdout
+StandardError=$consul_systemd_stderr
+EOF
+)
+
+  local -r install_config=$(cat <<EOF
+[Install]
+WantedBy=multi-user.target
+EOF
+)
+
+  echo -e "$unit_config" > "$systemd_config_path"
+  echo -e "$service_config" >> "$systemd_config_path"
+  echo -e "$log_config" >> "$systemd_config_path"
+  echo -e "$install_config" >> "$systemd_config_path"
+}
+
+function start_consul {
+  log_info "Reloading systemd config and starting Consul"
+
+  sudo systemctl daemon-reload
+  sudo systemctl enable consul.service
+  sudo systemctl restart consul.service
+}
+
+# Based on: http://unix.stackexchange.com/a/7732/215969
+function get_owner_of_path {
+  local -r path="$1"
+  ls -ld "$path" | awk '{print $3}'
+}
+
+function generate_consul_config {
+  local -r server="${1}"
+  local -r config_dir="${2}"
+  local -r user="${3}"
+  local -r cluster_tag_key="${4}"
+  local -r cluster_tag_value="${5}"
+  local -r datacenter="${6}"
+  local -r enable_gossip_encryption="${7}"
+  local -r gossip_encryption_key="${8}"
+  local -r enable_rpc_encryption="${9}"
+  local -r verify_server_hostname="${10}"
+  local -r ca_path="${11}"
+  local -r cert_file_path="${12}"
+  local -r key_file_path="${13}"
+  local -r cleanup_dead_servers="${14}"
+  local -r last_contact_threshold="${15}"
+  local -r max_trailing_logs="${16}"
+  local -r server_stabilization_time="${17}"
+  local -r redundancy_zone_tag="${18}"
+  local -r disable_upgrade_migration="${19}"
+  local -r upgrade_version_tag=${20}
+  local -r config_path="$config_dir/$CONSUL_CONFIG_FILE"
+  local -r enable_acl="${21}"
+
+  shift 21
+  local -r recursors=("$@")
+
+  local instance_id=""
+  local instance_ip_address=""
+  local instance_region=""
+  # https://www.consul.io/docs/agent/options#ui-1
+  local ui_config_enabled="false"
+
+  instance_id=$(get_instance_id)
+  instance_ip_address=$(get_instance_ip_address)
+  instance_region=$(get_instance_region)
+
+  local retry_join_json=""
+  if [[ -z "$cluster_tag_key" || -z "$cluster_tag_value" ]]; then
+    log_warn "Either the cluster tag key ($cluster_tag_key) or value ($cluster_tag_value) is empty. Will not automatically try to form a cluster based on EC2 tags."
+  else
+    retry_join_json=$(cat <<EOF
+"retry_join": ["provider=aws region=$instance_region tag_key=$cluster_tag_key tag_value=$cluster_tag_value"],
+EOF
+)
+  fi
+
+  local bootstrap_expect=""
+  if [[ "$server" == "true" ]]; then
+    local instance_tags=""
+    local cluster_size=""
+
+    instance_tags=$(get_instance_tags "$instance_id" "$instance_region")
+    cluster_size=$(get_cluster_size "$instance_tags" "$instance_region")
+
+    bootstrap_expect="\"bootstrap_expect\": $cluster_size,"
+    ui_config_enabled="true"
+  fi
+
+  local recursors_config=""
+  if (( ${#recursors[@]} != 0 )); then
+    recursors_config="\"recursors\" : [ "
+    for recursor in "${recursors[@]}"; do
+      recursors_config="${recursors_config}\"${recursor}\", "
+    done
+    recursors_config=$(echo "${recursors_config}" | sed 's/, $//')" ],"
+  fi
+
+  local gossip_encryption_configuration=""
+  if [[ "$enable_gossip_encryption" == "true" && ! -z "$gossip_encryption_key" ]]; then
+    log_info "Creating gossip encryption configuration"
+    gossip_encryption_configuration="\"encrypt\": \"$gossip_encryption_key\","
+  fi
+
+  local rpc_encryption_configuration=""
+  if [[ "$enable_rpc_encryption" == "true" && ! -z "$ca_path" && ! -z "$cert_file_path" && ! -z "$key_file_path" ]]; then
+    log_info "Creating RPC encryption configuration"
+    rpc_encryption_configuration=$(cat <<EOF
+"verify_outgoing": true,
+"verify_incoming": true,
+"verify_server_hostname": $verify_server_hostname,
+"ca_path": "$ca_path",
+"cert_file": "$cert_file_path",
+"key_file": "$key_file_path",
+EOF
+)
+  fi
+
+  local acl_configuration=""
+  if [[ "$enable_acl" == "true" ]]; then
+    log_info "Creating ACL configuration"
+    acl_configuration=$(cat <<EOF
+"acl": {
+  "enabled": true,
+  "default_policy": "deny",
+  "enable_token_persistence": true
+},
+EOF
+)
+  fi
+
+  local -r autopilot_configuration=$(cat <<EOF
+"autopilot": {
+  "cleanup_dead_servers": $cleanup_dead_servers,
+  "last_contact_threshold": "$last_contact_threshold",
+  "max_trailing_logs": $max_trailing_logs,
+  "server_stabilization_time": "$server_stabilization_time",
+  "redundancy_zone_tag": "$redundancy_zone_tag",
+  "disable_upgrade_migration": $disable_upgrade_migration,
+  "upgrade_version_tag": "$upgrade_version_tag"
+},
+EOF
+)
+
+  log_info "Creating default Consul configuration"
+  local default_config_json=$(cat <<EOF
+{
+  "advertise_addr": "$instance_ip_address",
+  "bind_addr": "$instance_ip_address",
+  $bootstrap_expect
+  "client_addr": "0.0.0.0",
+  "datacenter": "$datacenter",
+  "node_name": "$instance_id",
+  $recursors_config
+  $retry_join_json
+  "server": $server,
+  $gossip_encryption_configuration
+  $rpc_encryption_configuration
+  $acl_configuration
+  $autopilot_configuration
+  "ui_config": {
+    "enabled": $ui_config_enabled
+  }
+}
+EOF
+)
+
+  log_info "Installing Consul config file in $config_path"
+  echo "$default_config_json" | jq '.' > "$config_path"
+  chown "$user:$user" "$config_path"
+}
+
+function generate_bootstrap_acl_token {
+  local -r max_retries="$1"
+  local -r sleep_between_retries="$2"
+
+  local token
+
+  for (( i=0; i<"$max_retries"; i++ )); do
+    token=$(consul acl bootstrap -format=json | jq '.SecretID' -r)
+    if [[ "$token" == "" ]]; then
+      log_info "Token could not be obtained, retrying."
+      sleep $sleep_between_retries
+    else
+      echo $token
+      return
+    fi
+  done
+
+  log_error "Unable to obtain ACL token. Aborting."
+ exit 1 +} + +function generate_node_acl_policy { + local -r node_name="$1" + + local -r policy_hcl=$(cat <&2 echo -e "${timestamp} [${level}] [$SCRIPT_NAME] ${message}" -} - -function log_info { - local -r message="$1" - log "INFO" "$message" -} - -function log_warn { - local -r message="$1" - log "WARN" "$message" -} - -function log_error { - local -r message="$1" - log "ERROR" "$message" -} - -# Based on code from: http://stackoverflow.com/a/16623897/483528 -function strip_prefix { - local -r str="$1" - local -r prefix="$2" - echo "${str#$prefix}" -} - -function assert_not_empty { - local -r arg_name="$1" - local -r arg_value="$2" - - if [[ -z "$arg_value" ]]; then - log_error "The value for '$arg_name' cannot be empty" - print_usage - exit 1 - fi -} - -function lookup_path_in_instance_metadata { - local -r path="$1" - curl --silent --show-error --location "$EC2_INSTANCE_METADATA_URL/$path/" -} - -function lookup_path_in_instance_dynamic_data { - local -r path="$1" - curl --silent --show-error --location "$EC2_INSTANCE_DYNAMIC_DATA_URL/$path/" -} - -function get_instance_ip_address { - lookup_path_in_instance_metadata "local-ipv4" -} - -function get_instance_id { - lookup_path_in_instance_metadata "instance-id" -} - -function get_instance_region { - lookup_path_in_instance_dynamic_data "instance-identity/document" | jq -r ".region" -} - -function get_instance_tags { - local -r instance_id="$1" - local -r instance_region="$2" - local tags="" - local count_tags="" - - log_info "Looking up tags for Instance $instance_id in $instance_region" - for (( i=1; i<="$MAX_RETRIES"; i++ )); do - tags=$(aws ec2 describe-tags \ - --region "$instance_region" \ - --filters "Name=resource-type,Values=instance" "Name=resource-id,Values=${instance_id}") - count_tags=$(echo $tags | jq -r ".Tags? | length") - if [[ "$count_tags" -gt 0 ]]; then - log_info "This Instance $instance_id in $instance_region has Tags." - echo "$tags" - return - else - log_warn "This Instance $instance_id in $instance_region does not have any Tags." - log_warn "Will sleep for $SLEEP_BETWEEN_RETRIES_SEC seconds and try again." - sleep "$SLEEP_BETWEEN_RETRIES_SEC" - fi - done - - log_error "Could not find Instance Tags for $instance_id in $instance_region after $MAX_RETRIES retries." - exit 1 -} - -function get_asg_size { - local -r asg_name="$1" - local -r aws_region="$2" - local asg_json="" - - log_info "Looking up the size of the Auto Scaling Group $asg_name in $aws_region" - asg_json=$(aws autoscaling describe-auto-scaling-groups --region "$aws_region" --auto-scaling-group-names "$asg_name") - echo "$asg_json" | jq -r '.AutoScalingGroups[0].DesiredCapacity' -} - -function get_cluster_size { - local -r instance_tags="$1" - local -r aws_region="$2" - - local asg_name="" - asg_name=$(get_tag_value "$instance_tags" "$AWS_ASG_TAG_KEY") - if [[ -z "$asg_name" ]]; then - log_warn "This EC2 Instance does not appear to be part of an Auto Scaling Group, so cannot determine cluster size. Setting cluster size to 1." - echo 1 - else - get_asg_size "$asg_name" "$aws_region" - fi -} - -# Get the value for a specific tag from the tags JSON returned by the AWS describe-tags: -# https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-tags.html -function get_tag_value { - local -r tags="$1" - local -r tag_key="$2" - - echo "$tags" | jq -r ".Tags[] | select(.Key == \"$tag_key\") | .Value" -} - -function assert_is_installed { - local -r name="$1" - - if [[ ! 
$(command -v ${name}) ]]; then - log_error "The binary '$name' is required by this script but is not installed or in the system's PATH." - exit 1 - fi -} - -function split_by_lines { - local prefix="$1" - shift - - for var in "$@"; do - echo "${prefix}${var}" - done -} - -function generate_consul_config { - local -r server="${1}" - local -r config_dir="${2}" - local -r user="${3}" - local -r cluster_tag_key="${4}" - local -r cluster_tag_value="${5}" - local -r datacenter="${6}" - local -r enable_gossip_encryption="${7}" - local -r gossip_encryption_key="${8}" - local -r enable_rpc_encryption="${9}" - local -r verify_server_hostname="${10}" - local -r ca_path="${11}" - local -r cert_file_path="${12}" - local -r key_file_path="${13}" - local -r cleanup_dead_servers="${14}" - local -r last_contact_threshold="${15}" - local -r max_trailing_logs="${16}" - local -r server_stabilization_time="${17}" - local -r redundancy_zone_tag="${18}" - local -r disable_upgrade_migration="${19}" - local -r upgrade_version_tag=${20} - local -r config_path="$config_dir/$CONSUL_CONFIG_FILE" - - shift 20 - local -r recursors=("$@") - - local instance_id="" - local instance_ip_address="" - local instance_region="" - # https://www.consul.io/docs/agent/options#ui-1 - local ui_config_enabled="false" - - instance_id=$(get_instance_id) - instance_ip_address=$(get_instance_ip_address) - instance_region=$(get_instance_region) - - local retry_join_json="" - if [[ -z "$cluster_tag_key" || -z "$cluster_tag_value" ]]; then - log_warn "Either the cluster tag key ($cluster_tag_key) or value ($cluster_tag_value) is empty. Will not automatically try to form a cluster based on EC2 tags." - else - retry_join_json=$(cat < "$config_path" - chown "$user:$user" "$config_path" -} - -function generate_systemd_config { - local -r systemd_config_path="$1" - local -r consul_config_dir="$2" - local -r consul_data_dir="$3" - local -r consul_systemd_stdout="$4" - local -r consul_systemd_stderr="$5" - local -r consul_bin_dir="$6" - local -r consul_user="$7" - shift 7 - local -r environment=("$@") - local -r config_path="$consul_config_dir/$CONSUL_CONFIG_FILE" - - log_info "Creating systemd config file to run Consul in $systemd_config_path" - - local -r unit_config=$(cat < "$systemd_config_path" - echo -e "$service_config" >> "$systemd_config_path" - echo -e "$log_config" >> "$systemd_config_path" - echo -e "$install_config" >> "$systemd_config_path" -} - -function start_consul { - log_info "Reloading systemd config and starting Consul" - - sudo systemctl daemon-reload - sudo systemctl enable consul.service - sudo systemctl restart consul.service -} - -# Based on: http://unix.stackexchange.com/a/7732/215969 -function get_owner_of_path { - local -r path="$1" - ls -ld "$path" | awk '{print $3}' -} - function run { local server="false" local client="false" @@ -446,6 +103,8 @@ function run { local server_stabilization_time="$DEFAULT_AUTOPILOT_SERVER_STABILIZATION_TIME" local redundancy_zone_tag="$DEFAULT_AUTOPILOT_REDUNDANCY_ZONE_TAG" local disable_upgrade_migration="$DEFAULT_AUTOPILOT_DISABLE_UPGRADE_MIGRATION" + local enable_acl + local acl_storage_type while [[ $# -gt 0 ]]; do local key="$1" @@ -573,6 +232,14 @@ function run { --skip-consul-config) skip_consul_config="true" ;; + --enable-acl) + enable_acl="true" + ;; + --acl-storage-type) + assert_not_empty "$key" "$2" + acl_storage_type="$2" + shift + ;; --recursor) assert_not_empty "$key" "$2" recursors+=("$2") @@ -592,11 +259,36 @@ function run { shift done + if [[ "$enable_acl" == "true" ]]; 
then
+    if [[ -z "$acl_storage_type" ]]; then
+      log_error "You must specify an option for the --acl-storage-type parameter when --enable-acl is specified."
+      exit 1
+    fi
+
+    local storage_type_matched="false"
+
+    # Source the appropriate storage provider script
+    case "$acl_storage_type" in
+      'ssm' | 'SSM')
+        storage_type_matched="true"
+        source ${SCRIPT_DIR}/consul-bootstrap-ssm.sh;;
+
+      *)
+        if [[ "$storage_type_matched" == "false" ]]; then
+          log_error "ACL storage type '${acl_storage_type}' is not supported."
+          exit 1
+        fi;;
+
+    esac
+  fi
+
   if [[ ("$server" == "true" && "$client" == "true") || ("$server" == "false" && "$client" == "false") ]]; then
     log_error "Exactly one of --server or --client must be set."
     exit 1
   fi
 
+  # TODO: Add step to ensure bash-commons is installed if --enable-acl is set
+
   assert_is_installed "systemctl"
   assert_is_installed "aws"
   assert_is_installed "curl"
@@ -636,6 +328,7 @@ function run {
     assert_not_empty "--key_file_path" "$key_file_path"
   fi
 
+  log_info "Creating Consul configuration"
   generate_consul_config "$server" \
     "$config_dir" \
     "$user" \
@@ -656,11 +349,77 @@ function run {
       "$redundancy_zone_tag" \
       "$disable_upgrade_migration" \
       "$upgrade_version_tag" \
+      "$enable_acl" \
       "${recursors[@]}"
   fi
 
+  log_info "Creating Consul systemd file"
   generate_systemd_config "$SYSTEMD_CONFIG_PATH" "$config_dir" "$data_dir" "$systemd_stdout" "$systemd_stderr" "$bin_dir" "$user" "${environment[@]}"
+  log_info "Starting Consul"
   start_consul
+
+  if [[ "$enable_acl" == "true" ]]; then
+    local -r asg_name=$(aws_wrapper_get_asg_name $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC)
+    local -r aws_region=$(aws_get_instance_region)
+    local -r instance_id=$(aws_get_instance_id)
+
+    local bootstrap_token
+
+    # Calculate the rally point server for the ASG
+    local -r rally_point_hostname=$(aws_wrapper_get_asg_rally_point $asg_name $aws_region "false")
+    log_info "Calculated rally point instance is $rally_point_hostname."
+    local -r local_hostname=$(aws_wrapper_get_hostname)
+    log_info "Local hostname is $local_hostname"
+
+    if [[ "$rally_point_hostname" == "$local_hostname" ]]; then
+      log_info "Checking if bootstrap token already exists"
+      local -r existing_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region 1 0 "true")
+
+      if [[ -z "$existing_token" && "$server" == "true" ]]; then
+        log_info "Generating bootstrap ACL token"
+        bootstrap_token=$(generate_bootstrap_acl_token $MAX_RETRIES $SLEEP_BETWEEN_RETRIES_SEC)
+        log_info "Persisting bootstrap token to SSM parameter"
+        write_acl_token $bootstrap_token $cluster_tag_value "bootstrap" $aws_region "ssm"
+      else
+        log_info "Bootstrap token already exists, skipping"
+      fi
+    fi
+
+    # If the bootstrap token hasn't already been read (i.e. this is running on a node other than the rally point),
+    # we need to read it. read_acl_token polls the SSM parameter (by default every 5 seconds, for up to 60 attempts)
+    # until the rally point has published the token.
+    if [[ -z "$bootstrap_token" ]]; then
+      log_info "Acquiring bootstrap token"
+      bootstrap_token=$(read_acl_token $cluster_tag_value "bootstrap" $aws_region)
+    fi
+
+    local -r consul_major_version=$(get_consul_major_version)
+    local -r consul_minor_version=$(get_consul_minor_version)
+    local -r consul_build_version=$(get_consul_build_version)
+
+    local agent_token
+
+    # Consul 1.8.1 and newer support creating the agent token via -node-identity
+    if ([ $consul_major_version -eq 1 ] && ([ $consul_minor_version -ge 9 ] || ([ $consul_minor_version -eq 8 ] && [ $consul_build_version -ge 1 ]))) || [ $consul_major_version -gt 1 ]; then
+      # Generate the agent token using the newer method
+      log_info "Creating agent token using -node-identity"
+      agent_token=$(generate_agent_token $instance_id $datacenter $bootstrap_token)
+    else
+      # Generate the agent policy dynamically and write it
+      log_info "Creating agent policy and token"
+      local -r agent_policy=$(generate_node_acl_policy $local_hostname)
+      write_acl_policy "$instance_id" "$agent_policy" "$bootstrap_token"
+
+      # Generate the agent token and persist it
+      agent_token=$(generate_token "$instance_id" "$instance_id agent policy" $bootstrap_token)
+    fi
+
+    if [[ -z "$agent_token" ]]; then
+      log_error "Unable to acquire agent token. Aborting."
+      exit 1
+    else
+      set_agent_token "$agent_token" "$bootstrap_token"
+    fi
+  fi
 }
 
 run "$@"
diff --git a/test/README.md b/test/README.md
index 874818e6..d5fc424d 100644
--- a/test/README.md
+++ b/test/README.md
@@ -26,7 +26,6 @@ clean up.
 ### Prerequisites
 
 - Install the latest version of [Go](https://golang.org/).
-- Install [dep](https://github.com/golang/dep) for Go dependency management.
 - Install [Terraform](https://www.terraform.io/downloads.html).
 - Configure your AWS credentials using one of the [options supported by the AWS SDK](http://docs.aws.amazon.com/sdk-for-java/v1/developer-guide/credentials.html). Usually, the easiest option is to
@@ -35,11 +34,11 @@ clean up.
 
 ### One-time setup
 
-Download Go dependencies using dep:
+Download Go dependencies:
 
 ```
 cd test
-dep ensure
+go get .
 ```
 
@@ -60,5 +59,15 @@
 cd test
 go test -v -timeout 60m -run TestFoo
 ```
 
+### Running tests without building the AMI every time
+
+In certain cases you may want to run the deploy / validate tests without building an AMI every time.
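+This is done by setting the `SKIP_` environment variables honored by the test stages (see the comments at the top of `runConsulClusterTestWithVars` in `consul_helpers.go`), for example: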
+
+```bash
+# Build the AMIs
+SKIP_deploy="true" SKIP_validate="true" SKIP_teardown="true" go test -v -timeout 60m
+
+# Run the tests, which you can do over and over again
+SKIP_setup_ami="true" go test -v -timeout 60m
+```
\ No newline at end of file
diff --git a/test/consul_cluster_test.go b/test/consul_cluster_test.go
index 179f09e0..ad7dfed1 100644
--- a/test/consul_cluster_test.go
+++ b/test/consul_cluster_test.go
@@ -7,17 +7,17 @@ import (
 // Test the example in the root folder
 func TestConsulClusterWithUbuntu16Ami(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "")
+	runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "", false)
 }
 
 // Test the example in the root folder
 func TestConsulClusterWithUbuntu18Ami(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "")
+	runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", "", false)
 }
 
 // Test the example in the root folder
 func TestConsulClusterWithAmazonLinuxAmi(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", "")
+	runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", "", false)
 }
diff --git a/test/consul_cluster_with_acl_ssm_test.go b/test/consul_cluster_with_acl_ssm_test.go
new file mode 100644
index 00000000..82a7576c
--- /dev/null
+++ b/test/consul_cluster_with_acl_ssm_test.go
@@ -0,0 +1,23 @@
+package test
+
+import (
+	"testing"
+)
+
+// Test the example in the example-with-acl folder
+func TestConsulClusterWithAclSsmUbuntu16Ami(t *testing.T) {
+	t.Parallel()
+	runConsulClusterTest(t, "ubuntu16-ami", "examples/example-with-acl", "../examples/consul-ami/consul.json", "ubuntu", "", true)
+}
+
+// Test the example in the example-with-acl folder
+func TestConsulClusterWithAclSsmUbuntu18Ami(t *testing.T) {
+	t.Parallel()
+	runConsulClusterTest(t, "ubuntu18-ami", "examples/example-with-acl", "../examples/consul-ami/consul.json", "ubuntu", "", true)
+}
+
+// Test the example in the example-with-acl folder
+func TestConsulClusterWithAclSsmAmazonLinuxAmi(t *testing.T) {
+	t.Parallel()
+	runConsulClusterTest(t, "amazon-linux-2-ami", "examples/example-with-acl", "../examples/consul-ami/consul.json", "ec2-user", "", true)
+}
diff --git a/test/consul_cluster_with_custom_asg_role_test.go b/test/consul_cluster_with_custom_asg_role_test.go
index 96595f03..a2269875 100644
--- a/test/consul_cluster_with_custom_asg_role_test.go
+++ b/test/consul_cluster_with_custom_asg_role_test.go
@@ -11,7 +11,7 @@ func TestConsulClusterWithCustomASGRoleUbuntu16Ami(t *testing.T) {
 	terraformVars := map[string]interface{}{
 		"consul_service_linked_role_suffix": random.UniqueId(),
 	}
-	runConsulClusterTestWithVars(t, "ubuntu16-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "")
+	runConsulClusterTestWithVars(t, "ubuntu16-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "", false)
 }
 
 func TestConsulClusterWithCustomASGRoleUbuntu18Ami(t *testing.T) {
@@ -19,7 +19,7 @@ func TestConsulClusterWithCustomASGRoleUbuntu18Ami(t *testing.T) {
 	terraformVars := map[string]interface{}{
 		"consul_service_linked_role_suffix": random.UniqueId(),
 	}
-	runConsulClusterTestWithVars(t, "ubuntu18-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "")
+	runConsulClusterTestWithVars(t, "ubuntu18-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ubuntu", terraformVars, "", false)
 }
 
 func TestConsulClusterWithCustomASGRoleAmazonLinuxAmi(t *testing.T) {
@@ -27,5 +27,5 @@ func TestConsulClusterWithCustomASGRoleAmazonLinuxAmi(t *testing.T) {
 	terraformVars := map[string]interface{}{
 		"consul_service_linked_role_suffix": random.UniqueId(),
 	}
-	runConsulClusterTestWithVars(t, "amazon-linux-2-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ec2-user", terraformVars, "")
+	runConsulClusterTestWithVars(t, "amazon-linux-2-ami", "examples/example-with-custom-asg-role", "../examples/consul-ami/consul.json", "ec2-user", terraformVars, "", false)
 }
diff --git a/test/consul_cluster_with_encryption_test.go b/test/consul_cluster_with_encryption_test.go
index 2149dd3a..7a3eb8be 100644
--- a/test/consul_cluster_with_encryption_test.go
+++ b/test/consul_cluster_with_encryption_test.go
@@ -4,15 +4,15 @@ import "testing"
 
 func TestConsulClusterWithEncryptionUbuntu16Ami(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "ubuntu16-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "")
+	runConsulClusterTest(t, "ubuntu16-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "", false)
 }
 
 func TestConsulClusterWithEncryptionUbuntu18Ami(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "ubuntu18-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "")
+	runConsulClusterTest(t, "ubuntu18-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ubuntu", "", false)
 }
 
 func TestConsulClusterWithEncryptionAmazonLinuxAmi(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "amazon-linux-2-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ec2-user", "")
+	runConsulClusterTest(t, "amazon-linux-2-ami", "examples/example-with-encryption", "../examples/example-with-encryption/packer/consul-with-certs.json", "ec2-user", "", false)
 }
diff --git a/test/consul_enterprise_test.go b/test/consul_enterprise_test.go
index 6632f052..4bbada7e 100644
--- a/test/consul_enterprise_test.go
+++ b/test/consul_enterprise_test.go
@@ -8,17 +8,17 @@ import (
 // Test the example in the root folder
 func TestConsulInstallFromURLWithUbuntu16Ami(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t))
+	runConsulClusterTest(t, "ubuntu16-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t), false)
 }
 
 func TestConsulInstallFromURLWithUbuntu18Ami(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t))
+	runConsulClusterTest(t, "ubuntu18-ami", ".", "../examples/consul-ami/consul.json", "ubuntu", getUrlFromEnv(t), false)
 }
 
 func TestConsulInstallFromURLWithAmazonLinuxAmi(t *testing.T) {
 	t.Parallel()
-	runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", getUrlFromEnv(t))
+	runConsulClusterTest(t, "amazon-linux-2-ami", ".", "../examples/consul-ami/consul.json", "ec2-user", getUrlFromEnv(t), false)
 }
 
 // To test this on circle ci you need a url set as an environment variable, CONSUL_AMI_TEMPLATE_VAR_DOWNLOAD_URL
diff --git a/test/consul_helpers.go b/test/consul_helpers.go
index 15381128..8b098abb 100644
--- a/test/consul_helpers.go
+++ b/test/consul_helpers.go
@@ -41,17 +41,18 @@ const AWS_DEFAULT_REGION_ENV_VAR = "AWS_DEFAULT_REGION"
 // 2. Building the AMI in the consul-ami example with the given build name
 // 3. Deploying that AMI using the consul-cluster Terraform code
 // 4. Checking that the Consul cluster comes up within a reasonable time period and can respond to requests
-func runConsulClusterTest(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, enterpriseUrl string) {
+func runConsulClusterTest(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, enterpriseUrl string, enableAcl bool) {
 	runConsulClusterTestWithVars(t,
 		packerBuildName,
 		examplesFolder,
 		packerTemplatePath,
 		sshUser,
 		map[string]interface{}{},
-		enterpriseUrl)
+		enterpriseUrl,
+		enableAcl)
 }
 
-func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, terraformVarsMerge map[string]interface{}, enterpriseUrl string) {
+func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examplesFolder string, packerTemplatePath string, sshUser string, terraformVarsMerge map[string]interface{}, enterpriseUrl string, enableAcl bool) {
 	// Uncomment any of the following to skip that section during the test
 	// os.Setenv("SKIP_setup_ami", "true")
 	// os.Setenv("SKIP_deploy", "true")
@@ -122,18 +123,41 @@ func runConsulClusterTestWithVars(t *testing.T, packerBuildName string, examples
 	}
 
 	// Check the Consul servers
-	checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_SERVER_ASG_NAME, terraformOptions, awsRegion)
+	checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_SERVER_ASG_NAME, terraformOptions, awsRegion, enableAcl)
 
 	// Check the Consul clients
-	checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion)
+	checkConsulClusterIsWorking(t, CONSUL_CLUSTER_EXAMPLE_OUTPUT_CLIENT_ASG_NAME, terraformOptions, awsRegion, enableAcl)
 	})
 }
 
 // Check that the Consul cluster comes up within a reasonable time period and can respond to requests
-func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terratestOptions *terraform.Options, awsRegion string) {
+func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terratestOptions *terraform.Options, awsRegion string, enableAcl bool) {
 	asgName := terraform.OutputRequired(t, terratestOptions, asgNameOutputVar)
 	nodeIpAddress := getIpAddressOfAsgInstance(t, asgName, awsRegion)
-	testConsulCluster(t, nodeIpAddress)
+
+	maxRetries := 60
+	sleepBetweenRetries := 10 * time.Second
+
+	token := ""
+
+	if enableAcl {
+		// Retrieve the bootstrap token that the rally point server published to SSM
+		token = retry.DoWithRetry(t, "Check for SSM token", maxRetries, sleepBetweenRetries, func() (string, error) {
+			parameterName := fmt.Sprintf("/%s/token/bootstrap", terratestOptions.Vars["cluster_name"])
+			token, err := aws.GetParameterE(t, awsRegion, parameterName)
+			if err != nil {
+				return "", err
+			}
+			return token, nil
+		})
+	}
+
+	clientArgs := CreateConsulClientArgs{
+		ipAddress: nodeIpAddress,
+		token:     token,
+	}
+
+	testConsulCluster(t, &clientArgs)
 }
 
 // Use a Consul client to connect to the given node and use it to verify that:
@@ -141,8 +165,8 @@ func checkConsulClusterIsWorking(t *testing.T, asgNameOutputVar string, terrates
 // 1. The Consul cluster has deployed
 // 2. The cluster has the expected number of members
 // 3. The cluster has elected a leader
-func testConsulCluster(t *testing.T, nodeIpAddress string) {
-	consulClient := createConsulClient(t, nodeIpAddress)
+func testConsulCluster(t *testing.T, clientArgs *CreateConsulClientArgs) {
+	consulClient := createConsulClient(t, clientArgs)
 	maxRetries := 60
 	sleepBetweenRetries := 10 * time.Second
 	expectedMembers := CONSUL_CLUSTER_EXAMPLE_DEFAULT_NUM_CLIENTS + CONSUL_CLUSTER_EXAMPLE_DEFAULT_NUM_SERVERS
@@ -169,14 +193,40 @@ func testConsulCluster(t *testing.T, nodeIpAddress string) {
 		return leader, nil
 	})
 
+	if clientArgs.token != "" {
+		logger.Logf(t, "Attempting to retrieve members without token")
+		consulClient = createConsulClient(t, &CreateConsulClientArgs{
+			ipAddress: clientArgs.ipAddress,
+			token:     "",
+		})
+		// With ACLs in default-deny mode, an anonymous client should see an empty member list
+		retry.DoWithRetry(t, "Check for empty members with no token", maxRetries, sleepBetweenRetries, func() (string, error) {
+			members, err := consulClient.Agent().Members(false)
+			if err != nil {
+				return "", err
+			}
+			if len(members) != 0 {
+				return "", fmt.Errorf("expected an empty member list when not using a token, found %d members instead", len(members))
+			}
+
+			return "", nil
+		})
+	}
+
 	logger.Logf(t, "Consul cluster is properly deployed and has elected leader %s", leader)
 }
 
+type CreateConsulClientArgs struct {
+	ipAddress string
+	token     string
+}
+
 // Create a Consul client
-func createConsulClient(t *testing.T, ipAddress string) *api.Client {
+func createConsulClient(t *testing.T, clientArgs *CreateConsulClientArgs) *api.Client {
 	config := api.DefaultConfig()
-	config.Address = fmt.Sprintf("%s:8500", ipAddress)
-
+	config.Address = fmt.Sprintf("%s:8500", clientArgs.ipAddress)
+	if clientArgs.token != "" {
+		config.Token = clientArgs.token
+	}
 	client, err := api.NewClient(config)
 	if err != nil {
 		t.Fatalf("Failed to create Consul client due to error: %v", err)