diff --git a/.editorconfig b/.editorconfig new file mode 100644 index 000000000..0df80d477 --- /dev/null +++ b/.editorconfig @@ -0,0 +1,5 @@ +[*.kt] +end_of_line = lf +insert_final_newline = true +indent_style = space +indent_size = 4 \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..0f26ec342 --- /dev/null +++ b/.gitignore @@ -0,0 +1,29 @@ +# Intellij Idea project files +.idea +*.iml +*.ipr +*.iws + +# gradle config +.gradle + +# project binaries +build +out +classes + +# sonar +sonar-project.properties +.sonar + +# mac os x +.DS_Store + +# netbeans +.nb-gradle + +/config +!/config/detekt/ +!/config/detekt/* + +deployment.yml diff --git a/build.gradle b/build.gradle new file mode 100644 index 000000000..27356a7f0 --- /dev/null +++ b/build.gradle @@ -0,0 +1,127 @@ +buildscript { + repositories { + mavenCentral() + } + dependencies { + classpath group: 'pl.allegro.tech.build', name: 'axion-release-plugin', version: '1.10.2' + } +} + +plugins { + id 'pl.allegro.tech.build.axion-release' version '1.10.2' + id 'org.jetbrains.kotlin.jvm' version '1.3.0' + id 'org.jetbrains.kotlin.plugin.spring' version '1.3.0' + id 'org.jetbrains.kotlin.plugin.allopen' version '1.3.0' + id "org.jlleitschuh.gradle.ktlint" version "6.3.1" + id "org.jlleitschuh.gradle.ktlint-idea" version "6.3.1" + id "io.gitlab.arturbosch.detekt" version "1.0.0-RC11" +} + +scmVersion { + tag { + prefix = project.rootProject.name + } + versionCreator 'versionWithBranch' +} + +allprojects { + + project.group = 'pl.allegro.tech.servicemesh' + project.version = scmVersion.version + + repositories { + jcenter() + mavenCentral() + } + + apply plugin: 'kotlin' + apply plugin: 'pl.allegro.tech.build.axion-release' + apply plugin: 'kotlin-spring' + + project.ext.versions = [ + kotlin : '1.3.0', + java_controlplane : '0.1.16', + spring_boot : '2.1.5.RELEASE', + grpc : '1.21.0', + jaxb : '2.3.0', + javaxactivation : '1.1.1', + micrometer : '1.1.2', + dropwizard : 
'4.0.5', + ecwid_consul : '1.4.1', + awaitility : '3.1.3', + embedded_consul : '2.0.0', + junit : '5.3.2', + assertj : '3.11.1', + jackson : '2.9.0', + toxiproxy : '2.1.3', + testcontainers : '1.10.6', + reactor : '3.2.5.RELEASE', + consul_recipes : '0.8.3', + mockito : '2.23.0', + cglib : '3.2.9', + logback : '1.2.3', + slf4j : '1.7.25' + ] +} + +subprojects { + + apply plugin: 'maven-publish' + apply plugin: 'pl.allegro.tech.build.axion-release' + apply plugin: 'org.jlleitschuh.gradle.ktlint' + apply plugin: 'io.gitlab.arturbosch.detekt' + + sourceCompatibility = 1.8 + [compileJava, compileTestJava]*.options*.encoding = 'UTF-8' + + test { + useJUnitPlatform() + testLogging { + exceptionFormat = 'full' + } + } + + publishing { + publications { + maven(MavenPublication) { + from project.components.java + } + } + } + + configurations { + compile.exclude group: 'commons-logging', module: 'commons-logging' + compile.exclude group: 'org.slf4j', module: 'slf4j-log4j12' + compile.exclude group: 'org.slf4j', module: 'slf4j-jcl' + compile.exclude group: 'log4j', module: 'log4j' + } + + compileKotlin { + kotlinOptions { + jvmTarget = '1.8' + } + } + + compileTestKotlin { + kotlinOptions { + jvmTarget = '1.8' + } + } + + dependencies { + testCompile group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: versions.junit + testCompile group: 'org.assertj', name: 'assertj-core', version: versions.assertj + testRuntimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: versions.junit + } + + detekt { + toolVersion = "1.0.0-RC11" + input = files("src/main/kotlin", "src/test/kotlin") + filters = ".*/resources/.*,.*/build/.*" + config = files("$rootDir/config/detekt/default-detekt-config.yml", "$rootDir/config/detekt/detekt-config.yml") + } +} + +wrapper { + gradleVersion = '5.2.1' +} diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 000000000..9bece67bb --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,56 @@ +# 
Architecture + +## High level + +This is a high-level view of the Service Mesh system with Envoy Control + +![high level architecture](assets/images/high_level_architecture.png) + +In each data center, Envoy Control polls the services location from a discovery service system. Then, the state +is propagated to Envoy instances running alongside service instances. + +When _service-a_ wants to communicate with _service-b_ it sends a request to it, but the request is intercepted +by Envoy, which will redirect the request to the proper instance. Envoy can also add tracing headers, add encryption, +circuit breaking and much more. + +## Envoy control + +Envoy Control is responsible for feeding Envoys with configuration of +[CDS](https://www.envoyproxy.io/docs/envoy/latest/configuration/upstream/cluster_manager/cds), +[EDS](https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/upstream/service_discovery#arch-overview-service-discovery-types-eds), +and [RDS](https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_conn_man/rds.html) data based on custom metadata. +Right now CDS and EDS data comes from Consul service discovery, +but there is nothing special about our integration with Consul and users can integrate as many sources as they want. + +![envoy control modules drawing](assets/images/envoy-control-modules-drawing.png) + +## Sources + +Source is a stream of `cluster` and `endpoints` states. + +There can be many sources, all they have to do is: + +* implement `LocalServiceChanges` +* be exposed as a bean - if you're using Envoy Control Runner then all of them will be combined in `GlobalServiceChanges`, +if not - you have to combine them yourself + +### Consul +Implements a stream of service instance changes coming from Consul discovery service. + +## Modules + +### Envoy Control +The core module that provides integration with Envoy and API to integrate Discovery Service system. 
+ +### Envoy Control Runner +Example of the code that builds Control Plane and runs it. It uses [Spring Framework](https://spring.io/) to connect +elements and serve HTTP endpoint and HTTP client for [Cross DC Synchronization](features/multi_dc_support.md) feature. + +#### Why Spring? +We've chosen Spring for Envoy Control Runner because it provides an easy way to create HTTP server and client. +On top of that, it also provides Dependency Injection and property management. +You can easily replace it with your framework of choice - Envoy Control module as well as source modules are framework-agnostic. + +### Extensibility +If you want to extend Envoy Control you can either depend on Envoy Control module and create your own Runner or you can +depend on the Envoy Control Runner itself and provide only minimal modifications. \ No newline at end of file diff --git a/docs/assets/extra.js b/docs/assets/extra.js new file mode 100644 index 000000000..979a34e9c --- /dev/null +++ b/docs/assets/extra.js @@ -0,0 +1,29 @@ +// source: https://github.com/squidfunk/mkdocs-material/issues/767 +document.addEventListener("DOMContentLoaded", function() { + load_navpane(); +}); + +function load_navpane() { + var width = window.innerWidth; + if (width <= 1200) { + return; + } + + var nav = document.getElementsByClassName("md-nav"); + for(var i = 0; i < nav.length; i++) { + if (typeof nav.item(i).style === "undefined") { + continue; + } + + if (nav.item(i).getAttribute("data-md-level") && nav.item(i).getAttribute("data-md-component")) { + nav.item(i).style.display = 'block'; + nav.item(i).style.overflow = 'visible'; + } + } + + var nav = document.getElementsByClassName("md-nav__toggle"); + for(var i = 0; i < nav.length; i++) { + nav.item(i).checked = true; + } +} + diff --git a/docs/assets/images/envoy-control-modules-drawing.png b/docs/assets/images/envoy-control-modules-drawing.png new file mode 100644 index 000000000..dc59da0b3 Binary files /dev/null and 
b/docs/assets/images/envoy-control-modules-drawing.png differ diff --git a/docs/assets/images/high_level_architecture.png b/docs/assets/images/high_level_architecture.png new file mode 100644 index 000000000..9f8507afa Binary files /dev/null and b/docs/assets/images/high_level_architecture.png differ diff --git a/docs/configuration.md b/docs/configuration.md new file mode 100644 index 000000000..661eda4e5 --- /dev/null +++ b/docs/configuration.md @@ -0,0 +1,94 @@ +# Configuration + +This is a global list of all the settings + +## GRPC server +Property | Description | Default value +------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.server.executorGroup.type** | Group executor type. DIRECT or PARALLEL | DIRECT +**envoy-control.server.executorGroup.parallelPoolSize** | Pool size used for executor group in PARALLEL mode | 4 +**envoy-control.server.nioEventLoopThreadCount** | The number of threads that will be used by netty's nio event loop | 1 +**envoy-control.server.nioEventLoopPoolSize** | Pool size of NIO Event Loop | 0 (Number of CPUs * 2) +**envoy-control.server.netty.keepAliveTime** | Sets a custom keepalive time for Netty server | 15s +**envoy-control.server.netty.permitKeepAliveTime** | Specify the most aggressive keep-alive time clients are permitted to configure (in seconds) | 10s +**envoy-control.server.netty.permitKeepAliveWithoutCalls** | Sets whether to allow clients to send keep-alive HTTP/2 PINGs even if there are no outstanding RPCs on the connection | true +**envoy-control.server.port** | Port of the xDS server | 50000 +**envoy-control.server.serverPoolSize** | Pool size of xDS server | 16 +**envoy-control.server.serverPoolKeepAlive** | Threads keep alive in xDS server pool | 10m 
+**envoy-control.server.snapshotCleanup.collectAfterMillis** | How long a snapshot must be referenced before being collected | 10s +**envoy-control.server.snapshotCleanup.collectionIntervalMillis** | How often the collection background action should run | 10s + +## Snapshot properties +Property | Description | Default value +------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.envoy.snapshot.edsConnectionTimeout** | Connection timeout for EDS clusters | 2s +**envoy-control.envoy.snapshot.egress.clusterNotFoundStatusCode** | Status code when cluster is not found | 503 +**envoy-control.envoy.snapshot.localService.idleTimeout** | Idle timeout between client to envoy | 60s +**envoy-control.envoy.snapshot.localService.responseTimeout** | Response timeout for localService | 15s +**envoy-control.envoy.snapshot.routes.metrics.enabled** | Enable metrics route | false +**envoy-control.envoy.snapshot.routes.metrics.pathPrefix** | Path prefix of metrics | /status/envoy/stats/prometheus +**envoy-control.envoy.snapshot.routes.status.metrics.enabled** | Enable status route | false +**envoy-control.envoy.snapshot.routes.status.metrics.pathPrefix** | Path prefix of metrics | /status +**envoy-control.envoy.snapshot.routes.status.metrics.createVirtualCluster** | Create virtual cluster for status route | false +**envoy-control.envoy.snapshot.stateSampleDuration** | Duration of state sampling (this is used to prevent surges in consul events overloading control plane) | 1s +**envoy-control.envoy.snapshot.xdsClusterName** | Name of cluster for xDS operations | envoy-control-xds + +## Permissions +Property | Description | Default value +------------------------------------------------------------------------------------------- | 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.envoy.snapshot.incomingPermissions.enabled** | Enable incoming permissions | false +**envoy-control.envoy.snapshot.incomingPermissions.endpointUnavailableStatusCode** | Status code when endpoint is not available | 503 +**envoy-control.envoy.snapshot.incomingPermissions.clientIdentityHeader** | Header that identifies the client that called the endpoint | x-service-name +**envoy-control.envoy.snapshot.outgoingPermissions.enabled** | Enable outgoing permissions | false +**envoy-control.envoy.snapshot.outgoingPermissions.allServicesDependenciesValue** | Special value that signifies that the service depends on all other services | * +**envoy-control.envoy.snapshot.outgoingPermissions.servicesAllowedToUseWildcard** | Services that are allowed to have the special value in outgoing.dependency field | false + +## Outlier detection +Property | Description | Default value +------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.envoy.snapshot.clusterOutlierDetection.enabled** | Enable cluster outlier detection | false +**envoy-control.envoy.snapshot.clusterOutlierDetection.baseEjectionTime** | The base time that a host is ejected for | 30s +**envoy-control.envoy.snapshot.clusterOutlierDetection.consecutiveGatewayFailure** | The number of consecutive gateway failures (502, 503, 504 status or connection errors that are mapped to one of those status codes) before a consecutive gateway failure ejection occurs | 5 +**envoy-control.envoy.snapshot.clusterOutlierDetection.consecutive5xx** | The number of consecutive 5xx 
responses before a consecutive 5xx ejection | 5 +**envoy-control.envoy.snapshot.clusterOutlierDetection.enforcingConsecutiveGatewayFailure** | The % chance that a host will be actually ejected when an outlier status is detected through consecutive gateway failures | 0 +**envoy-control.envoy.snapshot.clusterOutlierDetection.enforcingConsecutive5xx** | The % chance that a host will be actually ejected when an outlier status is detected through consecutive 5xx | 100 +**envoy-control.envoy.snapshot.clusterOutlierDetection.enforcingSuccessRate** | The % chance that a host will be actually ejected when an outlier status is detected through success rate statistics | 100 +**envoy-control.envoy.snapshot.clusterOutlierDetection.interval** | The time interval between ejection analysis sweeps | 10s +**envoy-control.envoy.snapshot.clusterOutlierDetection.maxEjectionPercent** | The maximum % of an upstream cluster that can be ejected due to outlier detection | 10 +**envoy-control.envoy.snapshot.clusterOutlierDetection.successRateMinimumHosts** | The number of hosts in a cluster that must have enough request volume to detect success rate outliers | 5 +**envoy-control.envoy.snapshot.clusterOutlierDetection.successRateRequestVolume** | The minimum number of total requests that must be collected in one interval (as defined by the interval duration above) to include this host * in success rate based outlier detection | 100 +**envoy-control.envoy.snapshot.clusterOutlierDetection.successRateStdevFactor** | This factor is used to determine the ejection threshold for success rate outlier ejection. 
| 1900 + +## Retries +Property | Description | Default value +----------------------------------------------------------------------------------------------------| ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.envoy.snapshot.localService.retryPolicy.\<selector\>.enabled** | Enable retry policy for localService | false +**envoy-control.envoy.snapshot.localService.retryPolicy.\<selector\>.retryOn** | When should envoy retry request [Envoy V2 API retry-on](https://www.envoyproxy.io/docs/envoy/latest/configuration/http_filters/router_filter#x-envoy-retry-on) | empty list +**envoy-control.envoy.snapshot.localService.retryPolicy.\<selector\>.numRetries** | Number of retries | 1 +**envoy-control.envoy.snapshot.localService.retryPolicy.\<selector\>.perTryTimeout** | Specifies a non-zero upstream timeout per retry attempt | 0ms +**envoy-control.envoy.snapshot.localService.retryPolicy.\<selector\>.hostSelectionRetryMaxAttempts** | The maximum number of times host selection will be reattempted before request being routed to last selected host | 1 +**envoy-control.envoy.snapshot.localService.retryPolicy.\<selector\>.retriableStatusCodes** | HTTP status codes for which envoy should trigger retry in addition to retryOn | empty list + +Where `<selector>` is one of the following: +* `perHttpMethod.{GET,HEAD,POST,PUT,DELETE}` - retry policy for requests with given HTTP method +* `default` - default retry policy, applied for every request that doesn't match more specific selector + +## Cross DC synchronization +Property | Description | Default value +------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.sync.enabled** | Enable Cross DC 
Synchronization | false +**envoy-control.sync.connectionTimeout** | Connection timeout to other Envoy Controls | 1s +**envoy-control.sync.envoyControlAppName** | Envoy Control app name available in discovery service | envoy-control +**envoy-control.sync.pollingInterval** | Polling interval in seconds | 1 +**envoy-control.sync.readTimeout** | Read timeout to other Envoy Controls | 500ms + +## Service filters +Property | Description | Default value +------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.serviceFilters.excludedNamesPatterns** | Regex for excluding services with a given name | empty list + +## Consul +Property | Description | Default value +------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------- +**envoy-control.source.consul.host** | Hostname of consul server | localhost +**envoy-control.source.consul.port** | Port of consul server | 8500 diff --git a/docs/deployment/deployment.md b/docs/deployment/deployment.md new file mode 100644 index 000000000..c2db9b71e --- /dev/null +++ b/docs/deployment/deployment.md @@ -0,0 +1,37 @@ +# Deployment + +## Dependencies + +Envoy Control requires a Consul cluster to run. See [Consul Configuration](../integrations/consul.md) section on +how to connect to a cluster. + +## Scalability + +Envoy Control is a stateless application, which means that there can be as many instances running in the same cluster as needed. + +## Envoy Configuration + +Example Envoy configuration that is compatible with Envoy Control is available in tests. 
+ + +## Envoy Control Configuration + +When running Envoy Control Runner, you can configure the app in Spring's way. + +### Environment variables + +Use `ENVOY_CONTROL_RUNNER_OPTS` environment variable to override configuration. + +Example +```bash +export ENVOY_CONTROL_RUNNER_OPTS="-Denvoy-control.source.consul.host=127.0.0.1 -Denvoy-control.source.consul.port=18500" +``` + +### External configuration + +Instead of overriding every property, it is possible to provide a YAML configuration file. +```bash +export SPRING_CONFIG_LOCATION="file://path/properties.yaml" +``` \ No newline at end of file diff --git a/docs/deployment/observability.md b/docs/deployment/observability.md new file mode 100644 index 000000000..96f488154 --- /dev/null +++ b/docs/deployment/observability.md @@ -0,0 +1,62 @@ +# Observability + +## Logs + +Envoy Control uses [SLF4J](https://www.slf4j.org/) with [Logback](https://logback.qos.ch/) for logging. + +To override the default settings, point a file via environment variable +```bash +export ENVOY_CONTROL_RUNNER_OPTS="-Dlogging.config=/path/to/logback/logback.xml" +``` +and then run the `bin/envoy-control-runner` created from `distZip` task. + +`java-control-plane` produces quite a lot of logging on `INFO` level. Consider switching it to `WARN` +```xml + + +``` + + +Sample logger configuration is available here. + +## Metrics + +### Envoy Control + +Metric | Description +-----------------------------| ----------------------------------- +**services.added** | Counter of added services events +**services.removed** | Counter of removed services events +**services.instanceChanged** | Counter of instance change events + +Standard [Spring metrics](https://docs.spring.io/spring-boot/docs/current/reference/html/production-ready-metrics.html#production-ready-metrics-meter) (JVM, CPU, HTTP server) are also included. + +### Envoy Control Runner + +Envoy Control Runner exposes a set of metrics on standard Spring Actuator's `/actuator/metrics` endpoint. 
+ +#### Connections + +Metric | Description +-----------------------------| -------------------------------------------------------- +**grpc.connections.ads** | Number of running gRPC ADS connections +**grpc.connections.cds** | Number of running gRPC CDS connections +**grpc.connections.eds** | Number of running gRPC EDS connections +**grpc.connections.lds** | Number of running gRPC LDS connections +**grpc.connections.rds** | Number of running gRPC RDS connections +**grpc.connections.sds** | Number of running gRPC SDS connections +**grpc.connections.unknown** | Number of running gRPC connections for unknown resource + +#### Snapshot + +Metric | Description +-------------------------| ---------------------------------- +**cache.groupCount** | Number of unique groups in SnapshotCache + +#### Synchronization + +Metric | Description +----------------------------------------| ------------------------------------------------- +**cross-dc-synchronization.$dc.errors** | Counter of synchronization errors for given DC diff --git a/docs/development.md b/docs/development.md new file mode 100644 index 000000000..728f93c6e --- /dev/null +++ b/docs/development.md @@ -0,0 +1,33 @@ +# Development + +Envoy Control is a [Kotlin](https://kotlinlang.org/) application, it requires JDK 8+ to run it. + +## Running +```bash +./gradlew run +``` + +## Testing +* All tests (unit and integration) +```./gradlew test``` +* Unit +```./gradlew unitTest``` +* Integration +```./gradlew integrationTest``` +* Reliability tests +```./gradlew clean -i -Penvironment=integration :envoy-control-tests:reliabilityTest -DRELIABILITY_FAILURE_DURATION_SECONDS=20``` + +## Packaging +To build a distribution package run +``` +./gradlew distZip +``` +The package should be available in `{root}/envoy-control-runner/build/distributions/envoy-control-runner-{version}.zip` + +## Formatter +To apply [ktlint](https://ktlint.github.io/) formatting rules to IntelliJ IDEA. 
Run: `./gradlew ktlintApplyToIdea` + +## Linter +A linter - [detekt](https://arturbosch.github.io/detekt/) runs when Envoy Control is built. You can run it separately: +`./gradlew detekt`. + diff --git a/docs/ec_vs_other_software.md b/docs/ec_vs_other_software.md new file mode 100644 index 000000000..a9cc049cd --- /dev/null +++ b/docs/ec_vs_other_software.md @@ -0,0 +1,38 @@ +# Envoy Control vs other software + +### Istio +[Istio](https://istio.io/) is the most popular complete Service Mesh solution based on Envoy. +The problem with Istio is that it's almost Kubernetes only. + +The integration with Consul did not scale properly for our use case (see [Integration - Consul](integrations/consul.md)) + +### Linkerd +[Linkerd](https://linkerd.io/) is an alternative to Envoy based Service Meshes. It includes both Data Plane and +Control Plane (Namerd). + +Linkerd v1 Data Plane is built using Scala with Twitter's Finagle library. We feel like Scala is not the best tool for +this job, because of the JRE runtime. This means higher memory footprint and latency due to GC pauses. + +Linkerd v2 was rewritten in Rust to get better performance. Unfortunately, just like Istio - it's Kubernetes only. + +### Consul Connect +[Consul Connect](https://www.consul.io/docs/connect/index.html) is a simple way to deploy Envoy to current +Consul based infrastructure. +The problem with Consul Connect is that it has very limited traffic control capabilities. +We want to have a fallback to instances from other DCs, canary deployment and other features specific to our +infrastructure. This is not possible for the current version of Consul Connect (1.5.1). + +### Rotor +[Rotor](https://github.com/turbinelabs/rotor) is a Control Plane built by Turbine Labs. +The project is no longer maintained because Turbine Labs was shut down. 
+ +The integration with Consul did not scale properly for our use case (see [Integration - Consul](integrations/consul.md)) + +### Go Control Plane / Java Control Plane +[Go Control Plane](https://github.com/envoyproxy/go-control-plane) and +[Java Control Plane](https://github.com/envoyproxy/java-control-plane) are projects that you can base your +Control Plane implementation on. They're not a sufficient Control Plane by themselves as they require connecting to your +Discovery Service. + +Envoy Control is based on Java Control Plane and integrates with Consul by default. It also adds features like +Cross DC Synchronization or Permission management. \ No newline at end of file diff --git a/docs/features/multi_dc_support.md b/docs/features/multi_dc_support.md new file mode 100644 index 000000000..007d3939a --- /dev/null +++ b/docs/features/multi_dc_support.md @@ -0,0 +1,57 @@ +# Multi-DC Support + +Envoy Control is ready to be used in an environment with multiple data centers. +When running services in multiple data centers, you probably want to leverage the fact +that when an application in one data center is down there is a fallback to an application in another DC. + +## Strategies +There are two strategies when running Envoy across many data centers. + +### Edge Envoys +The first strategy is to run a fleet of front proxies (Envoys) at each data center. +When no endpoint of a cluster is available in local data center +the extra routes for each remote data center are registered and requests are forwarded to one of them. +This simplifies Control Plane's logic, but the fleet has to be maintained with HA in mind because it's a single point +of failure. +Additionally, there is a cost of one extra request/response redirect. +The extra challenge here is to not end up in an infinite loop. 
+ +### Instance synchronization +The second strategy is to have all instances from all data centers available in Envoy but with different +[priorities](https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/load_balancing/priority). +Only if there are no instances in local data center, an instance from remote data center will be used. +The main benefit of this approach is a lack of single point of failure and maintainability at the cost of extra logic +in Control Plane. + +Envoy Control supports the second strategy. + +It periodically polls the state of discovery service from Envoy Controls from every other data center. +Then it merges the responses with proper priorities. + +![high level architecture](../assets/images/high_level_architecture.png) + +## Configuration + +### With Envoy Control Runner + +If you use Consul and Envoy Control Runner, it's as easy as changing the property `envoy-control.sync.enabled` to true, +assuming that you register Envoy Control under the `envoy-control` name in Consul. + +You can see a list of settings [here](../configuration.md#cross-dc-synchronization) + +### Without Envoy Control Runner + +If you don't use Envoy Control Runner, you have to fulfil the contract. +Create an endpoint `GET /state` with your framework of choice that will expose current local state of Envoy Control. +The state is available in `LocalServiceChanges#latestServiceState`. + +Then build a `CrossDcServices` class providing: + +* `AsyncControlPlaneClient` - an HTTP client +* `ControlPlaneInstanceFetcher` - the strategy of retrieving other Envoy Control from given DC +* `remoteDC` - list of remote data centers + +Refer to Envoy Control Runner module for a sample implementation. + diff --git a/docs/features/permissions.md b/docs/features/permissions.md new file mode 100644 index 000000000..a73cf7b52 --- /dev/null +++ b/docs/features/permissions.md @@ -0,0 +1,58 @@ +# Permissions + +!!! 
note + This is an incubating feature + +One of the pillars of Service Mesh is security. +Envoy Control provides a simple and fine-grained way to restrict traffic between applications. +Using Envoy's [metadata](https://www.envoyproxy.io/docs/envoy/latest/api-v2/api/v2/core/base.proto#core-metadata) +section you can provide additional configuration to the Control Plane. +The information provided in `metadata.proxy_settings` section is interpreted by Control Plane +and it will create a corresponding configuration for `Envoy`. +This means that Envoy Control is stateless +but in the future there will be an override mechanism that uses a database to save the configuration. + +An example configuration: + +```yaml +metadata: + ads: true + proxy_settings: + outgoing: + dependencies: + - service: service-a + - service: service-b + - domain: http://www.example.com + incoming: + endpoints: + - path: /example + methods: [GET, DELETE] + clients: [service-first] + - pathPrefix: '' + methods: [POST] + clients: [role-actor] + roles: + - clients: [service-a, service-b] + name: role-actor +``` + +In the `incoming` section this configuration defines access to routes: + +* `/example` + * using a `path` route matcher (more on this in [Envoy documentation](https://www.envoyproxy.io/docs/envoy/latest/api-v2/api/v2/route/route.proto#route-routematch)) + * using methods `GET` and `DELETE` + * to clients `service-first` +* all other routes + * using a `prefix` route matcher + * using method `POST` + * using a role called `role-actor` + +Roles are just a list of clients. We support `path` and `prefix` route matchers. + +In the outgoing section this configuration defines that this service will be able to reach +services: `service-a` and `service-b` and urls of domain www.example.com using http protocol +(at this moment only http protocol is supported). 
+ +## Configuration + +You can see a list of settings [here](../configuration.md#permissions) diff --git a/docs/features/service_transformers.md b/docs/features/service_transformers.md new file mode 100644 index 000000000..a5c8c4494 --- /dev/null +++ b/docs/features/service_transformers.md @@ -0,0 +1,32 @@ +# Service Transformers + +Service Transformers are a way to filter out and modify services received from the discovery before sending them to Envoy. +Transformers are only applied to the local state of discovery. Remote state of discovery is already transformed by another +instance of Envoy Control. + +## Available Transformers + +There are a couple of available transformers: + +### Empty Address Filter + +Exclude instances that have an empty address. + +### IP Address Filter + +Exclude instances that contain a hostname. Envoy does not support endpoints sent via EDS that have a hostname. + +### Regex Service Instances Filter + +Exclude services with a given name using defined regex. + +## Custom Transformers + +To provide custom Transformer implement `ServiceInstancesTransformer` interface. With Envoy Control Runner, every +transformer available in Spring Context will be picked up and used. With pure Envoy Control, you have to provide +a list of transformers to `LocalServiceChanges` class. + +## Configuration + +You can see a list of settings [here](../configuration.md#service-filters) + diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 000000000..2a42cf82a --- /dev/null +++ b/docs/index.md @@ -0,0 +1,33 @@ +# Envoy Control + +Envoy Control is a production-ready Control Plane for Service Mesh based on [Envoy Proxy](https://www.envoyproxy.io/) +Data Plane that is platform agnostic. 
+ +## Features + +* Exposing data from Service Discovery to [Envoy via gRPC xDS v2 API](integrations/envoy.md) +* Scalable [integration with Consul](integrations/consul.md) +* [Multi-DC support](features/multi_dc_support.md) +* [Permission management](features/permissions.md) +* [Observability](deployment/observability.md) + +## Why another Control Plane? +Our use case for Service Mesh is running 800 microservices on a [Mesos](https://mesos.apache.org/) / [Marathon](https://mesosphere.github.io/marathon/) stack. +Some of these services are run on Virtual Machines using the [OpenStack](https://www.openstack.org/) platform. +Most current solutions on the market assume that the platform is [Kubernetes](https://kubernetes.io/). +After evaluating current solutions on the market we decided to build our own Control Plane. +[See comparison](ec_vs_other_software.md) with other popular alternatives. + +## Performance + +Envoy Control is built with [performance in mind](performance.md). It was tested on a real-world production system. +Currently, at [allegro.tech](https://allegro.tech/) there are 800+ microservices which translates to 10k+ Envoys running +across all the environments. With a proper configuration, a single instance of Envoy Control with 2 CPUs and 2GB RAM +can easily handle 1k+ Envoys connected to it. + +## Reliability + +Envoy Control includes a suite of reliability tests that checks the behavior of the system under unusual circumstances. +Additionally, there are multiple metrics that help to observe the current condition of the Control Plane. \ No newline at end of file diff --git a/docs/integrations/consul.md b/docs/integrations/consul.md new file mode 100644 index 000000000..bb648fbf0 --- /dev/null +++ b/docs/integrations/consul.md @@ -0,0 +1,21 @@ +# Integration - Consul + +[Consul](https://www.consul.io/) is a highly available and distributed service discovery. Envoy Control provides +first-class integration with Consul. 
+ +## Performance + +Popular Service Mesh solutions provide integration with Consul by polling periodically the state of all services. +Assuming we polled the state each second in order to minimize change propagation latency, we would have to send a request +for a [list of services](https://www.consul.io/api/catalog.html#list-services) and then a +[request per each service](https://www.consul.io/api/catalog.html#list-nodes-for-service). +With 1,000 services, this would generate 1,000 rps per one instance of Control Plane. + +Integration in Envoy Control is based on [blocking queries](https://www.consul.io/api/features/blocking.html). This way +Consul will notify Envoy Control (via long-lasting HTTP requests) that the state of discovery changed. +The implementation used in Envoy Control is available as a +[Consul Recipes library](https://github.com/allegro/consul-recipes/). + +## Configuration + +You can see a list of settings [here](../configuration.md#consul) diff --git a/docs/integrations/envoy.md b/docs/integrations/envoy.md new file mode 100644 index 000000000..4a18a63e0 --- /dev/null +++ b/docs/integrations/envoy.md @@ -0,0 +1,61 @@ +# Integration with Envoy + +Envoy Control exposes configuration to Envoy via +[v2 xDS API](https://www.envoyproxy.io/docs/envoy/latest/configuration/overview/v2_overview). +The integration is based on [java-control-plane](https://github.com/envoyproxy/java-control-plane) project. + +## Sample Envoy configuration + +Sample Envoy configuration that is compatible with Envoy Control is available in tests (`envoy-control/src/test/resources/envoy/config_ads.yaml`). + + +## Routes + +Envoy Control can add some default routes via Route Discovery Service (RDS). + +### Original destination + +By default Envoy does not proxy requests to provided IP address - it's not valid to put an IP address in the `Host` header. +To work around that, a cluster called `envoy-original-destination` is created. 
+Its name can be used in `Host` header (`Host: envoy-original-destination`) +and the IP can be put in `x-envoy-original-dst-host` header (`x-envoy-original-dst-host: 127.0.0.2`). + +### Catch all route + +By default, Envoy will respond with `404` status code when it receives a request for a cluster that does not exist. +The behavior is changed so that the `503` status code is returned. + +## ADS Support + +By default, the xDS is used instead of +[Aggregated Discovery Service](https://www.envoyproxy.io/docs/envoy/latest/configuration/overview/v2_overview#aggregated-discovery-service) +(ADS). To use ADS for given node put the +``` +ads: true +``` +in Envoy metadata config. Envoy Control will pick it up and use ADS for this node. + +## Outlier detection + +You can configure global +[outlier detection](https://www.envoyproxy.io/docs/envoy/latest/intro/arch_overview/outlier#arch-overview-outlier-detection) +for all clusters with properties [described here](../configuration.md#outlier-detection). + +## Retry policy + +You can configure +[retry policies](https://www.envoyproxy.io/docs/envoy/latest/api-v2/api/v2/route/route.proto#envoy-api-msg-route-retrypolicy) +for ingress traffic with properties [described here](../configuration.md#retries). + +## Metadata + +After Envoy connects to Envoy Control it sends its metadata. +We extract some of the data from it to drive its dynamic configuration. +Right now we're focused on [permissions](../features/permissions.md) +but in the future we will provide options to configure: + +* retries +* timeouts +* circuit breakers and more diff --git a/docs/performance.md b/docs/performance.md new file mode 100644 index 000000000..bc9e649ea --- /dev/null +++ b/docs/performance.md @@ -0,0 +1,46 @@ +# Performance + +## Envoy + +Here are some tips to improve the performance of Service Mesh with Envoy Control. 
+In the future, [Incremental xDS](https://www.envoyproxy.io/docs/envoy/latest/api-docs/xds_protocol#incremental-xds) +should solve most of the performance issues. + +### Use permissions + +It is recommended to only send to Envoy the data that will be used. +If service A communicates only with B and C, A's Envoy should only follow B and C clusters. + +This approach solves most of the performance problems. + +During our tests - Envoy that follows 1,000 clusters will use almost 200 MB of RAM in comparison to about 10 MB when +following only a few services. + +Additionally, the network usage is significantly higher. With 1,000 clusters, we've seen snapshot size go up to 300 KB. +Assuming that a new snapshot is generated every second, 1,000 Envoys with 1,000 clusters can generate a load of +300 MB/s. When following only a few services, the snapshot is about 5 KB and it's sent much less frequently. + +### Use ADS + +With xDS, Envoy sets up a gRPC stream to Envoy Control per cluster. Let's say there are 1,000 Envoys and 1,000 clusters. +Envoy Control will have to handle 1,000,000 open gRPC streams. This puts pressure on memory, which translates to more +frequent GC runs and higher CPU usage. + +With ADS, each Envoy sets up a single gRPC stream for all clusters. With 1,000 Envoys, there are 1,000 streams which +reduces memory usage dramatically. + +### Sampling + +Envoy Control by default follows changes from the discovery service, batches them and sends them to Envoys at most once every second. +This can be set to a longer time which will decrease the workload of Envoy Control at the cost of higher latency of +changes in Envoy. When setting it to a longer time, consider using Outlier Detection - this can passively eliminate +old instances. + +### Use G1 GC + +In Java 9 and onwards, G1 GC is the default Garbage Collection algorithm. When using Java 8, consider switching from +Concurrent Mark and Sweep GC to G1 GC. 
+ +### DOS prevention + +We are currently working on a mechanism that would [allow rate limiting to Envoy Control](https://github.com/envoyproxy/java-control-plane/pull/102). diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 000000000..08cb35791 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,47 @@ +# Quickstart + +## Requirements +* Java 8+ +* Docker & Docker Compose + +## Setting up an environment + +### Dependencies +At this moment, Envoy Control requires Consul to run. +In the future, there will be support for other discovery service systems. +Additionally, to test the system it's convenient to have an Envoy that connects to +Envoy Control and a service registered in Consul that will be propagated to Envoy. + +You can run all dependencies with docker-compose: +``` +git clone //todo repo url +cd tools +docker-compose up +``` + +To check the environment, go to Consul UI: [http://localhost:18500](http://localhost:18500) and see whether there is +a registered _http-echo_ service. + +Additionally, you can check Envoy Admin at [http://localhost:9999](http://localhost:9999) + +The _http-echo_ service is not available outside of docker's network. + +The Envoy listener is available on [http://localhost:31000](http://localhost:31000) but the _http-echo_ service location +is not yet propagated to Envoy. + +### Run Envoy Control +Run Envoy Control with `./gradlew run` in the cloned directory. + +## Test the system +After a while, Envoy Control should read the state of Consul and propagate it to Envoy. + +You can check it by sending a curl request to _http-echo_ through a proxy. +The request will be sent to Envoy, which will forward it to the _http-echo_ service. +``` +curl -x localhost:31000 http://http-echo/test -v +``` + +Instead of using the proxy feature you can also send a request to Envoy with a Host header. 
+``` +curl -H "Host: http-echo" http://localhost:31000/status/info +``` \ No newline at end of file diff --git a/envoy-control-runner/build.gradle b/envoy-control-runner/build.gradle new file mode 100644 index 000000000..496c4d5af --- /dev/null +++ b/envoy-control-runner/build.gradle @@ -0,0 +1,36 @@ +plugins { + id 'org.jetbrains.kotlin.jvm' + id 'application' +} + +mainClassName = 'pl.allegro.tech.servicemesh.envoycontrol.EnvoyControl' + +repositories { + mavenCentral() +} + +dependencies { + compile project(':source-consul') + + implementation group: 'org.springframework.boot', name: 'spring-boot-starter', version: versions.spring_boot + compile group: 'org.springframework.boot', name: 'spring-boot-starter-web', version: versions.spring_boot + compile group: 'org.springframework.boot', name: 'spring-boot-starter-actuator', version: versions.spring_boot + compile group: 'io.micrometer', name: 'micrometer-registry-prometheus', version: versions.micrometer + + compile group: 'org.jetbrains.kotlin', name: 'kotlin-stdlib-jdk8', version: versions.kotlin + compile group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: versions.jackson +} + +publishing { + publications { + mavenJava(MavenPublication) { + artifact distZip, { classifier = "deploy" } + } + } +} + +test { + maxParallelForks = 1 + useJUnitPlatform() +} + diff --git a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControl.kt b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControl.kt new file mode 100644 index 000000000..56a4c2a1c --- /dev/null +++ b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControl.kt @@ -0,0 +1,22 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.springframework.boot.CommandLineRunner +import org.springframework.boot.SpringApplication +import org.springframework.boot.autoconfigure.SpringBootApplication + +@SpringBootApplication +class 
EnvoyControl( + val controlPlane: ControlPlane +) : CommandLineRunner { + + override fun run(vararg args: String?) { + controlPlane.start() + } + + companion object { + @JvmStatic + fun main(args: Array) { + SpringApplication.run(EnvoyControl::class.java, *args) + } + } +} diff --git a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/ControlPlaneConfig.kt b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/ControlPlaneConfig.kt new file mode 100644 index 000000000..e5b5394f5 --- /dev/null +++ b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/ControlPlaneConfig.kt @@ -0,0 +1,122 @@ +package pl.allegro.tech.servicemesh.envoycontrol.infrastructure + +import com.ecwid.consul.v1.ConsulClient +import com.fasterxml.jackson.databind.ObjectMapper +import io.micrometer.core.instrument.MeterRegistry +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.boot.context.properties.ConfigurationProperties +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration +import pl.allegro.tech.discovery.consul.recipes.ConsulRecipes +import pl.allegro.tech.discovery.consul.recipes.datacenter.ConsulDatacenterReader +import pl.allegro.tech.discovery.consul.recipes.json.JacksonJsonDeserializer +import pl.allegro.tech.discovery.consul.recipes.json.JacksonJsonSerializer +import pl.allegro.tech.discovery.consul.recipes.watch.ConsulWatcher +import pl.allegro.tech.servicemesh.envoycontrol.ControlPlane +import pl.allegro.tech.servicemesh.envoycontrol.DefaultEnvoyControlMetrics +import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControlMetrics +import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControlProperties +import 
pl.allegro.tech.servicemesh.envoycontrol.consul.ConsulProperties +import pl.allegro.tech.servicemesh.envoycontrol.consul.services.ConsulLocalServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.consul.services.ConsulServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.services.Locality +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.services.transformers.EmptyAddressFilter +import pl.allegro.tech.servicemesh.envoycontrol.services.transformers.IpAddressFilter +import pl.allegro.tech.servicemesh.envoycontrol.services.transformers.RegexServiceInstancesFilter +import pl.allegro.tech.servicemesh.envoycontrol.services.transformers.ServiceInstancesTransformer +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.GlobalServiceChanges +import reactor.core.scheduler.Schedulers +import java.net.URI + +@Configuration +class ControlPlaneConfig { + init { + Schedulers.enableMetrics() + } + + @Bean + @ConfigurationProperties("envoy-control") + fun envoyControlProperties() = EnvoyControlProperties() + + @Bean + @ConfigurationProperties("envoy-control.source.consul") + fun consulProperties() = ConsulProperties() + + @Bean + @ConditionalOnMissingBean(ControlPlane::class) + fun controlPlane( + properties: EnvoyControlProperties, + meterRegistry: MeterRegistry, + globalServiceChanges: GlobalServiceChanges, + metrics: EnvoyControlMetrics + ): ControlPlane = + ControlPlane.builder(properties, meterRegistry) + .withMetrics(metrics) + .build(globalServiceChanges.combined()) + + @Bean + fun consulServiceChanges( + watcher: ConsulWatcher, + metrics: EnvoyControlMetrics, + objectMapper: ObjectMapper, + consulProperties: ConsulProperties + ) = ConsulServiceChanges(watcher, metrics, objectMapper, consulProperties.subscriptionDelay) + + @Bean + fun localServiceChanges( + consulServiceChanges: 
ConsulServiceChanges, + consulProperties: ConsulProperties, + transformers: List + ): LocalServiceChanges = ConsulLocalServiceChanges( + consulServiceChanges, + Locality.LOCAL, + localDatacenter(consulProperties), + transformers + ) + + @Bean + fun consulDatacenterReader(consulProperties: ConsulProperties, objectMapper: ObjectMapper): ConsulDatacenterReader = + ConsulRecipes.consulRecipes() + .withJsonDeserializer(JacksonJsonDeserializer(objectMapper)) + .withJsonSerializer(JacksonJsonSerializer(objectMapper)) + .build() + .consulDatacenterReader() + .withAgentUri(URI("http://${consulProperties.host}:${consulProperties.port}")) + .build() + + @Bean + fun envoyControlMetrics(meterRegistry: MeterRegistry): EnvoyControlMetrics = controlPlaneMetrics(meterRegistry) + + @Bean + fun emptyAddressFilter() = EmptyAddressFilter() + + @Bean + fun ipAddressFilter() = IpAddressFilter() + + @Bean + @ConditionalOnProperty("envoy-control.service-filters.excluded-names-patterns") + fun excludeServicesFilter(properties: EnvoyControlProperties) = + RegexServiceInstancesFilter(properties.serviceFilters.excludedNamesPatterns) + + @Bean + fun globalServiceChanges( + serviceChanges: Array + ): GlobalServiceChanges = + GlobalServiceChanges(serviceChanges) + + fun localDatacenter(properties: ConsulProperties) = + ConsulClient(properties.host, properties.port).agentSelf.value?.config?.datacenter ?: "local" + + fun controlPlaneMetrics(meterRegistry: MeterRegistry) = + DefaultEnvoyControlMetrics().also { + meterRegistry.gauge("services.added", it.servicesAdded) + meterRegistry.gauge("services.removed", it.servicesRemoved) + meterRegistry.gauge("services.instanceChanged", it.instanceChanges) + meterRegistry.gauge("services.snapshotChanged", it.snapshotChanges) + meterRegistry.gauge("cache.groupsCount", it.cacheGroupsCount) + meterRegistry.more().counter("services.watch.errors", listOf(), it.errorWatchingServices) + } +} diff --git 
a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/SynchronizationConfig.kt b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/SynchronizationConfig.kt new file mode 100644 index 000000000..00b88802c --- /dev/null +++ b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/SynchronizationConfig.kt @@ -0,0 +1,62 @@ +package pl.allegro.tech.servicemesh.envoycontrol.infrastructure + +import com.ecwid.consul.v1.ConsulClient +import io.micrometer.core.instrument.MeterRegistry +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration +import org.springframework.core.task.SimpleAsyncTaskExecutor +import org.springframework.http.client.SimpleClientHttpRequestFactory +import org.springframework.web.client.AsyncRestTemplate +import pl.allegro.tech.discovery.consul.recipes.datacenter.ConsulDatacenterReader +import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControlProperties +import pl.allegro.tech.servicemesh.envoycontrol.consul.ConsulProperties +import pl.allegro.tech.servicemesh.envoycontrol.consul.synchronization.SimpleConsulInstanceFetcher +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.AsyncControlPlaneClient +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.AsyncRestTemplateControlPlaneClient +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.ControlPlaneInstanceFetcher +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.CrossDcServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.CrossDcServices + +@Configuration +@ConditionalOnProperty(name = ["envoy-control.sync.enabled"], havingValue = "true", matchIfMissing = false) +class SynchronizationConfig { + + @Bean + fun asyncRestTemplate(envoyControlProperties: 
EnvoyControlProperties): AsyncRestTemplate { + val requestFactory = SimpleClientHttpRequestFactory() + requestFactory.setTaskExecutor(SimpleAsyncTaskExecutor()) + requestFactory.setConnectTimeout(envoyControlProperties.sync.connectionTimeout.toMillis().toInt()) + requestFactory.setReadTimeout(envoyControlProperties.sync.readTimeout.toMillis().toInt()) + + return AsyncRestTemplate(requestFactory) + } + + @Bean + fun controlPlaneClient(asyncRestTemplate: AsyncRestTemplate) = + AsyncRestTemplateControlPlaneClient(asyncRestTemplate) + + @Bean + fun crossDcServices( + controlPlaneClient: AsyncControlPlaneClient, + meterRegistry: MeterRegistry, + controlPlaneInstanceFetcher: ControlPlaneInstanceFetcher, + consulDatacenterReader: ConsulDatacenterReader, + properties: EnvoyControlProperties + ): CrossDcServiceChanges { + + val remoteDcs = consulDatacenterReader.knownDatacenters() - consulDatacenterReader.localDatacenter() + val service = CrossDcServices(controlPlaneClient, meterRegistry, controlPlaneInstanceFetcher, remoteDcs) + + return CrossDcServiceChanges(properties, service) + } + + @Bean + fun instanceFetcher( + consulProperties: ConsulProperties, + envoyControlProperties: EnvoyControlProperties + ) = SimpleConsulInstanceFetcher( + ConsulClient(consulProperties.host, consulProperties.port), + envoyControlProperties.sync.envoyControlAppName + ) +} diff --git a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/consul/ConsulWatcherConfig.kt b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/consul/ConsulWatcherConfig.kt new file mode 100644 index 000000000..db330a97e --- /dev/null +++ b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/consul/ConsulWatcherConfig.kt @@ -0,0 +1,112 @@ +package pl.allegro.tech.servicemesh.envoycontrol.infrastructure.consul + +import com.fasterxml.jackson.databind.ObjectMapper +import 
com.fasterxml.jackson.module.kotlin.KotlinModule +import okhttp3.Dispatcher +import okhttp3.Interceptor +import okhttp3.OkHttpClient +import okhttp3.Response +import okhttp3.internal.Util +import org.springframework.context.annotation.Bean +import org.springframework.context.annotation.Configuration +import pl.allegro.tech.discovery.consul.recipes.ConsulRecipes +import pl.allegro.tech.discovery.consul.recipes.json.JacksonJsonDeserializer +import pl.allegro.tech.discovery.consul.recipes.json.JacksonJsonSerializer +import pl.allegro.tech.discovery.consul.recipes.watch.ConsulWatcher +import pl.allegro.tech.servicemesh.envoycontrol.consul.ConsulProperties +import pl.allegro.tech.servicemesh.envoycontrol.consul.ConsulWatcherOkHttpProperties +import java.net.URI +import java.util.concurrent.ExecutorService +import java.util.concurrent.Executors +import java.util.concurrent.SynchronousQueue +import java.util.concurrent.ThreadFactory +import java.util.concurrent.ThreadPoolExecutor +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicInteger + +@Configuration +open class ConsulWatcherConfig { + + @Bean + fun consulWatcher( + consulProperties: ConsulProperties, + objectMapper: ObjectMapper + ): ConsulWatcher { + val watcherPool = watcherPool() + + val client = okHttpClient(consulProperties.watcher) + + return createConsulWatcher(consulProperties, objectMapper, client, watcherPool) + } + + protected fun watcherPool(): ExecutorService { + return Executors.newFixedThreadPool(1, RecipesThreadFactory()) + } + + protected fun createConsulWatcher( + properties: ConsulProperties, + objectMapper: ObjectMapper, + client: OkHttpClient, + watcherPool: ExecutorService + ): ConsulWatcher { + return ConsulRecipes.consulRecipes() + .withAgentUri(URI("http://${properties.host}:${properties.port}")) + .withJsonDeserializer(JacksonJsonDeserializer(objectMapper)) + .withJsonSerializer(JacksonJsonSerializer(objectMapper)) + .withWatchesHttpClient(client) + .build() + 
.consulWatcher(watcherPool) + .requireDefaultConsistency() + .build() + } + + protected fun okHttpClient(watcherConfig: ConsulWatcherOkHttpProperties): OkHttpClient { + return customizeClient(OkHttpClient.Builder(), watcherConfig) + .readTimeout(watcherConfig.readTimeout.toMillis(), TimeUnit.MILLISECONDS) + .connectTimeout(watcherConfig.connectTimeout.toMillis(), TimeUnit.MILLISECONDS) + .build() + } + + protected fun customizeClient( + builder: OkHttpClient.Builder, + watcherConfig: ConsulWatcherOkHttpProperties + ): OkHttpClient.Builder { + val dispatcher = Dispatcher(createDispatcherPool(watcherConfig)) + dispatcher.maxRequests = watcherConfig.maxRequests + dispatcher.maxRequestsPerHost = watcherConfig.maxRequests + + return builder.addInterceptor(NoGzipIntercetor()) + .dispatcher(dispatcher) + } + + protected fun createDispatcherPool(watcherConfig: ConsulWatcherOkHttpProperties): ExecutorService { + return ThreadPoolExecutor( + 0, + watcherConfig.dispatcherMaxPoolSize, + watcherConfig.dispatcherPoolKeepAliveTime.toMillis(), + TimeUnit.MILLISECONDS, + SynchronousQueue(), + Util.threadFactory("consul-okhttp-dispatcher", false) + ) + } + + protected class NoGzipIntercetor : Interceptor { + override fun intercept(chain: Interceptor.Chain): Response { + return chain.proceed( + chain + .request() + .newBuilder() + .addHeader("Accept-Encoding", "identity") + .build() + ) + } + } + + private class RecipesThreadFactory : ThreadFactory { + private val counter = AtomicInteger() + override fun newThread(r: Runnable) = Thread(r, "consul-watcher-worker-${counter.getAndIncrement()}") + } + + @Bean + fun kotlinModule() = KotlinModule() +} diff --git a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/health/EnvoyControlHealthIndicator.kt b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/health/EnvoyControlHealthIndicator.kt new file mode 100644 index 000000000..7c7e929e3 --- /dev/null +++ 
b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/infrastructure/health/EnvoyControlHealthIndicator.kt @@ -0,0 +1,17 @@ +package pl.allegro.tech.servicemesh.envoycontrol.infrastructure.health + +import org.springframework.boot.actuate.health.AbstractHealthIndicator +import org.springframework.boot.actuate.health.Health +import org.springframework.stereotype.Component +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalServiceChanges + +@Component +class EnvoyControlHealthIndicator(private val localServiceChanges: LocalServiceChanges) : AbstractHealthIndicator() { + override fun doHealthCheck(builder: Health.Builder?) { + if (localServiceChanges.isServiceStateLoaded()) { + builder!!.up() + } else { + builder!!.down() + } + } +} diff --git a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncRestTemplateControlPlaneClient.kt b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncRestTemplateControlPlaneClient.kt new file mode 100644 index 000000000..7a14fd294 --- /dev/null +++ b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncRestTemplateControlPlaneClient.kt @@ -0,0 +1,14 @@ +package pl.allegro.tech.servicemesh.envoycontrol.synchronization + +import org.springframework.web.client.AsyncRestTemplate +import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState +import reactor.core.publisher.Mono +import java.net.URI + +class AsyncRestTemplateControlPlaneClient(val asyncRestTemplate: AsyncRestTemplate) : AsyncControlPlaneClient { + override fun getState(uri: URI): Mono = + asyncRestTemplate.getForEntity("$uri/state", ServicesState::class.java) + .completable() + .thenApply { it.body } + .let { Mono.fromCompletionStage(it) } +} diff --git a/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/StateController.kt 
b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/StateController.kt new file mode 100644 index 000000000..fcb689e6a --- /dev/null +++ b/envoy-control-runner/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/StateController.kt @@ -0,0 +1,19 @@ +package pl.allegro.tech.servicemesh.envoycontrol.synchronization + +import org.springframework.web.bind.annotation.GetMapping +import org.springframework.web.bind.annotation.PathVariable +import org.springframework.web.bind.annotation.RestController +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances +import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState + +@RestController +class StateController(val localServiceChanges: LocalServiceChanges) { + + @GetMapping("/state") + fun getState(): ServicesState = localServiceChanges.latestServiceState.get() + + @GetMapping("/state/{serviceName}") + fun getStateByServiceName(@PathVariable("serviceName") serviceName: String): ServiceInstances? 
= + localServiceChanges.latestServiceState.get()[serviceName] +} diff --git a/envoy-control-runner/src/main/resources/application-docker.yaml b/envoy-control-runner/src/main/resources/application-docker.yaml new file mode 100644 index 000000000..0077f8ea1 --- /dev/null +++ b/envoy-control-runner/src/main/resources/application-docker.yaml @@ -0,0 +1,5 @@ +envoy-control: + source: + consul: + host: consul + port: 8500 \ No newline at end of file diff --git a/envoy-control-runner/src/main/resources/application-local.yaml b/envoy-control-runner/src/main/resources/application-local.yaml new file mode 100644 index 000000000..278f86dfd --- /dev/null +++ b/envoy-control-runner/src/main/resources/application-local.yaml @@ -0,0 +1,5 @@ +envoy-control: + source: + consul: + host: localhost + port: 18500 \ No newline at end of file diff --git a/envoy-control-runner/src/main/resources/application.yaml b/envoy-control-runner/src/main/resources/application.yaml new file mode 100644 index 000000000..699fe212a --- /dev/null +++ b/envoy-control-runner/src/main/resources/application.yaml @@ -0,0 +1,16 @@ +server.port: 8080 + +application: + name: envoy-control + +envoy-control: + source: + consul: + host: localhost + +management: + endpoint: + metrics.enabled: true + prometheus.enabled: true + endpoints.web.exposure.include: "*" + metrics.export.prometheus.enabled: true \ No newline at end of file diff --git a/envoy-control-runner/src/main/resources/logback.xml b/envoy-control-runner/src/main/resources/logback.xml new file mode 100644 index 000000000..022806f0e --- /dev/null +++ b/envoy-control-runner/src/main/resources/logback.xml @@ -0,0 +1,13 @@ + + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + diff --git a/envoy-control-tests/build.gradle b/envoy-control-tests/build.gradle new file mode 100644 index 000000000..d54b8e55f --- /dev/null +++ b/envoy-control-tests/build.gradle @@ -0,0 +1,46 @@ +plugins { + id 'org.jetbrains.kotlin.jvm' +} + 
+repositories { + mavenCentral() +} + +dependencies { + compile project(':envoy-control-runner') + + compile group: 'org.assertj', name: 'assertj-core', version: versions.assertj + compile group: 'org.junit.jupiter', name: 'junit-jupiter-api', version: versions.junit + compile group: 'org.awaitility', name: 'awaitility', version: versions.awaitility + compile(group: 'com.pszymczyk.consul', name: 'embedded-consul', version: versions.embedded_consul) { + exclude group: 'org.apache.httpcomponents', module: 'httpclient' + } + compile group: 'eu.rekawek.toxiproxy', name: 'toxiproxy-java', version: versions.toxiproxy + runtimeOnly group: 'org.junit.jupiter', name: 'junit-jupiter-engine', version: versions.junit + implementation group: 'org.testcontainers', name: 'junit-jupiter', version: versions.testcontainers + implementation group: 'org.testcontainers', name: 'testcontainers', version: versions.testcontainers +} + +publishing { + publications { + mavenJava(MavenPublication) { + from components.java + } + } +} + +test { + useJUnitPlatform { + excludeTags 'reliability' + } + maxParallelForks = 1 + testClassesDirs = project.sourceSets.main.output.classesDirs +} + +task reliabilityTest(type: Test) { + systemProperty 'RELIABILITY_FAILURE_DURATION_SECONDS', System.getProperty('RELIABILITY_FAILURE_DURATION_SECONDS', '300') + useJUnitPlatform { + includeTags 'reliability' + } + testClassesDirs = project.sourceSets.main.output.classesDirs +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ConnectionsMeterTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ConnectionsMeterTest.kt new file mode 100644 index 000000000..9c95cd8ee --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ConnectionsMeterTest.kt @@ -0,0 +1,86 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import io.micrometer.core.instrument.MeterRegistry +import 
org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.Ads +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration +import pl.allegro.tech.servicemesh.envoycontrol.config.Xds +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.ADS +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.CDS +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.EDS +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.LDS +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.RDS +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.SDS +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks.MetricsStreamType.UNKNOWN + +internal class XdsConnectionsMeterTest : EnvoyControlTestConfiguration() { + companion object { + + @JvmStatic + @BeforeAll + fun nonAdsSetup() { + setup(envoyConfig = Xds) + } + } + + @Test + fun `should meter the gRPC connections`() { + // given + val meterRegistry: MeterRegistry = bean() + registerService(name = "echo") + + untilAsserted { + // expect + mapOf( + CDS to 1, + EDS to 2, // separate streams for consul and echo + LDS to 0, + RDS to 2, // default_routes + SDS to 0, + ADS to 0, + UNKNOWN to 0 + ).forEach { (type, value) -> + val metric = "grpc.connections.${type.name.toLowerCase()}" + assertThat(meterRegistry.find(metric).gauge()).isNotNull + assertThat(meterRegistry.get(metric).gauge().value().toInt()).isEqualTo(value) + } + } + } +} + +internal class AdsConnectionsMeterTest : EnvoyControlTestConfiguration() { + companion object { 
+ + @JvmStatic + @BeforeAll + fun nonAdsSetup() { + setup(envoyConfig = Ads) + } + } + + @Test + fun `should meter the gRPC connections`() { + // given + val meterRegistry: MeterRegistry = bean() + registerService(name = "echo") + + untilAsserted { + // expect + mapOf( + CDS to 0, + EDS to 0, + LDS to 0, + RDS to 0, + SDS to 0, + ADS to 1, // all info is exchanged on one stream + UNKNOWN to 0 + ).forEach { (type, value) -> + val metric = "grpc.connections.${type.name.toLowerCase()}" + assertThat(meterRegistry.find(metric).gauge()).isNotNull + assertThat(meterRegistry.get(metric).gauge().value().toInt()).isEqualTo(value) + } + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlSynchronizationTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlSynchronizationTest.kt new file mode 100644 index 000000000..660bde64e --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlSynchronizationTest.kt @@ -0,0 +1,134 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import com.codahale.metrics.Timer +import org.assertj.core.api.Assertions.assertThat +import org.awaitility.Awaitility.await +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration +import pl.allegro.tech.servicemesh.envoycontrol.config.echo.EchoContainer +import java.time.Duration +import java.util.UUID +import java.util.concurrent.TimeUnit + +internal class EnvoyControlSynchronizationRunnerTest : EnvoyControlSynchronizationTest() { + override val pollingInterval: Duration = Companion.pollingInterval + override val stateSampleDuration: Duration = Companion.stateSampleDuration + + companion object { + val pollingInterval = Duration.ofSeconds(1) + val stateSampleDuration 
= Duration.ofSeconds(1) + + @BeforeAll + @JvmStatic + fun setupTest() { + val properties = mapOf( + "envoy-control.envoy.snapshot.stateSampleDuration" to stateSampleDuration, + "envoy-control.sync.enabled" to true, + "envoy-control.sync.polling-interval" to pollingInterval.seconds + ) + setup( + envoyControls = 2, + appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) } + ) + } + } +} + +abstract class EnvoyControlSynchronizationTest : EnvoyControlTestConfiguration() { + + abstract val pollingInterval: Duration + abstract val stateSampleDuration: Duration + + private val logger by logger() + + @Test + fun `should prefer services from local dc and fallback to remote dc when needed`() { + // given: local and remote instances + registerServiceInRemoteDc("echo", echoContainer2) + val localId = registerServiceInLocalDc("echo") + + // then: local called + waitUntilEchoCalledThroughEnvoyResponds(echoContainer) + + // when: no local instances + deregisterService(localId) + + // then: remote called + waitUntilEchoCalledThroughEnvoyResponds(echoContainer2) + + // when: local instances again + registerServiceInLocalDc("echo") + + // then: local called + waitUntilEchoCalledThroughEnvoyResponds(echoContainer) + } + + @Test + fun `latency between service registration in remote dc and being able to access it via envoy should be similar to envoy-control polling interval`() { + // when + val latency = measureRegistrationToAccessLatency { name, target -> + registerServiceInRemoteDc(name, target) + } + + // then + logger.info("remote dc latency: $latency") + + val tolerance = Duration.ofMillis(400) + stateSampleDuration + val expectedMax = (pollingInterval + tolerance).toMillis() + assertThat(latency.max()).isLessThanOrEqualTo(expectedMax) + } + + @Test + fun `latency between service registration in local dc and being able to access it via envoy should be less than 0,5s + stateSampleDuration`() { + // when + val latency = 
measureRegistrationToAccessLatency { name, target -> + registerServiceInLocalDc(name, target) + } + + // then + logger.info("local dc latency: $latency") + + assertThat(latency.max()).isLessThanOrEqualTo(500 + stateSampleDuration.toMillis()) + } + + private fun measureRegistrationToAccessLatency(registerService: (String, EchoContainer) -> Unit): LatencySummary { + val timer = Timer() + + // when + for (i in 1..5) { + val serviceName = "service-$i" + registerService.invoke(serviceName, echoContainer) + + timer.time { + await() + .pollDelay(50, TimeUnit.MILLISECONDS) + .atMost(defaultDuration) + .untilAsserted { + // when + val response = callService(serviceName) + + // then + assertThat(response).isOk().isFrom(echoContainer) + } + } + } + return LatencySummary(timer) + } + + private fun registerServiceInLocalDc(name: String, target: EchoContainer = echoContainer): String = + registerService(UUID.randomUUID().toString(), name, target) + + private class LatencySummary(private val timer: Timer) { + + private fun nanosToMillis(nanos: Long) = Duration.ofNanos(nanos).toMillis() + + fun max(): Long = nanosToMillis(timer.snapshot.max) + + override fun toString() = "LatencySummary(" + + "max = ${nanosToMillis(timer.snapshot.max)} ms, " + + "median = ${nanosToMillis(timer.snapshot.median.toLong())} ms" + + ")" + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlTest.kt new file mode 100644 index 000000000..b9b31fedc --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlTest.kt @@ -0,0 +1,98 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.Ads +import 
pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration +import pl.allegro.tech.servicemesh.envoycontrol.config.Xds + +internal class AdsEnvoyControlTest : EnvoyControlTest() { + companion object { + + @JvmStatic + @BeforeAll + fun adsSetup() { + setup(envoyConfig = Ads) + } + } +} + +internal class XdsEnvoyControlTest : EnvoyControlTest() { + companion object { + + @JvmStatic + @BeforeAll + fun nonAdsSetup() { + setup(envoyConfig = Xds) + } + } +} + +internal abstract class EnvoyControlTest : EnvoyControlTestConfiguration() { + + @Test + fun `should allow proxy-ing request using envoy`() { + // given + registerService(name = "echo") + + untilAsserted { + // when + val response = callEcho() + + // then + assertThat(response).isOk().isFrom(echoContainer) + } + } + + @Test + fun `should route traffic to the second instance when first is deregistered`() { + // given + val id = registerService(name = "echo") + + untilAsserted { + // when + val response = callEcho() + + // then + assertThat(response).isOk().isFrom(echoContainer) + } + + // given + // we first register a new instance and then remove other to maintain cluster presence in Envoy + registerService(name = "echo", container = echoContainer2) + waitForEchoServices(instances = 2) + + deregisterService(id) + waitForEchoServices(instances = 1) + + untilAsserted { + // when + val response = callEcho() + + // then + assertThat(response).isOk().isFrom(echoContainer2) + } + } + + private fun waitForEchoServices(instances: Int) { + untilAsserted { + assertThat(envoyContainer.admin().numOfEndpoints(clusterName = "echo")).isEqualTo(instances) + } + } + + @Test + fun `should assign endpoints to correct zones`() { + // given + registerService(name = "echo") + + untilAsserted { + // when + val adminInstance = envoyContainer.admin().zone(cluster = "echo", ip = echoContainer.ipAddress()) + + // then + assertThat(adminInstance).isNotNull + assertThat(adminInstance!!.zone).isEqualTo("dc1") + } + } +} diff 
--git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/HealthIndicatorTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/HealthIndicatorTest.kt new file mode 100644 index 000000000..57dbc387a --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/HealthIndicatorTest.kt @@ -0,0 +1,36 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration + +internal class HealthIndicatorTest : EnvoyControlTestConfiguration() { + + companion object { + + private val properties = mapOf( + "management.endpoint.health.show-details" to "ALWAYS" + ) + + @JvmStatic + @BeforeAll + fun setupTest() { + setup(appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) }) + } + } + + @Test + fun `should application state be healthy after state of applications is loaded from consul`() { + untilAsserted { + // when + val responseState = envoyControl1.getState() + val healthStatus = envoyControl1.getHealthStatus() + + // then + assertThat(healthStatus).isStatusHealthy().hasEnvoyControlCheckPassed() + assertThat(responseState).hasServiceStateChanged() + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/IncomingPermissionsTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/IncomingPermissionsTest.kt new file mode 100644 index 000000000..92c0c8f85 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/IncomingPermissionsTest.kt @@ -0,0 +1,48 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import 
org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration + +internal class IncomingPermissionsTest : EnvoyControlTestConfiguration() { + + companion object { + + private val properties = mapOf( + "envoy-control.envoy.snapshot.incoming-permissions.enabled" to true, + "envoy-control.envoy.snapshot.incoming-permissions.endpoint-unavailable-status-code" to 403 + ) + + @JvmStatic + @BeforeAll + fun setupTest() { + setup(appFactoryForEc1 = { consulPort -> + EnvoyControlRunnerTestApp(properties = properties, consulPort = consulPort) + }) + } + } + + @Test + fun `should allow access to endpoint by authorized client`() { + untilAsserted { + // when + val response = callLocalService(endpoint = "/endpoint", clientServiceName = "authorizedClient") + + // then + assertThat(response).isOk().isFrom(localServiceContainer) + } + } + + @Test + fun `should deny access to endpoint by unauthorized client`() { + untilAsserted { + // when + val response = callLocalService(endpoint = "/endpoint", clientServiceName = "unauthorizedClient") + + // then + assertThat(response).isForbidden() + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OriginalDestinationTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OriginalDestinationTest.kt new file mode 100644 index 000000000..4ec5eb662 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OriginalDestinationTest.kt @@ -0,0 +1,32 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration + +internal class OriginalDestinationTest : 
EnvoyControlTestConfiguration() { + + companion object { + + @JvmStatic + @BeforeAll + fun setupTest() { + setup() + } + } + + @Test + fun `should send direct request when host envoy-original-destination and header x-envoy-original-dst-host with IP provided`() { + untilAsserted { + // when + val response = callServiceWithOriginalDst( + echoContainer.address(), + envoyContainer.egressListenerUrl() + ) + + // then + assertThat(response).isOk().isFrom(echoContainer) + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OutgoingPermissionsTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OutgoingPermissionsTest.kt new file mode 100644 index 000000000..e5a427db8 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OutgoingPermissionsTest.kt @@ -0,0 +1,67 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.Ads +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration +import pl.allegro.tech.servicemesh.envoycontrol.config.Xds + +class AdsOutgoingPermissionsTest : OutgoingPermissionsTest() { + companion object { + + private val properties = mapOf("envoy-control.envoy.snapshot.outgoing-permissions.enabled" to true) + + @JvmStatic + @BeforeAll + fun setupTest() { + setup( + envoyConfig = Ads, + appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) } + ) + } + } +} + +class XdsOutgoingPermissionsTest : OutgoingPermissionsTest() { + companion object { + + private val properties = mapOf("envoy-control.envoy.snapshot.outgoing-permissions.enabled" to true) + + @JvmStatic + @BeforeAll + fun setupTest() { + setup( + envoyConfig = Xds, 
+ appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) } + ) + } + } +} + +abstract class OutgoingPermissionsTest : EnvoyControlTestConfiguration() { + + @Test + fun `should only allow access to resources from node_metadata_dependencies`() { + // given + registerService(name = "not-accessible", container = echoContainer) + registerService(name = "echo") + + untilAsserted { + // when + val unreachableResponse = EnvoyControlTestConfiguration.callService(service = "not-accessible") + val unregisteredResponse = EnvoyControlTestConfiguration.callService(service = "unregistered") + val reachableResponse = EnvoyControlTestConfiguration.callEcho() + val reachableDomainResponse = EnvoyControlTestConfiguration.callDomain("www.example.com") + val unreachableDomainResponse = EnvoyControlTestConfiguration.callDomain("www.another-example.com") + + // then + assertThat(unreachableResponse).isUnreachable().hasLocationHeaderFrom("not-accessible") + assertThat(unregisteredResponse).isUnreachable().hasLocationHeaderFrom("unregistered") + assertThat(reachableResponse).isOk().isFrom(echoContainer) + assertThat(reachableDomainResponse).isOk() + assertThat(unreachableDomainResponse).isUnreachable() + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OutlierDetectionTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OutlierDetectionTest.kt new file mode 100644 index 000000000..d539425d0 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/OutlierDetectionTest.kt @@ -0,0 +1,66 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import 
pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration +import pl.allegro.tech.servicemesh.envoycontrol.config.envoy.EnvoyAdmin + +internal class OutlierDetectionTest : EnvoyControlTestConfiguration() { + companion object { + + private val properties = mapOf( + "envoy-control.envoy.snapshot.cluster-outlier-detection.enabled" to true + ) + + @JvmStatic + @BeforeAll + fun setupOutlierDetectionTest() { + setup(appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) }) + } + } + + @AfterEach + fun after() { + cleanupTest() + if (!echoContainer2.isRunning) { + echoContainer2.start() + } + } + + @Test + fun `should not send requests to instance when outlier check failed`() { + // given + val unhealthyIp = echoContainer2.ipAddress() + registerService(name = "echo") + registerService(name = "echo", container = echoContainer2) + echoContainer2.stop() + + untilAsserted { + // when + callEcho() + + // then + assertThat(hasOutlierCheckFailed(cluster = "echo", unhealthyIp = unhealthyIp)).isTrue() + } + + // when + repeat(times = 10) { + // when + val response = callEcho() + + // then + assertThat(response).isOk().isFrom(echoContainer) + } + } + + private fun hasOutlierCheckFailed(cluster: String, unhealthyIp: String): Boolean { + return EnvoyAdmin(address = envoyContainer.adminUrl()) + .hostStatus(cluster, unhealthyIp) + ?.healthStatus + ?.failedOutlierCheck + ?: false + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/RegexServicesFilterTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/RegexServicesFilterTest.kt new file mode 100644 index 000000000..d64683c90 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/RegexServicesFilterTest.kt @@ -0,0 +1,43 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.BeforeAll 
+import org.junit.jupiter.api.Test
+import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp
+import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration
+
+class RegexServicesFilterTest : EnvoyControlTestConfiguration() {
+
+    companion object {
+
+        private val properties = mapOf(
+            "envoy-control.service-filters.excluded-names-patterns" to ".*-[1-2]$".toRegex()
+        )
+
+        @JvmStatic
+        @BeforeAll
+        fun setupTest() {
+            setup(appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) })
+        }
+    }
+
+    @Test
+    fun `should not reach service whose name ends with number from 1 to 2`() {
+        // given: the same container under three names; only "service-1" and "service-2" match the excluded pattern
+        registerService(name = "service-1", container = echoContainer)
+        registerService(name = "service-2", container = echoContainer)
+        registerService(name = "service-3", container = echoContainer)
+
+        untilAsserted {
+            // when
+            val response1 = callService("service-1")
+            val response2 = callService("service-2")
+            val response3 = callService("service-3")
+
+            // then: names matching the pattern are unreachable, the non-matching one is served
+            assertThat(response1).isUnreachable()
+            assertThat(response2).isUnreachable()
+            assertThat(response3).isOk().isFrom(echoContainer)
+        }
+    }
+}
diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/RetryPolicyTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/RetryPolicyTest.kt
new file mode 100644
index 000000000..424b8a0e3
--- /dev/null
+++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/RetryPolicyTest.kt
@@ -0,0 +1,46 @@
+package pl.allegro.tech.servicemesh.envoycontrol
+
+import org.assertj.core.api.Assertions.assertThat
+import org.junit.jupiter.api.BeforeAll
+import org.junit.jupiter.api.Test
+import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp
+import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration
+
+internal class RetryPolicyTest : EnvoyControlTestConfiguration() {
+    companion
object { + + private val properties = mapOf( + "envoy-control.envoy.snapshot.local-service.retry-policy.per-http-method.GET.enabled" to true, + "envoy-control.envoy.snapshot.local-service.retry-policy.per-http-method.GET.retry-on" to listOf("connect-failure", "reset"), + "envoy-control.envoy.snapshot.local-service.retry-policy.per-http-method.GET.num-retries" to 3 + ) + + @JvmStatic + @BeforeAll + fun setupRetryPolicyTest() { + setup(appFactoryForEc1 = { consulPort -> EnvoyControlRunnerTestApp(properties, consulPort) }) + } + } + + @Test + fun `should retry request 3 times when application is down`() { + // given + localServiceContainer.stop() + + // when + callLocalService(endpoint = "/endpoint", clientServiceName = "authorizedClient") + + untilAsserted { + // then + assertThat(hasRetriedRequest(numberOfRetries = 3)).isTrue() + } + } + + private fun hasRetriedRequest(numberOfRetries: Long): Boolean { + return envoyContainer.admin() + .statValue("cluster.local_service.upstream_rq_retry") + ?.toLong() + ?.equals(numberOfRetries) + ?: false + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/BaseEnvoyTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/BaseEnvoyTest.kt new file mode 100644 index 000000000..1dea59648 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/BaseEnvoyTest.kt @@ -0,0 +1,152 @@ +package pl.allegro.tech.servicemesh.envoycontrol.config + +import org.awaitility.Duration +import org.testcontainers.containers.GenericContainer +import org.testcontainers.containers.Network +import org.testcontainers.junit.jupiter.Testcontainers +import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulClientConfig +import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulContainer +import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulOperations +import 
pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulServerConfig +import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulSetup +import pl.allegro.tech.servicemesh.envoycontrol.config.echo.EchoContainer +import java.io.File +import java.lang.Thread.sleep +import java.util.UUID +import java.util.concurrent.TimeUnit + +@Testcontainers +open class BaseEnvoyTest { + companion object { + val defaultDuration = Duration(60, TimeUnit.SECONDS) + val network: Network = Network.newNetwork() + + val echoContainer: EchoContainer = EchoContainer() + val echoContainer2: EchoContainer = EchoContainer() + + val consulMastersInDc1 = listOf( + ConsulSetup(network, ConsulServerConfig(1, "dc1")), + ConsulSetup(network, ConsulServerConfig(2, "dc1")), + ConsulSetup(network, ConsulServerConfig(3, "dc1")) + ) + + val consulMastersInDc2 = listOf( + ConsulSetup(network, ConsulServerConfig(1, "dc2")), + ConsulSetup(network, ConsulServerConfig(2, "dc2")), + ConsulSetup(network, ConsulServerConfig(3, "dc2")) + ) + + var consulAgentInDc1: ConsulSetup + var lowRpcConsulClient: ConsulSetup + + val consulOperationsInFirstDc = consulMastersInDc1[0].consulOperations + val consulOperationsInSecondDc = consulMastersInDc2[0].consulOperations + val consulHttpPort = consulMastersInDc1[0].port + val consul2HttpPort = consulMastersInDc2[0].port + val consul: ConsulContainer = consulMastersInDc1[0].container + + init { + echoContainer.start() + echoContainer2.start() + setupMultiDcConsul() + consulAgentInDc1 = ConsulSetup(network, ConsulClientConfig(1, "dc1", consul.ipAddress())) + consulAgentInDc1.container.start() + lowRpcConsulClient = setupLowRpcConsulClient() + } + + private fun setupLowRpcConsulClient(): ConsulSetup { + val client = ConsulSetup( + network, + ConsulClientConfig( + id = 2, + dc = "dc1", + serverAddress = consul.ipAddress(), + jsonFiles = listOf(File("testcontainers/consul-low-rpc-rate.json")) + ) + ) + client.container.start() + return client + } + + private fun 
setupMultiDcConsul() {
+            startConsulCluster(consulMastersInDc1)
+            startConsulCluster(consulMastersInDc2)
+            joinClusters(consulMastersInDc1, consulMastersInDc2)
+        }
+
+        private fun joinClusters(consulsInDc1: List<ConsulSetup>, consulsInDc2: List<ConsulSetup>) {
+            val consulInDc2ContainerNames = consulsInDc2.map { it.container.containerName() }.toTypedArray()
+            val args = arrayOf("consul", "join", "-wan", *consulInDc2ContainerNames) // identical for every dc1 server
+            consulsInDc1.forEach { consul ->
+                consul.container.execInContainer(*args)
+            }
+        }
+
+        private fun startConsulCluster(consuls: List<ConsulSetup>) {
+            consuls.forEach { consul ->
+                consul.container.start()
+            }
+            val consulContainerNames = consuls.map { it.container.containerName() }.toTypedArray() // read after start()
+            val args = arrayOf("consul", "join", *consulContainerNames)
+            consuls.forEach { consul ->
+                consul.container.execInContainer(*args)
+            }
+        }
+
+        fun registerService(
+            id: String,
+            name: String,
+            address: String,
+            port: Int,
+            consulOps: ConsulOperations = consulOperationsInFirstDc
+        ): String = consulOps.registerService(
+            id = id,
+            name = name,
+            address = address,
+            port = port
+        )
+
+        fun registerService(
+            id: String = UUID.randomUUID().toString(),
+            name: String,
+            container: GenericContainer<*> = echoContainer,
+            port: Int = EchoContainer.PORT,
+            consulOps: ConsulOperations = consulOperationsInFirstDc,
+            registerDefaultCheck: Boolean = false
+        ): String {
+            val echoContainerIp =
+                container.getContainerInfo().networkSettings.networks[(network as Network.NetworkImpl).name]!!.ipAddress
+            return consulOps.registerService(
+                id = id,
+                name = name,
+                address = echoContainerIp,
+                port = port,
+                registerDefaultCheck = registerDefaultCheck
+            )
+        }
+
+        fun registerServiceInRemoteDc(name: String, target: EchoContainer = echoContainer): String {
+            return registerService(
+                id = UUID.randomUUID().toString(),
+                name = name,
+                container = target,
+                consulOps = consulOperationsInSecondDc
+            )
+        }
+
+        fun deregisterService(id: String, consulOps: ConsulOperations =
consulOperationsInFirstDc) {
+            consulOps.deregisterService(id)
+        }
+
+        fun deregisterServiceInRemoteDc(id: String) {
+            consulOperationsInSecondDc.deregisterService(id)
+        }
+
+        fun deregisterAllServices() {
+            consulOperationsInFirstDc.deregisterAll()
+            consulOperationsInSecondDc.deregisterAll()
+            consulAgentInDc1.consulOperations.deregisterAll()
+            sleep(1000) // todo remove it?
+        }
+    }
+}
diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/EnvoyControlTestApp.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/EnvoyControlTestApp.kt
new file mode 100644
index 000000000..be938cd27
--- /dev/null
+++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/EnvoyControlTestApp.kt
@@ -0,0 +1,108 @@
+package pl.allegro.tech.servicemesh.envoycontrol.config
+
+import com.fasterxml.jackson.databind.DeserializationFeature
+import com.fasterxml.jackson.databind.ObjectMapper
+import com.fasterxml.jackson.module.kotlin.KotlinModule
+import com.pszymczyk.consul.infrastructure.Ports
+import okhttp3.OkHttpClient
+import okhttp3.Request
+import okhttp3.Response
+import org.springframework.boot.actuate.health.Status
+import org.springframework.boot.builder.SpringApplicationBuilder
+import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControl
+import pl.allegro.tech.servicemesh.envoycontrol.logger
+import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState
+import java.time.Duration
+
+interface EnvoyControlTestApp {
+    val appPort: Int
+    val grpcPort: Int
+    val appName: String
+    fun run()
+    fun stop()
+    fun isHealthy(): Boolean
+    fun getState(): ServicesState
+    fun getHealthStatus(): Health
+    fun <T> bean(clazz: Class<T>): T
+}
+
+class EnvoyControlRunnerTestApp(
+    val properties: Map<String, Any> = mapOf(), // merged over baseProperties in run(), so entries here win
+    val consulPort: Int,
+    val objectMapper: ObjectMapper = ObjectMapper()
+        .registerModule(KotlinModule())
+        .configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false),
+    override val grpcPort: Int = Ports.nextAvailable(),
+    override val appPort: Int = Ports.nextAvailable()
+) :
+    EnvoyControlTestApp {
+
+    override val appName = "envoy-control"
+    private lateinit var app: SpringApplicationBuilder
+
+    private val baseProperties = mapOf(
+        "spring.profiles.active" to "test",
+        "spring.jmx.enabled" to false,
+        "envoy-control.source.consul.port" to consulPort,
+        "envoy-control.envoy.snapshot.outgoing-permissions.enabled" to true,
+        "envoy-control.sync.polling-interval" to Duration.ofSeconds(1).seconds,
+        "envoy-control.server.port" to grpcPort
+    )
+
+    override fun run() {
+        app = SpringApplicationBuilder(EnvoyControl::class.java).properties(baseProperties + properties)
+        logger.info("starting EC on port $appPort, grpc: $grpcPort, consul: $consulPort") // logged first so ports are visible if startup fails
+        app.run("--server.port=$appPort", "-e test")
+    }
+
+    override fun stop() {
+        app.context().close()
+    }
+
+    override fun isHealthy(): Boolean = getApplicationStatusResponse().use { it.isSuccessful }
+
+    override fun getHealthStatus(): Health {
+        val response = getApplicationStatusResponse()
+        return objectMapper.readValue(response.body()?.use { it.string() }, Health::class.java)
+    }
+
+    override fun getState(): ServicesState {
+        val response = httpClient
+            .newCall(
+                Request.Builder()
+                    .get()
+                    .url("http://localhost:$appPort/state")
+                    .build()
+            )
+            .execute()
+        return objectMapper.readValue(response.body()?.use { it.string() }, ServicesState::class.java)
+    }
+
+    private fun getApplicationStatusResponse(): Response =
+        httpClient
+            .newCall(
+                Request.Builder()
+                    .get()
+                    .url("http://localhost:$appPort/actuator/health")
+                    .build()
+            )
+            .execute()
+
+    override fun <T> bean(clazz: Class<T>): T = app.context().getBean(clazz)
+        ?: throw IllegalStateException("Bean of type ${clazz.simpleName} not found in the context")
+
+    companion object {
+        val logger by logger()
+        private val httpClient = OkHttpClient.Builder()
+            .build()
+    }
+}
+
+data class Health(
+    val status: Status,
+    val details:
Map +) + +data class HealthDetails( + val status: Status +) diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/EnvoyControlTestConfiguration.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/EnvoyControlTestConfiguration.kt new file mode 100644 index 000000000..0e9b96ea5 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/config/EnvoyControlTestConfiguration.kt @@ -0,0 +1,266 @@ +package pl.allegro.tech.servicemesh.envoycontrol.config + +import okhttp3.OkHttpClient +import okhttp3.Request +import okhttp3.Response +import org.assertj.core.api.Assertions.assertThat +import org.assertj.core.api.ObjectAssert +import org.awaitility.Awaitility +import org.awaitility.Awaitility.await +import org.junit.jupiter.api.AfterAll +import org.junit.jupiter.api.AfterEach +import org.springframework.boot.actuate.health.Status +import pl.allegro.tech.servicemesh.envoycontrol.config.echo.EchoContainer +import pl.allegro.tech.servicemesh.envoycontrol.config.envoy.EnvoyContainer +import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState +import java.time.Duration +import java.util.concurrent.TimeUnit +import kotlin.random.Random + +sealed class EnvoyConfigFile(val filePath: String) +object AdsAllDependencies : EnvoyConfigFile("envoy/config_ads_all_dependencies.yaml") +object Ads : EnvoyConfigFile("envoy/config_ads.yaml") +object Xds : EnvoyConfigFile("envoy/config_xds.yaml") +object RandomConfigFile : + EnvoyConfigFile(filePath = if (Random.nextBoolean()) Ads.filePath else Xds.filePath) + +abstract class EnvoyControlTestConfiguration : BaseEnvoyTest() { + companion object { + private val client = OkHttpClient.Builder() + // envoys default timeout is 15 seconds while OkHttp is 10 + .readTimeout(Duration.ofSeconds(20)) + .build() + + lateinit var envoyContainer: EnvoyContainer + lateinit var localServiceContainer: EchoContainer + lateinit var 
/**
 * Stops what [setup] started: the Envoy container, the Envoy Control instance(s)
 * and the local echo service container.
 *
 * The second Envoy Control instance is only stopped when [setup] was asked for two instances,
 * because `envoyControl2` is not initialised otherwise.
 */
@AfterAll
@JvmStatic
fun teardown() {
    envoyContainer.stop()
    envoyControl1.stop()
    if (envoyControls == 2) {
        envoyControl2.stop()
    }
    // setup() creates and starts a new EchoContainer on every call; stop it here so that
    // containers do not accumulate across test classes.
    localServiceContainer.stop()
}
/**
 * Registers the running Envoy Control instance(s) as services in Consul.
 *
 * The first instance is always registered with id "1" in the first DC. When two instances are
 * configured, the second one is registered with id "2", either in the first DC
 * ([instancesInSameDc] = true) or in the second DC.
 *
 * @param ec1RegisterPort port to advertise for the first instance; defaults to its app port when null
 * @param ec2RegisterPort port to advertise for the second instance; defaults to its app port when null
 * @param instancesInSameDc whether both instances are registered through the first DC's Consul
 */
fun registerEnvoyControls(
    ec1RegisterPort: Int?,
    ec2RegisterPort: Int?,
    instancesInSameDc: Boolean
) {
    val firstInstancePort = ec1RegisterPort ?: envoyControl1.appPort
    registerService("1", envoyControl1.appName, "localhost", firstInstancePort, consulOperationsInFirstDc)

    if (envoyControls != 2) {
        return
    }

    val secondInstancePort = ec2RegisterPort ?: envoyControl2.appPort
    val secondInstanceConsul = when {
        instancesInSameDc -> consulOperationsInFirstDc
        else -> consulOperationsInSecondDc
    }
    registerService("2", envoyControl2.appName, "localhost", secondInstancePort, secondInstanceConsul)
}
/**
 * Repeatedly calls the echo service through Envoy until the response is successful
 * and its body contains the response text of [target].
 *
 * Each attempt's response is closed via `use`, including attempts whose assertion fails,
 * so retries do not leave unclosed responses behind.
 */
fun waitUntilEchoCalledThroughEnvoyResponds(target: EchoContainer) {
    untilAsserted {
        // when
        callEcho().use { response ->
            // then
            assertThat(response).isOk().isFrom(target)
        }
    }
}
/**
 * Launch description of a single Consul agent used by the tests.
 *
 * @property id numeric identifier of the agent
 * @property dc datacenter name; always emitted as the `datacenter` option
 * @property config agent command-line options without the leading dash; an empty value renders a bare flag
 * @property jsonFiles additional JSON configuration files for the agent
 */
sealed class ConsulConfig(
    val id: Int,
    val dc: String,
    val config: Map<String, String>,
    val jsonFiles: List<File> = listOf()
) {
    /**
     * Renders the full `consul agent` command line.
     * A `datacenter` entry in [config] is overridden by [dc].
     */
    fun launchCommand(): String {
        val options = config + mapOf("datacenter" to dc)
        val renderedOptions = options.entries.joinToString(" ") { option ->
            format(option.key, option.value)
        }
        return "consul agent " + renderedOptions
    }

    /** Renders one option: `-key` for an empty value, `-key=value` otherwise. */
    private fun format(key: String, value: String): String =
        when {
            value.isEmpty() -> "-$key"
            else -> "-$key=$value"
        }
}
/**
 * Consul agent container built from `consul:latest` with `iproute2` added.
 *
 * @param dc datacenter name, used for the default server configuration
 * @param externalPort host port bound to [internalPort]
 * @param id agent id, used for the default server configuration
 * @param consulConfig launch configuration; its [ConsulConfig.launchCommand] becomes the image command
 * @property internalPort Consul HTTP port inside the container
 */
class ConsulContainer(
    private val dc: String,
    private val externalPort: Int,
    private val id: Int,
    private val consulConfig: ConsulConfig = ConsulServerConfig(id, dc),
    val internalPort: Int = 8500
) : GenericContainer<ConsulContainer>(
    ImageFromDockerfile().withDockerfileFromBuilder {
        it.from("consul:latest")
            .run("apk", "add", "iproute2")
            .cmd(consulConfig.launchCommand())
            .expose(internalPort)
            .build()
    }) {

    companion object {
        const val pidFile = "/tmp/consul.pid"
        const val configDir = "/consul/config"
    }

    override fun configure() {
        super.configure()
        portBindings.add("$externalPort:$internalPort")
        awaitConsulReady()
    }

    /**
     * Maps the configured JSON files into [configDir], enables privileged mode and registers the
     * HTTP wait strategy: `/ui` for servers, `/v1/status/leader` for clients.
     */
    private fun awaitConsulReady(): ConsulContainer {
        consulConfig.jsonFiles.forEach { jsonFile ->
            withClasspathResourceMapping(jsonFile.path, "$configDir/${jsonFile.name}", BindMode.READ_ONLY)
        }
        withPrivilegedMode(true)
        return when (consulConfig) {
            is ConsulServerConfig -> waitingFor(Wait.forHttp("/ui").forStatusCode(200))
            is ConsulClientConfig -> waitingFor(Wait.forHttp("/v1/status/leader").forStatusCode(200))
        }
    }

    /** Flushes iptables and drops all traffic except on the loopback interface. */
    fun blockExternalTraffic() {
        val commands = arrayOf(
            "iptables -F",
            "iptables -P INPUT DROP",
            "iptables -P OUTPUT DROP",
            "iptables -P FORWARD DROP",
            "iptables -A INPUT -i lo -j ACCEPT",
            "iptables -A OUTPUT -o lo -j ACCEPT"
        )

        runCommands(commands)
    }

    /** Flushes iptables and restores ACCEPT as the default policy for all chains. */
    fun unblockExternalTraffic() {
        val commands = arrayOf(
            "iptables -F",
            "iptables -P INPUT ACCEPT",
            "iptables -P FORWARD ACCEPT",
            "iptables -P OUTPUT ACCEPT"
        )

        runCommands(commands)
    }

    /** Sends [signal] (e.g. `STOP`, `CONT`) to the process whose PID is stored in [pidFile]. */
    private fun sendSignal(signal: String) {
        // The output of `cat` is passed to `kill` as one argument, so surrounding whitespace
        // (such as a trailing newline) must not become part of the PID.
        val pid = this.execInContainer("cat", pidFile).stdout.trim()
        this.execInContainer("kill", "-$signal", pid)
    }

    override fun sigstop() {
        sendSignal("STOP")
    }

    override fun sigcont() {
        sendSignal("CONT")
    }
}
/**
 * Bundles a Consul container with a [ConsulOperations] client that talks to it.
 *
 * @param network network the container is attached to
 * @param consulConfig launch configuration of the agent
 * @property port port bound for the container and used by [consulOperations]
 */
@Testcontainers
class ConsulSetup(
    network: Network,
    consulConfig: ConsulConfig,
    val port: Int = Ports.nextAvailable()
) {
    val container: ConsulContainer = ConsulContainer(
        dc = consulConfig.dc,
        externalPort = port,
        id = consulConfig.id,
        consulConfig = consulConfig
    ).withNetwork(network)

    val consulOperations = ConsulOperations(port = port)
}
/**
 * Container based on `hashicorp/http-echo:latest`, started with `-text=<response>`.
 * [response] is a random UUID generated per instance, which lets tests tell instances apart.
 */
class EchoContainer : GenericContainer<EchoContainer>("hashicorp/http-echo:latest") {

    companion object {
        const val PORT = 5678
    }

    val response = UUID.randomUUID().toString()

    override fun configure() {
        super.configure()
        withExposedPorts(PORT)
        withNetwork(BaseEnvoyTest.network)
        withCommand("-text=%s".format(response))
        waitingFor(Wait.forHttp("/").forStatusCode(200))
    }

    /** Returns `<container ip>:<PORT>`. */
    fun address(): String {
        val ip = ipAddress()
        return "$ip:$PORT"
    }
}
/**
 * Reads a value from the admin `stats` endpoint using `stats?filter=<statName>`.
 *
 * Only the first line of the response is inspected; it is split on `:` and the second field
 * is returned trimmed.
 *
 * @return the value, or null when the response has no body or the first line contains no `:`
 *         (for example when the filter matched no stat)
 */
fun statValue(statName: String): String? = get("stats?filter=$statName").body()?.use { body ->
    body.string()
        .lines()
        .first()
        .split(":")
        // getOrNull instead of get(1): an empty or malformed first line yields null, not an exception
        .getOrNull(1)
        ?.trim()
}
/**
 * Envoy container built from `envoyproxy/envoy-alpine:latest` with `curl` and `iproute2` added.
 *
 * @param configPath classpath resource with the Envoy configuration, mounted at `/etc/envoy/envoy.yaml`
 * @param localServiceIp passed to the launch script after the config path
 * @param envoyControl1XdsPort first port passed to the launch script
 * @param envoyControl2XdsPort second port passed to the launch script; defaults to the first one
 */
class EnvoyContainer(
    private val configPath: String,
    private val localServiceIp: String,
    private val envoyControl1XdsPort: Int,
    private val envoyControl2XdsPort: Int = envoyControl1XdsPort
) : GenericContainer<EnvoyContainer>(ImageFromDockerfile().withDockerfileFromBuilder {
    // TODO (GITHUB-ISSUE): do not use `latest`. Whatever is tagged `latest` in the local cache is
    // treated as latest; https://github.com/moby/moby/issues/13331 may help with that, but a
    // floating tag still lets future image updates break the build.
    it.from("envoyproxy/envoy-alpine:latest")
        .run("apk --no-cache add curl iproute2")
        .build()
}) {

    companion object {
        val logger by loggerDelegate()

        private const val CONFIG_DEST = "/etc/envoy/envoy.yaml"
        private const val LAUNCH_ENVOY_SCRIPT = "envoy/launch_envoy.sh"
        private const val LAUNCH_ENVOY_SCRIPT_DEST = "/usr/local/bin/launch_envoy.sh"

        private const val EGRESS_LISTENER_CONTAINER_PORT = 5000
        private const val INGRESS_LISTENER_CONTAINER_PORT = 5001
        private const val ADMIN_PORT = 10000
    }

    override fun configure() {
        super.configure()

        withClasspathResourceMapping(
            LAUNCH_ENVOY_SCRIPT,
            LAUNCH_ENVOY_SCRIPT_DEST,
            BindMode.READ_ONLY
        )
        withClasspathResourceMapping(configPath, CONFIG_DEST, BindMode.READ_ONLY)

        withExposedPorts(EGRESS_LISTENER_CONTAINER_PORT, INGRESS_LISTENER_CONTAINER_PORT, ADMIN_PORT)
        withPrivilegedMode(true)

        withCommand(
            // run the script from the same path it was mounted at above
            "/bin/sh", LAUNCH_ENVOY_SCRIPT_DEST,
            envoyControl1XdsPort.toString(),
            envoyControl2XdsPort.toString(),
            CONFIG_DEST,
            localServiceIp,
            "-l", "debug"
        )
    }

    /** Forwards container output to [logger] with the `ENVOY` prefix. */
    override fun containerIsStarting(containerInfo: InspectContainerResponse?) {
        followOutput(Slf4jLogConsumer(logger).withPrefix("ENVOY"))
        super.containerIsStarting(containerInfo)
    }

    /** Egress listener URL; note the trailing slash. */
    fun egressListenerUrl() = "http://$containerIpAddress:${getMappedPort(EGRESS_LISTENER_CONTAINER_PORT)}/"

    /** Ingress listener URL, without a trailing slash (callers append the endpoint). */
    fun ingressListenerUrl() = "http://$containerIpAddress:${getMappedPort(INGRESS_LISTENER_CONTAINER_PORT)}"

    fun adminUrl() = "http://$containerIpAddress:${getMappedPort(ADMIN_PORT)}"

    fun admin() = EnvoyAdmin(adminUrl())
}
/**
 * Registers and deregisters echo instances for many services concurrently and verifies, after each
 * step, that Envoy's admin endpoint and traffic through Envoy reflect the change.
 */
@Tag("reliability")
class ConsulInstancePropagationTest : EnvoyControlTestConfiguration() {

    companion object {
        private val logger by logger()

        private const val verificationTimes = 1
        private const val services = 20
        private const val repeatScenarios = 10

        @JvmStatic
        @BeforeAll
        fun setupPropagationTest() {
            setup(
                envoyConfig = AdsAllDependencies,
                appFactoryForEc1 = { consulPort ->
                    EnvoyControlRunnerTestApp(
                        properties = mapOf(
                            "envoy-control.envoy.snapshot.stateSampleDuration" to Duration.ofSeconds(0),
                            "envoy-control.envoy.snapshot.outgoing-permissions.servicesAllowedToUseWildcard" to "test-service"
                        ),
                        consulPort = consulPort
                    )
                }
            )
        }
    }

    /**
     * This test is meant to stress test instance propagation from Consul to Envoy.
     * To run it as a real stress test, raise the parameters, e.g. services = 250, repeatScenarios = 50.
     */
    @Test
    fun `should test multiple services propagation`() {
        // given
        val runCount = LongAdder()
        val threadPool = Executors.newFixedThreadPool(services)

        try {
            // when
            val futures = (1..services).map {
                threadPool.submit { runAllScenarios(runCount) }
            }

            // then
            try {
                futures.forEach { it.get() }
            } catch (e: Exception) {
                // keep the cause so the failing scenario's stack trace is reported
                fail("Error running scenarios ${e.message}", e)
            }
            // checked only after all scenarios succeeded, so it cannot replace a scenario failure
            assertThat(runCount.sum().toInt()).isEqualTo(services * repeatScenarios)
        } finally {
            // release the worker threads; on failure this also interrupts scenarios still running
            threadPool.shutdownNow()
        }
    }

    /** Runs the four-step scenario [repeatScenarios] times for one randomly named service. */
    private fun runAllScenarios(runCount: LongAdder) {
        val scenarios = Scenarios("echo-" + Random.nextInt(10_000_000).toString())
        repeat(repeatScenarios) {
            Thread.sleep(Random.nextLong(1000))
            logger.info("Running scenarios for ${scenarios.serviceName} for ${it + 1} time")
            with(scenarios) {
                try {
                    spawnFirstInstance()
                    spawnSecondInstance()
                    destroySecondInstance()
                    destroyLastInstance()
                } catch (e: Throwable) {
                    logger.error("Error while running scenario", e)
                    throw e
                }
            }
            runCount.increment()
        }
    }

    /** Scenario steps for a single service name; steps are expected to run in declaration order. */
    inner class Scenarios(val serviceName: String) {
        var firstInstanceId: String? = null
        var secondInstanceId: String? = null

        fun spawnFirstInstance() {
            firstInstanceId = registerService(
                id = "$serviceName-1",
                name = serviceName,
                container = echoContainer
            )
            waitForEchosInAdmin(echoContainer)
            repeat(verificationTimes) {
                callService(serviceName).use {
                    assertThat(it).isOk().isFrom(echoContainer)
                }
            }
        }

        fun spawnSecondInstance() {
            secondInstanceId = registerService(
                id = "$serviceName-2",
                name = serviceName,
                container = echoContainer2
            )
            waitForEchosInAdmin(echoContainer, echoContainer2)
            repeat(verificationTimes) {
                callService(serviceName).use {
                    assertThat(it).isOk().isEitherFrom(echoContainer, echoContainer2)
                }
            }
        }

        fun destroySecondInstance() {
            deregisterService(secondInstanceId!!)
            waitForEchosInAdmin(echoContainer)
            repeat(verificationTimes) {
                callService(serviceName).use {
                    assertThat(it).isOk().isFrom(echoContainer)
                }
            }
        }

        fun destroyLastInstance() {
            deregisterService(firstInstanceId!!)
            waitForEchosInAdmin()
            repeat(verificationTimes) {
                callService(serviceName).use {
                    assertThat(it).isUnreachable()
                }
            }
        }

        private val admin = envoyContainer.admin()

        /** Waits until the cluster's endpoints in Envoy admin are exactly the given containers' addresses. */
        private fun waitForEchosInAdmin(vararg containers: EchoContainer) {
            untilAsserted {
                val addresses = admin
                    .endpointsAddress(clusterName = serviceName)
                    .map { "${it.address}:${it.portValue}" }
                assertThat(addresses)
                    .hasSize(containers.size)
                    .containsExactlyInAnyOrderElementsOf(containers.map { it.address() })
            }
        }
    }

    /** Passes when the response body contains the response text of any of [echoContainers]. */
    fun ObjectAssert<Response>.isEitherFrom(vararg echoContainers: EchoContainer): ObjectAssert<Response> {
        matches {
            val serviceResponse = it.body()?.string() ?: ""
            echoContainers.any { container -> serviceResponse.contains(container.response) }
        }
        return this
    }
}
/**
 * Checks that the echo service becomes reachable through Envoy even though the Consul agent used
 * by Envoy Control keeps answering RPC calls with [OperationException] for a while.
 */
@Suppress("SwallowedException")
internal class ConsulRpcLimitReachedTest : ReliabilityTest() {

    companion object {
        @JvmStatic
        @BeforeAll
        fun setup() {
            setup(
                appFactoryForEc1 = {
                    EnvoyControlRunnerTestApp(
                        consulPort = lowRpcConsulClient.port,
                        grpcPort = toxiproxyGrpcPort
                    )
                },
                envoyConnectGrpcPort = externalEnvoyControl1GrpcPort
            )
        }
    }

    @Test
    fun `is resilient to ECs consul client reaching RPC limit`() {
        // given
        // original author's caveat: a failureDuration of 1 second divided by 2 becomes 0 seconds
        val registrationDelay = failureDuration.divide(2L)
        registerEchoInOtherAgentAfter(registrationDelay)

        // when
        holdAssertionsTrue(interval = Duration.ONE_SECOND) {
            rpcLimitReached()
        }

        // then
        assertEchoReachableThroughProxy()
    }

    /** Waits, for twice the default duration at most, until `echo` answers through Envoy. */
    private fun assertEchoReachableThroughProxy() {
        val extendedWait = defaultDuration.multiply(2L)
        untilAsserted(wait = extendedWait) {
            callService("echo").use { response ->
                assertThat(response).isOk().isFrom(echoContainer)
            }
        }
    }

    /** Registers the `echo` service from a background thread after sleeping for [time]. */
    private fun registerEchoInOtherAgentAfter(time: Duration) {
        val delayedRegistration = Thread {
            Thread.sleep(time.valueInMS)
            registerService(name = "echo")
        }
        delayedRegistration.start()
    }

    /** Waits until a burst of RPC calls is rejected at least once. */
    private fun rpcLimitReached() = untilAsserted {
        val limitReached = burstRpcLimit(lowRpcConsulClient.consulOperations)
        assertThat(limitReached).isEqualTo(true)
    }

    /**
     * Issues five RPC operations in a row (all five are always sent).
     *
     * @return true when at least one of them failed with [OperationException]
     */
    private fun burstRpcLimit(consulOperations: ConsulOperations): Boolean {
        val rejections = (1..5).map {
            try {
                consulOperations.anyRpcOperation()
                false
            } catch (e: OperationException) {
                true
            }
        }
        return rejections.any { rejected -> rejected }
    }
}
000000000..a7fd1dc1e --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/DcCutOffTest.kt @@ -0,0 +1,137 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulSetup +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec2HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl2HttpPort + +class DcCutOffTest : ReliabilityTest() { + companion object { + + private val properties = mapOf( + "envoy-control.sync.enabled" to true + ) + + @JvmStatic + @BeforeAll + fun setup() { + setup( + envoyControls = 2, + appFactoryForEc1 = { + EnvoyControlRunnerTestApp( + consulPort = consulHttpPort, + properties = properties, + appPort = ec1HttpPort + ) + }, + appFactoryForEc2 = { + EnvoyControlRunnerTestApp( + consulPort = consul2HttpPort, + appPort = ec2HttpPort + ) + }, + ec1RegisterPort = externalEnvoyControl1HttpPort, + ec2RegisterPort = externalEnvoyControl2HttpPort + ) + } + } + + @Test + fun `should be resilient to transient unavailability of one DC`() { + // given + val id = registerServiceInRemoteDc("echo", echoContainer) + + // then + waitUntilEchoCalledThroughEnvoyResponds(echoContainer) + + // when + cutOffConnectionBetweenDCs() + makeChangesInRemoteDcAsynchronously(id) + + // then + holdAssertionsTrue { + assertUnreachableThroughEnvoy("echo") + } + + // when + restoreConnectionBetweenDCs() + + // then + waitUntilEchoCalledThroughEnvoyResponds(echoContainer2) + } + + private fun 
/**
 * Blocks or restores traffic from every Consul in [from] towards the peers reported by the
 * first Consul in [to].
 *
 * Peer entries are `host:port` strings; only the host part is used.
 *
 * @param to Consul setups whose first element is asked for its peers
 * @param from Consul setups whose containers get traffic blocked or unblocked
 * @param operation whether to block or restore the connection
 */
private fun modifyConnection(
    to: List<ConsulSetup>,
    from: List<ConsulSetup>,
    operation: ModifyConnection
) {
    val peerIps = to[0].consulOperations.peers().map { peer -> peer.split(":")[0] }
    for (ip in peerIps) {
        for (consul in from) {
            when (operation) {
                ModifyConnection.BLOCK -> consul.container.blockTrafficTo(ip)
                ModifyConnection.RESTORE -> consul.container.unblockTrafficTo(ip)
            }
        }
    }
}
+ + private fun restoreConnectionBetweenConsuls() { + restoreConsulTraffic(consulMastersInDc1, consulMastersInDc2) + restoreConsulTraffic(consulMastersInDc2, consulMastersInDc1) + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnovyControlInstanceDownInOneDc.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnovyControlInstanceDownInOneDc.kt new file mode 100644 index 000000000..e88ab7509 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnovyControlInstanceDownInOneDc.kt @@ -0,0 +1,76 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec2HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1GrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl2GrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl2HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.toxiproxyGrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.toxiproxyGrpcPort2 + +internal class EnovyControlInstanceDownInOneDc : ReliabilityTest() { + + companion object { + + @JvmStatic + @BeforeAll + fun setup() { + setup( + envoyControls = 2, + appFactoryForEc1 = { + EnvoyControlRunnerTestApp( + consulPort = consulHttpPort, + appPort = ec1HttpPort, + grpcPort = 
toxiproxyGrpcPort + ) + }, + appFactoryForEc2 = { + EnvoyControlRunnerTestApp( + consulPort = consulHttpPort, + appPort = ec2HttpPort, + grpcPort = toxiproxyGrpcPort2 + ) + }, + ec1RegisterPort = externalEnvoyControl1HttpPort, + ec2RegisterPort = externalEnvoyControl2HttpPort, + envoyConnectGrpcPort = externalEnvoyControl1GrpcPort, + envoyConnectGrpcPort2 = externalEnvoyControl2GrpcPort, + instancesInSameDc = true + ) + } + } + + @Test + fun `is resilient to one instance of EnvoyControl failure in same dc`() { + // given - force envoy to make grpc connection with first EC instance + makeEnvoyControl2Unavailable() + + // when + registerService(name = "service-1") + + // then - ensure it has grpc connection by calling service + assertReachableThroughEnvoy("service-1") + + // when - break grpc connection by making first instance unavailable + makeEnvoyControlUnavailable() + + // and + registerService(name = "service-2") + + // then - ensure that there is no grpc connection envoy - ec + holdAssertionsTrue { + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + } + + // when - start second instance of EC + makeEnvoyControl2Available() + + // then - ensure that envoy connects to second instance + assertReachableThroughEnvoy("service-2") + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownInAllDcs.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownInAllDcs.kt new file mode 100644 index 000000000..15bfd623b --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownInAllDcs.kt @@ -0,0 +1,79 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import 
pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec2HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1GrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl2GrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl2HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.toxiproxyGrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.toxiproxyGrpcPort2 + +internal class EnvoyControlDownInAllDcs : ReliabilityTest() { + + companion object { + + private val properties = mapOf( + "envoy-control.sync.enabled" to true + ) + + @JvmStatic + @BeforeAll + fun setup() { + setup( + envoyControls = 2, + appFactoryForEc1 = { + EnvoyControlRunnerTestApp( + properties = properties, + consulPort = consulHttpPort, + appPort = ec1HttpPort, + grpcPort = toxiproxyGrpcPort + ) + }, + appFactoryForEc2 = { + EnvoyControlRunnerTestApp( + properties = properties, + consulPort = consul2HttpPort, + appPort = ec2HttpPort, + grpcPort = toxiproxyGrpcPort2 + ) + }, + ec1RegisterPort = externalEnvoyControl1HttpPort, + ec2RegisterPort = externalEnvoyControl2HttpPort, + envoyConnectGrpcPort = externalEnvoyControl1GrpcPort, + envoyConnectGrpcPort2 = externalEnvoyControl2GrpcPort + ) + } + } + + @Test + fun `should allow to communicate between already known clusters when all ECs are down`() { + // given + registerServiceInRemoteDc(name = "service-1") + assertReachableThroughEnvoy("service-1") + + // when + makeEnvoyControlUnavailable() + makeEnvoyControl2Unavailable() + + // and + registerService(name = "service-2") + + // then + 
holdAssertionsTrue { + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + } + + // when + makeEnvoyControlAvailable() + makeEnvoyControl2Available() + + // then + assertReachableThroughEnvoy("service-1") + assertReachableThroughEnvoy("service-2") + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownInOneDc.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownInOneDc.kt new file mode 100644 index 000000000..de7a7f05f --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownInOneDc.kt @@ -0,0 +1,75 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.ec2HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1HttpPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl2HttpPort + +internal class EnvoyControlDownInOneDc : ReliabilityTest() { + + companion object { + + private val properties = mapOf( + "envoy-control.sync.enabled" to true + ) + + @JvmStatic + @BeforeAll + fun setup() { + setup( + envoyControls = 2, + appFactoryForEc1 = { + EnvoyControlRunnerTestApp( + consulPort = consulHttpPort, + properties = properties, + appPort = ec1HttpPort + ) + }, + appFactoryForEc2 = { + EnvoyControlRunnerTestApp( + consulPort = consul2HttpPort, + appPort = ec2HttpPort + ) + }, + ec1RegisterPort = externalEnvoyControl1HttpPort, + ec2RegisterPort = externalEnvoyControl2HttpPort + ) + } + } + + @Test + fun 
`should be resilient to transient unavailability of EC in one DC`() { + // given + val id = registerServiceInRemoteDc("echo", echoContainer) + + // then + waitUntilEchoCalledThroughEnvoyResponds(echoContainer) + + // when + cutOffConnectionBetweenECs() + makeChangesInRemoteDcAsynchronously(id) + + // then + holdAssertionsTrue { + assertReachableThroughEnvoy("echo") + } + + // when + restoreConnectionBetweenECs() + + // then + waitUntilEchoCalledThroughEnvoyResponds(echoContainer2) + } + + private fun makeChangesInRemoteDcAsynchronously(id: String) { + Thread { + // if failureDuration is Duration(1, SECONDS).divide(2) then Duration(0, SECONDS) + Thread.sleep(failureDuration.divide(2L).valueInMS) + deregisterServiceInRemoteDc(id) + registerServiceInRemoteDc("echo", echoContainer2) + }.start() + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownTest.kt new file mode 100644 index 000000000..939bc44a8 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/EnvoyControlDownTest.kt @@ -0,0 +1,32 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.Test + +internal class EnvoyControlDownTest : ReliabilityTest() { + + @Test + fun `is resilient to EnvoyControl failure in one dc`() { + // given + registerService(name = "service-1") + assertReachableThroughEnvoy("service-1") + + // when + makeEnvoyControlUnavailable() + + // Service registration is not affected by injected Consul faults, it bypasses toxiproxy + registerService(name = "service-2") + + // then + holdAssertionsTrue { + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + } + + // and when + makeEnvoyControlAvailable() + + // then + assertReachableThroughEnvoy("service-1") + 
assertReachableThroughEnvoy("service-2") + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/LocalConsulAgentDownTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/LocalConsulAgentDownTest.kt new file mode 100644 index 000000000..f0d23fcb9 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/LocalConsulAgentDownTest.kt @@ -0,0 +1,68 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.consulProxy + +internal class LocalConsulAgentDownTest : ReliabilityTest() { + @Test + fun `is resilient to transient unavailability of EC's local Consul agent`() { + // given + registerService(name = "service-1") + assertReachableThroughEnvoy("service-1") + + // when + makeConsulUnavailable() + // Service registration is not affected by injected Consul faults, it bypasses toxiproxy + registerService(name = "service-2") + + // then + holdAssertionsTrue { + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + } + + // and when + makeConsulAvailable() + + // then + assertReachableThroughEnvoy("service-1") + assertReachableThroughEnvoy("service-2") + } + + @Test + fun `is resilient to transient unavailability of target service's local Consul agent`() { + // given + registerService(name = "echo", container = echoContainer, consulOps = consulAgentInDc1.consulOperations, registerDefaultCheck = true) + // then + assertReachableThroughEnvoy("echo") + + // when + makeServiceConsulAgentUnavailable() + // then + holdAssertionsTrue { + assertUnreachableThroughEnvoy("echo") + } + + // when + makeServiceConsulAgentAvailable() + + // then + assertReachableThroughEnvoy("echo") + } + + private fun makeServiceConsulAgentAvailable() { + 
consulAgentInDc1.container.unblockExternalTraffic() + } + + private fun makeServiceConsulAgentUnavailable() { + consulAgentInDc1.container.blockExternalTraffic() + } + + private fun makeConsulAvailable() { + consulProxy.enable() + } + + private fun makeConsulUnavailable() { + consulProxy.disable() + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/LocalConsulAgentToMasterCutOff.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/LocalConsulAgentToMasterCutOff.kt new file mode 100644 index 000000000..71076d753 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/LocalConsulAgentToMasterCutOff.kt @@ -0,0 +1,39 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.Test + +internal class LocalConsulAgentToMasterCutOff : ReliabilityTest() { + + @Test + fun `should register service when communication between local agent and master is restored`() { + // given + registerService(name = "service-1") + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + + // when + consulMastersInDc1.forEach { + it.container.blockExternalTraffic() + } + + // and + registerService(name = "service-2", consulOps = consulAgentInDc1.consulOperations) + + // then + holdAssertionsTrue { + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + } + + // when + consulMastersInDc1.forEach { + it.container.unblockExternalTraffic() + } + + // then + holdAssertionsTrue { + assertReachableThroughEnvoy("service-1") + assertReachableThroughEnvoy("service-2") + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/NoConsulLeaderTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/NoConsulLeaderTest.kt new file mode 100644 index 
000000000..a70a7ee84 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/NoConsulLeaderTest.kt @@ -0,0 +1,45 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.junit.jupiter.api.Test + +internal class NoConsulLeaderTest : ReliabilityTest() { + @Test + fun `is resilient to consul cluster without a leader`() { + // given + registerService(name = "service-1") + assertReachableThroughEnvoy("service-1") + + // when + makeConsulClusterLoseLeader() + assertConsulHasNoLeader() + + registerService(name = "service-2") + + // then + holdAssertionsTrue { + assertConsulHasNoLeader() + assertReachableThroughEnvoy("service-1") + assertUnreachableThroughEnvoy("service-2") + } + + // when + makeConsulClusterRegainLeader() + + // then + assertConsulHasALeader() + assertReachableThroughEnvoy("service-1") + assertReachableThroughEnvoy("service-2") + } + + private fun makeConsulClusterRegainLeader() { + consulMastersInDc1.drop(1).forEach { consul -> + consul.container.sigcont() + } + } + + private fun makeConsulClusterLoseLeader() { + consulMastersInDc1.drop(1).forEach { consul -> + consul.container.sigstop() + } + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/ReliabilityTest.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/ReliabilityTest.kt new file mode 100644 index 000000000..a841086c5 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/ReliabilityTest.kt @@ -0,0 +1,174 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import com.google.common.base.Strings +import org.assertj.core.api.Assertions.assertThat +import org.awaitility.Duration +import org.junit.jupiter.api.AfterAll +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.BeforeAll +import org.junit.jupiter.api.Tag +import 
pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlRunnerTestApp +import pl.allegro.tech.servicemesh.envoycontrol.config.EnvoyControlTestConfiguration +import pl.allegro.tech.servicemesh.envoycontrol.config.consul.ConsulOperations +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.envoyControl1HttpProxy +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.envoyControl1Proxy +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.envoyControl2HttpProxy +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.envoyControl2Proxy +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalConsulPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.externalEnvoyControl1GrpcPort +import pl.allegro.tech.servicemesh.envoycontrol.reliability.Toxiproxy.Companion.toxiproxyGrpcPort +import java.util.concurrent.TimeUnit + +@Tag("reliability") +open class ReliabilityTest : EnvoyControlTestConfiguration() { + + companion object { + @JvmStatic + @BeforeAll + fun setup() { + setup( + appFactoryForEc1 = { + EnvoyControlRunnerTestApp( + consulPort = externalConsulPort, + grpcPort = toxiproxyGrpcPort + ) + }, + envoyConnectGrpcPort = externalEnvoyControl1GrpcPort + ) + } + + @JvmStatic + @AfterAll + fun after() { + teardown() + makeEnvoyControlAvailable() + makeEnvoyControl2Available() + } + + fun makeEnvoyControlAvailable() { + envoyControl1Proxy.enable() + envoyControl1HttpProxy.enable() + } + + fun makeEnvoyControlUnavailable() { + envoyControl1Proxy.disable() + envoyControl1HttpProxy.disable() + } + + fun makeEnvoyControl2Available() { + envoyControl2Proxy.enable() + envoyControl2HttpProxy.enable() + } + + fun makeEnvoyControl2Unavailable() { + envoyControl2Proxy.disable() + envoyControl2HttpProxy.disable() + } + + fun cutOffConnectionBetweenECs() { + envoyControl1HttpProxy.disable() + 
envoyControl2HttpProxy.disable() + } + + fun restoreConnectionBetweenECs() { + envoyControl1HttpProxy.enable() + envoyControl2HttpProxy.enable() + } + } + + @AfterEach + fun cleanup() { + ((consulMastersInDc1 + consulMastersInDc2).map { + it.container + } + listOf(envoyContainer, echoContainer, echoContainer2)).forEach { + it.sigcont() + it.clearAllIptablesRules() + } + } + + fun assertConsulHasNoLeader(consulOperations: ConsulOperations = consulOperationsInFirstDc) { + untilAsserted { + assertThat(consulOperations.leader()).isEmpty() + } + } + + fun assertConsulHasALeader(consulOperations: ConsulOperations = consulOperationsInFirstDc) { + untilAsserted { + assertThat(consulOperations.leader()).isNotEmpty() + } + } + + fun assertReachableThroughEnvoy(service: String) { + untilAsserted { + assertReachableThroughEnvoyOnce(service) + } + } + + fun assertReachableThroughEnvoyOnce(service: String) { + callService(service).use { + assertThat(it).isOk().isFrom(echoContainer) + } + } + + fun assertUnreachableThroughEnvoy(service: String) { + untilAsserted { + assertUnreachableThroughEnvoyOnce(service) + } + } + + fun assertUnreachableThroughEnvoyOnce(service: String) { + callService(service).use { + assertThat(it).isUnreachable() + } + } + + fun holdAssertionsTrue( + duration: Duration = failureDuration, + interval: Duration, + assertion: () -> Unit + ) { + val intervalInMs = interval.valueInMS + val probes = if (intervalInMs > 0L) duration.valueInMS / intervalInMs else 0L // zero interval: probe once, avoid division by zero + runRepeat(probes, intervalInMs, assertion) + } + + fun holdAssertionsTrue( + duration: Duration = failureDuration, + probes: Long = 10L, + assertion: () -> Unit + ) { + val millis = duration.valueInMS + val interval = if (probes > 0L) millis / probes else millis // zero probes: runRepeat probes once, avoid division by zero + runRepeat(probes, interval, assertion) + } + + private fun runRepeat(probes: Long, intervalInMs: Long, assertion: () -> Unit) { + if (probes == 0L) { + repeatWithSleep(1, intervalInMs) { + assertion() + } + } else { + repeatWithSleep(probes, intervalInMs) { + assertion() + } + } + } + + private fun
repeatWithSleep(probes: Long, interval: Long, assertion: () -> Unit) { + repeat(probes.toInt()) { + assertion() + + if (interval > 0) { + Thread.sleep(interval) + } + } + } + + val failureDuration = Duration( + System.getProperty("RELIABILITY_FAILURE_DURATION_SECONDS") + ?.let { Strings.emptyToNull(it) } + ?.toLong() + ?: 20, + TimeUnit.SECONDS + ) +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/Toxiproxy.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/Toxiproxy.kt new file mode 100644 index 000000000..c7e2d9f29 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/Toxiproxy.kt @@ -0,0 +1,76 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import com.pszymczyk.consul.infrastructure.Ports +import eu.rekawek.toxiproxy.Proxy +import eu.rekawek.toxiproxy.ToxiproxyClient +import org.testcontainers.junit.jupiter.Testcontainers +import pl.allegro.tech.servicemesh.envoycontrol.config.BaseEnvoyTest.Companion.consul +import pl.allegro.tech.servicemesh.envoycontrol.config.BaseEnvoyTest.Companion.network +import pl.allegro.tech.servicemesh.envoycontrol.reliability.ToxiproxyContainer.Companion.internalToxiproxyPort +import pl.allegro.tech.servicemesh.envoycontrol.testcontainers.GenericContainer.Companion.allInterfaces + +@Testcontainers +internal class Toxiproxy { + companion object { + val toxiproxyGrpcPort = Ports.nextAvailable() + val toxiproxyGrpcPort2 = Ports.nextAvailable() + val ec1HttpPort = Ports.nextAvailable() + val ec2HttpPort = Ports.nextAvailable() + private const val internalConsulProxyPort = 1337 + private const val internalEnvoyControl1GrpcPort = 1338 + private const val internalEnvoyControl2GrpcPort = 1339 + private const val internalEnvoyControl1HttpPort = 1340 + private const val internalEnvoyControl2HttpPort = 1341 + + val toxiContainer: ToxiproxyContainer = ToxiproxyContainer() + 
.withNetwork(network) + .withExposedPorts( + internalToxiproxyPort, + internalConsulProxyPort, + internalEnvoyControl1GrpcPort, + internalEnvoyControl2GrpcPort, + internalEnvoyControl1HttpPort, + internalEnvoyControl2HttpPort + ) + + init { + toxiContainer.start() + } + private val client = ToxiproxyClient("localhost", toxiContainer.getMappedPort(internalToxiproxyPort)) + + val consulProxy: Proxy = client.createProxy( + "consul", + "$allInterfaces:$internalConsulProxyPort", + "${consul.ipAddress()}:${consul.internalPort}" + ) + val externalConsulPort: Int = toxiContainer.getMappedPort(internalConsulProxyPort) + val externalEnvoyControl1GrpcPort: Int = toxiContainer.getMappedPort(internalEnvoyControl1GrpcPort) + val externalEnvoyControl2GrpcPort: Int = toxiContainer.getMappedPort(internalEnvoyControl2GrpcPort) + val externalEnvoyControl1HttpPort: Int = toxiContainer.getMappedPort(internalEnvoyControl1HttpPort) + val externalEnvoyControl2HttpPort: Int = toxiContainer.getMappedPort(internalEnvoyControl2HttpPort) + + val envoyControl1Proxy: Proxy = client.createProxy( + "envoyControl1", + "$allInterfaces:$internalEnvoyControl1GrpcPort", + "${toxiContainer.hostIp()}:$toxiproxyGrpcPort" + ) + + val envoyControl2Proxy: Proxy = client.createProxy( + "envoyControl2", + "$allInterfaces:$internalEnvoyControl2GrpcPort", + "${toxiContainer.hostIp()}:$toxiproxyGrpcPort2" + ) + + val envoyControl1HttpProxy: Proxy = client.createProxy( + "ec1ToEc2", + "$allInterfaces:$internalEnvoyControl1HttpPort", + "${toxiContainer.hostIp()}:$ec1HttpPort" + ) + + val envoyControl2HttpProxy: Proxy = client.createProxy( + "ec2ToEc1", + "$allInterfaces:$internalEnvoyControl2HttpPort", + "${toxiContainer.hostIp()}:$ec2HttpPort" + ) + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/ToxiproxyContainer.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/ToxiproxyContainer.kt new file mode 100644 index 
000000000..54494d98f --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/reliability/ToxiproxyContainer.kt @@ -0,0 +1,17 @@ +package pl.allegro.tech.servicemesh.envoycontrol.reliability + +import org.testcontainers.containers.wait.strategy.Wait +import pl.allegro.tech.servicemesh.envoycontrol.testcontainers.GenericContainer + +class ToxiproxyContainer : + GenericContainer("shopify/toxiproxy:latest") { + + companion object { + const val internalToxiproxyPort = 8474 + } + + override fun configure() { + super.configure() + waitingFor(Wait.forHttp("/version").forPort(internalToxiproxyPort)) + } +} diff --git a/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/testcontainers/GenericContainer.kt b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/testcontainers/GenericContainer.kt new file mode 100644 index 000000000..41e4ce1c0 --- /dev/null +++ b/envoy-control-tests/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/testcontainers/GenericContainer.kt @@ -0,0 +1,113 @@ +package pl.allegro.tech.servicemesh.envoycontrol.testcontainers + +import org.testcontainers.containers.BindMode +import org.testcontainers.containers.Network +import org.testcontainers.images.builder.ImageFromDockerfile +import org.testcontainers.containers.GenericContainer as BaseGenericContainer + +open class GenericContainer> : BaseGenericContainer { + constructor(image: ImageFromDockerfile) : super(image) + constructor(dockerImageName: String) : super(dockerImageName) + + private val HOST_IP_SCRIPT = "testcontainers/host_ip.sh" + private val HOST_IP_SCRIPT_DEST = "/usr/local/bin/host_ip.sh" + + companion object { + const val allInterfaces = "0.0.0.0" + } + + override fun configure() { + super.configure() + withClasspathResourceMapping(HOST_IP_SCRIPT, HOST_IP_SCRIPT_DEST, BindMode.READ_ONLY) + } + + override fun withClasspathResourceMapping( + resourcePath: String?, + containerPath: String?, + 
mode: BindMode? + ): SELF { + return if (notAlreadyMounted(containerPath)) { + super.withClasspathResourceMapping(resourcePath, containerPath, mode) + } else { + this.self() + } + } + + fun notAlreadyMounted(destination: String?) = binds.none { it.volume.path == destination } + + fun hostIp(): String { + val result = execInContainer(HOST_IP_SCRIPT_DEST) + + if (result.stderr.isNotEmpty() or result.stdout.isEmpty()) { + throw ContainerUnableToObtainHostIpException() + } + + return result.stdout.trim() + } + + fun ipAddress(): String = + containerInfo + .networkSettings + .networks[(network as Network.NetworkImpl).name]!! + .ipAddress + + open fun sigstop() { + sendSignal("STOP") + } + + open fun sigcont() { + sendSignal("CONT") + } + + fun blockTrafficTo(ip: String) { + runCommands( + arrayOf( + "iptables -A INPUT -s $ip -j DROP", + "iptables -A OUTPUT -d $ip -j DROP" + ) + ) + } + + fun unblockTrafficTo(ip: String) { + runCommands( + arrayOf( + "iptables -D INPUT -s $ip -j DROP", + "iptables -D OUTPUT -d $ip -j DROP" + ) + ) + } + + fun clearAllIptablesRules() { + runCommands( + arrayOf( + "iptables -t nat -F", + "iptables -t mangle -F", + "iptables -F", + "iptables -X" + ) + ) + } + + fun runCommands(commands: Array) { + commands.forEach { command -> + execInContainer(*(command.split(" ").toTypedArray())) + } + } + + fun restart() = dockerClient.restartContainerCmd(containerId).exec() + + private fun sendSignal(signal: String) { + getDockerClient() + .killContainerCmd(getContainerId()) + .withSignal(signal) + .exec() + } + + /** + * The container host name is randomly generated based on the first 12 characters of the container ID. 
+ * https://developer.ibm.com/articles/dm-1602-db2-docker-trs/ + */ + fun containerName() = containerId.substring(0, 12) +} + +class ContainerUnableToObtainHostIpException : RuntimeException() diff --git a/envoy-control-tests/src/main/resources/envoy/config_ads.yaml b/envoy-control-tests/src/main/resources/envoy/config_ads.yaml new file mode 100644 index 000000000..80259702a --- /dev/null +++ b/envoy-control-tests/src/main/resources/envoy/config_ads.yaml @@ -0,0 +1,102 @@ +admin: + access_log_path: /dev/null + address: + socket_address: { address: 0.0.0.0, port_value: 10000 } +dynamic_resources: + lds_config: {ads: {}} + cds_config: {ads: {}} + ads_config: + api_type: GRPC + grpc_services: + envoy_grpc: + cluster_name: envoy-control-xds +node: + cluster: test-cluster + id: test-id + metadata: + ads: true + proxy_settings: + incoming: + endpoints: + - path: "/endpoint" + clients: ["authorizedClient"] + outgoing: + dependencies: + - service: "service-1" + - service: "service-2" + - service: "service-3" + - service: "service-4" + - service: "service-5" + - service: "echo" + - service: "consul" + - domain: "https://www.example.com" + +static_resources: + clusters: + - connect_timeout: 1s + hosts: + - socket_address: + address: HOST_IP + port_value: HOST_PORT + - socket_address: + address: HOST_IP + port_value: HOST2_PORT + http2_protocol_options: {} + name: envoy-control-xds + - name: envoy-original-destination + type: ORIGINAL_DST + lb_policy: ORIGINAL_DST_LB + original_dst_lb_config: + use_http_header: true + connect_timeout: + seconds: 1 + http_protocol_options: + allow_absolute_url: true + - name: local_service + type: STATIC + hosts: + - socket_address: + address: LOCAL_SERVICE_IP + port_value: 5678 + connect_timeout: 1s + - name: this_admin + type: STATIC + hosts: + - socket_address: + address: 127.0.0.1 + port_value: 10000 + connect_timeout: + seconds: 1 + listeners: + - name: default_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 5000 + 
filter_chains: + filters: + - name: envoy.http_connection_manager + config: + stat_prefix: egress_http + rds: + route_config_name: default_routes + config_source: + ads: {} + http_filters: + - name: envoy.router + - name: ingress_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 5001 + filter_chains: + - filters: + - name: envoy.http_connection_manager + config: + stat_prefix: ingress_http + rds: + route_config_name: ingress_secured_routes + config_source: + ads: {} + http_filters: + - name: envoy.router \ No newline at end of file diff --git a/envoy-control-tests/src/main/resources/envoy/config_ads_all_dependencies.yaml b/envoy-control-tests/src/main/resources/envoy/config_ads_all_dependencies.yaml new file mode 100644 index 000000000..562d733a3 --- /dev/null +++ b/envoy-control-tests/src/main/resources/envoy/config_ads_all_dependencies.yaml @@ -0,0 +1,91 @@ +admin: + access_log_path: /dev/null + address: + socket_address: { address: 0.0.0.0, port_value: 10000 } +dynamic_resources: + lds_config: {ads: {}} + cds_config: {ads: {}} + ads_config: + api_type: GRPC + grpc_services: + envoy_grpc: + cluster_name: envoy-control-xds +node: + cluster: test-cluster + id: test-id + metadata: + ads: true + service_name: test-service + proxy_settings: + outgoing: + dependencies: + - service: "*" + - domain: "https://www.example.com" + + +static_resources: + clusters: + - connect_timeout: 1s + hosts: + - socket_address: + address: HOST_IP + port_value: HOST_PORT + http2_protocol_options: {} + name: envoy-control-xds + - name: envoy-original-destination + type: ORIGINAL_DST + lb_policy: ORIGINAL_DST_LB + original_dst_lb_config: + use_http_header: true + connect_timeout: + seconds: 1 + http_protocol_options: + allow_absolute_url: true + - name: local_service + type: STATIC + hosts: + - socket_address: + address: LOCAL_SERVICE_IP + port_value: 5678 + connect_timeout: 1s + - name: this_admin + type: STATIC + hosts: + - socket_address: + address: 127.0.0.1 + 
port_value: 10000 + connect_timeout: + seconds: 1 + listeners: + - name: default_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 5000 + filter_chains: + filters: + - name: envoy.http_connection_manager + config: + stat_prefix: egress_http + rds: + route_config_name: default_routes + config_source: + ads: {} + http_filters: + - name: envoy.router + - name: ingress_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 5001 + filter_chains: + - filters: + - name: envoy.http_connection_manager + config: + stat_prefix: ingress_http + rds: + route_config_name: ingress_secured_routes + config_source: + ads: {} + http_filters: + - name: envoy.router diff --git a/envoy-control-tests/src/main/resources/envoy/config_xds.yaml b/envoy-control-tests/src/main/resources/envoy/config_xds.yaml new file mode 100644 index 000000000..b1d87850d --- /dev/null +++ b/envoy-control-tests/src/main/resources/envoy/config_xds.yaml @@ -0,0 +1,108 @@ +admin: + access_log_path: /dev/null + address: + socket_address: { address: 0.0.0.0, port_value: 10000 } +dynamic_resources: + cds_config: + api_config_source: + api_type: GRPC + grpc_services: + envoy_grpc: + cluster_name: envoy-control-xds +node: + cluster: test-cluster + id: test-id + metadata: + proxy_settings: + incoming: + endpoints: + - path: "/endpoint" + clients: ["authorizedClient"] + outgoing: + dependencies: + - service: "service-1" + - service: "service-2" + - service: "service-3" + - service: "service-4" + - service: "service-5" + - service: "echo" + - service: "consul" + - domain: "https://www.example.com" + +static_resources: + clusters: + - connect_timeout: 1s + hosts: + - socket_address: + address: HOST_IP + port_value: HOST_PORT + - socket_address: + address: HOST_IP + port_value: HOST2_PORT + http2_protocol_options: {} + name: envoy-control-xds + - name: envoy-original-destination + type: ORIGINAL_DST + lb_policy: ORIGINAL_DST_LB + original_dst_lb_config: + use_http_header: true + 
connect_timeout: + seconds: 1 + http_protocol_options: + allow_absolute_url: true + - name: local_service + type: STATIC + hosts: + - socket_address: + address: LOCAL_SERVICE_IP + port_value: 5678 + connect_timeout: 1s + - name: this_admin + type: STATIC + hosts: + - socket_address: + address: 127.0.0.1 + port_value: 10000 + connect_timeout: + seconds: 1 + listeners: + - name: default_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 5000 + filter_chains: + filters: + - name: envoy.http_connection_manager + config: + stat_prefix: egress_http + rds: + route_config_name: default_routes + config_source: + api_config_source: + api_type: GRPC + grpc_services: + - envoy_grpc: + cluster_name: envoy-control-xds + http_filters: + - name: envoy.router + - name: ingress_listener + address: + socket_address: + address: 0.0.0.0 + port_value: 5001 + filter_chains: + - filters: + - name: envoy.http_connection_manager + config: + stat_prefix: ingress_http + rds: + route_config_name: ingress_secured_routes + config_source: + api_config_source: + api_type: GRPC + grpc_services: + - envoy_grpc: + cluster_name: envoy-control-xds + http_filters: + - name: envoy.router \ No newline at end of file diff --git a/envoy-control-tests/src/main/resources/envoy/launch_envoy.sh b/envoy-control-tests/src/main/resources/envoy/launch_envoy.sh new file mode 100755 index 000000000..4cf3175f0 --- /dev/null +++ b/envoy-control-tests/src/main/resources/envoy/launch_envoy.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +set -eu + +HOST_IP=$(sh /usr/local/bin/host_ip.sh) +HOST_PORT=$1 +HOST2_PORT=$2 + +CONFIG=$(cat "$3") +CONFIG_DIR=$(mktemp -d) +CONFIG_FILE="$CONFIG_DIR/envoy.yaml" + +LOCAL_SERVICE_IP="$4" + +echo "${CONFIG}" | sed \ + -e "s/HOST_IP/${HOST_IP}/g" \ + -e "s/HOST_PORT/${HOST_PORT}/g" \ + -e "s/HOST2_PORT/${HOST2_PORT}/g" \ + -e "s/LOCAL_SERVICE_IP/${LOCAL_SERVICE_IP}/g" \ + > "${CONFIG_FILE}" +cat "${CONFIG_FILE}" + +shift 4 +/usr/local/bin/envoy --drain-time-s 1 -c "${CONFIG_FILE}" 
"$@" + +rm -rf "${CONFIG_DIR}" diff --git a/envoy-control-tests/src/main/resources/testcontainers/consul-low-rpc-rate.json b/envoy-control-tests/src/main/resources/testcontainers/consul-low-rpc-rate.json new file mode 100644 index 000000000..a4072c733 --- /dev/null +++ b/envoy-control-tests/src/main/resources/testcontainers/consul-low-rpc-rate.json @@ -0,0 +1,6 @@ +{ + "limits": { + "rpc_rate": 1, + "rpc_max_burst": 1 + } +} \ No newline at end of file diff --git a/envoy-control-tests/src/main/resources/testcontainers/host_ip.sh b/envoy-control-tests/src/main/resources/testcontainers/host_ip.sh new file mode 100755 index 000000000..a6e1792e8 --- /dev/null +++ b/envoy-control-tests/src/main/resources/testcontainers/host_ip.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +set -eu + +# Figuring out the IP of the docker host machine is a convoluted process. On Mac we pull that value from the special +# host.docker.internal hostname. Linux does not support that yet, so we have to use the routing table. +# +# See https://github.com/docker/for-linux/issues/264 to track host.docker.internal support on linux. +HOST_DOMAIN_IP="$(getent hosts host.docker.internal | awk '{ print $1 }')" + +if [ 
-n "${HOST_DOMAIN_IP}" ]; then + printf '%s' "${HOST_DOMAIN_IP}" +else + printf '%s' "$(ip route | awk '/default/ { print $3 }')" +fi diff --git a/envoy-control/build.gradle b/envoy-control/build.gradle new file mode 100644 index 000000000..71d7c2e75 --- /dev/null +++ b/envoy-control/build.gradle @@ -0,0 +1,44 @@ +dependencies { + implementation project(':services') + + compile group: 'org.jetbrains.kotlin', name: 'kotlin-stdlib', version: versions.kotlin + compile group: 'com.fasterxml.jackson.module', name: 'jackson-module-afterburner', version: versions.jackson + compile group: 'com.fasterxml.jackson.module', name: 'jackson-module-kotlin', version: versions.jackson + compile group: 'org.jetbrains.kotlin', name: 'kotlin-reflect', version: versions.kotlin + compile group: 'io.dropwizard.metrics', name: 'metrics-core', version: versions.dropwizard + compile group: 'io.micrometer', name: 'micrometer-core', version: versions.micrometer + + compile group: 'io.envoyproxy.controlplane', name: 'server', version: versions.java_controlplane + + compile group: 'io.grpc', name: 'grpc-netty', version: versions.grpc + + compile group: 'io.projectreactor', name: 'reactor-core', version: versions.reactor + + compile group: 'org.slf4j', name: 'jcl-over-slf4j', version: versions.slf4j + compile group: 'ch.qos.logback', name: 'logback-classic', version: versions.logback + + testCompile group: 'io.grpc', name: 'grpc-testing', version: versions.grpc + testCompile group: 'io.projectreactor', name: 'reactor-test', version: versions.reactor + testCompile group: 'org.mockito', name: 'mockito-core', version: versions.mockito + testCompile group: 'cglib', name: 'cglib-nodep', version: versions.cglib + + testCompile group: 'org.awaitility', name: 'awaitility', version: versions.awaitility + + testImplementation group: 'org.testcontainers', name: 'testcontainers', version: versions.testcontainers + testImplementation group: 'org.testcontainers', name: 'junit-jupiter', version: versions.testcontainers 
+} + +tasks.withType(GroovyCompile) { + groovyOptions.optimizationOptions.indy = true + options.encoding = 'UTF-8' +} + +tasks.withType(JavaCompile) { + options.encoding = 'UTF-8' +} + +test { + maxParallelForks = 1 + useJUnitPlatform() +} + diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ControlPlane.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ControlPlane.kt new file mode 100644 index 000000000..f581c8d2a --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/ControlPlane.kt @@ -0,0 +1,221 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import io.envoyproxy.controlplane.cache.NodeGroup +import io.envoyproxy.controlplane.cache.SimpleCache +import io.envoyproxy.controlplane.server.DefaultExecutorGroup +import io.envoyproxy.controlplane.server.DiscoveryServer +import io.envoyproxy.controlplane.server.ExecutorGroup +import io.envoyproxy.controlplane.server.callback.SnapshotCollectingCallback +import io.grpc.Server +import io.grpc.netty.NettyServerBuilder +import io.micrometer.core.instrument.MeterRegistry +import io.netty.channel.nio.NioEventLoopGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.Group +import pl.allegro.tech.servicemesh.envoycontrol.groups.GroupChangeWatcher +import pl.allegro.tech.servicemesh.envoycontrol.groups.MetadataNodeGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.NodeMetadataValidator +import pl.allegro.tech.servicemesh.envoycontrol.server.CachedProtoResourcesSerializer +import pl.allegro.tech.servicemesh.envoycontrol.server.ExecutorType +import pl.allegro.tech.servicemesh.envoycontrol.server.ServerProperties +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.CompositeDiscoveryServerCallbacks +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.LoggingDiscoveryServerCallbacks +import pl.allegro.tech.servicemesh.envoycontrol.server.callbacks.MeteredConnectionsCallbacks +import 
pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState +import pl.allegro.tech.servicemesh.envoycontrol.snapshot.SnapshotUpdater +import reactor.core.Disposable +import reactor.core.publisher.Flux +import reactor.core.scheduler.Schedulers +import java.time.Clock +import java.util.concurrent.Executor +import java.util.concurrent.Executors +import java.util.concurrent.LinkedBlockingQueue +import java.util.concurrent.ThreadFactory +import java.util.concurrent.ThreadPoolExecutor +import java.util.concurrent.TimeUnit +import java.util.concurrent.atomic.AtomicInteger + +class ControlPlane private constructor( + val grpcServer: Server, + val snapshotUpdater: SnapshotUpdater, + private val changes: Flux> +) : AutoCloseable { + + private var servicesDisposable: Disposable? = null + + companion object { + fun builder(properties: EnvoyControlProperties, meterRegistry: MeterRegistry) = + ControlPlaneBuilder(properties, meterRegistry) + } + + fun start() { + servicesDisposable = snapshotUpdater + .start(changes) + .subscribe() + grpcServer.start() + } + + override fun close() { + servicesDisposable?.dispose() + grpcServer.shutdownNow() + grpcServer.awaitTermination() + } + + class ControlPlaneBuilder( + val properties: EnvoyControlProperties, + val meterRegistry: MeterRegistry + ) { + var grpcServerExecutor: Executor? = null + var nioEventLoopExecutor: Executor? = null + var executorGroup: ExecutorGroup? = null + var updateSnapshotExecutor: Executor? 
= null + var metrics: EnvoyControlMetrics = DefaultEnvoyControlMetrics() + + var nodeGroup: NodeGroup = MetadataNodeGroup( + allServicesDependenciesValue = properties.envoy.snapshot.outgoingPermissions.allServicesDependenciesValue, + outgoingPermissions = properties.envoy.snapshot.outgoingPermissions.enabled, + incomingPermissions = properties.envoy.snapshot.incomingPermissions.enabled + ) + + fun build(changes: Flux>): ControlPlane { + if (grpcServerExecutor == null) { + grpcServerExecutor = ThreadPoolExecutor( + properties.server.serverPoolSize, + properties.server.serverPoolSize, + properties.server.serverPoolKeepAlive.toMillis(), TimeUnit.MILLISECONDS, + LinkedBlockingQueue(), + ThreadNamingThreadFactory("grpc-server-worker") + ) + } + + if (nioEventLoopExecutor == null) { + // unbounded executor - netty will only use configured number of threads + // (by nioEventLoopThreadCount property or default netty value: number of CPUs * 2) + nioEventLoopExecutor = Executors.newCachedThreadPool( + ThreadNamingThreadFactory("grpc-worker-event-loop") + ) + } + + if (executorGroup == null) { + executorGroup = when (properties.server.executorGroup.type) { + ExecutorType.DIRECT -> DefaultExecutorGroup() + ExecutorType.PARALLEL -> { + val executor = Executors.newFixedThreadPool( + properties.server.executorGroup.parallelPoolSize, + ThreadNamingThreadFactory("discovery-responses-executor") + ) + ExecutorGroup { executor } + } + } + } + + if (updateSnapshotExecutor == null) { + updateSnapshotExecutor = Executors.newSingleThreadExecutor(ThreadNamingThreadFactory("snapshot-update")) + } + + val cache = SimpleCache(nodeGroup) + + val cleanupProperties = properties.server.snapshotCleanup + + val groupChangeWatcher = GroupChangeWatcher(cache, metrics) + + val discoveryServer = DiscoveryServer( + listOf( + CompositeDiscoveryServerCallbacks( + meterRegistry, + SnapshotCollectingCallback( + cache, + nodeGroup, + Clock.systemDefaultZone(), + emptySet(), + 
cleanupProperties.collectAfterMillis.toMillis(), + cleanupProperties.collectionIntervalMillis.toMillis() + ), + LoggingDiscoveryServerCallbacks(), + MeteredConnectionsCallbacks().also { + meterRegistry.gauge("grpc.all-connections", it.connections) + MeteredConnectionsCallbacks.MetricsStreamType.values().map { type -> + meterRegistry.gauge("grpc.connections.${type.name.toLowerCase()}", it.connections(type)) + } + }, + NodeMetadataValidator(properties.envoy.snapshot.outgoingPermissions) + ) + ), + groupChangeWatcher, + executorGroup, + CachedProtoResourcesSerializer() + ) + + return ControlPlane( + grpcServer(properties.server, discoveryServer, nioEventLoopExecutor!!, grpcServerExecutor!!), + SnapshotUpdater( + cache, + properties.envoy.snapshot, + Schedulers.fromExecutor(updateSnapshotExecutor!!), + groupChangeWatcher.onGroupAdded() + ), + changes + ) + } + + fun withNodeGroup(nodeGroup: NodeGroup): ControlPlaneBuilder { + this.nodeGroup = nodeGroup + return this + } + + fun withGrpcServerExecutor(executor: Executor): ControlPlaneBuilder { + grpcServerExecutor = executor + return this + } + + fun withNioEventLoopExecutor(executor: Executor): ControlPlaneBuilder { + nioEventLoopExecutor = executor + return this + } + + fun withExecutorGroup(executor: ExecutorGroup): ControlPlaneBuilder { + executorGroup = executor + return this + } + + fun withUpdateSnapshotExecutor(executor: Executor): ControlPlaneBuilder { + updateSnapshotExecutor = executor + return this + } + + fun withMetrics(metrics: EnvoyControlMetrics): ControlPlaneBuilder { + this.metrics = metrics + return this + } + + private fun NettyServerBuilder.withEnvoyServices(discoveryServer: DiscoveryServer): NettyServerBuilder = + this.addService(discoveryServer.aggregatedDiscoveryServiceImpl) + .addService(discoveryServer.clusterDiscoveryServiceImpl) + .addService(discoveryServer.endpointDiscoveryServiceImpl) + .addService(discoveryServer.listenerDiscoveryServiceImpl) + 
.addService(discoveryServer.routeDiscoveryServiceImpl) + + private class ThreadNamingThreadFactory(val threadNamePrefix: String) : ThreadFactory { + private val counter = AtomicInteger() + override fun newThread(r: Runnable) = Thread(r, "$threadNamePrefix-${counter.getAndIncrement()}") + } + + private fun grpcServer( + config: ServerProperties, + discoveryServer: DiscoveryServer, + nioEventLoopExecutor: Executor, + grpcServerExecutor: Executor + ): Server = NettyServerBuilder.forPort(config.port) + .workerEventLoopGroup( + NioEventLoopGroup( + config.nioEventLoopThreadCount, + nioEventLoopExecutor + ) + ) + .executor(grpcServerExecutor) + .keepAliveTime(config.netty.keepAliveTime.toMillis(), TimeUnit.MILLISECONDS) + .permitKeepAliveTime(config.netty.permitKeepAliveTime.toMillis(), TimeUnit.MILLISECONDS) + .permitKeepAliveWithoutCalls(config.netty.permitKeepAliveWithoutCalls) + .withEnvoyServices(discoveryServer) + .build() + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlMetrics.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlMetrics.kt new file mode 100644 index 000000000..d6a3102d4 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlMetrics.kt @@ -0,0 +1,46 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import java.util.concurrent.atomic.AtomicInteger + +interface EnvoyControlMetrics { + fun serviceRemoved() + fun serviceAdded() + fun instanceChanged() + fun snapshotChanged() + fun setCacheGroupsCount(count: Int) + fun errorWatchingServices() +} + +data class DefaultEnvoyControlMetrics( + val servicesRemoved: AtomicInteger = AtomicInteger(), + val servicesAdded: AtomicInteger = AtomicInteger(), + val instanceChanges: AtomicInteger = AtomicInteger(), + val snapshotChanges: AtomicInteger = AtomicInteger(), + val cacheGroupsCount: AtomicInteger = AtomicInteger(), + val errorWatchingServices: AtomicInteger = 
AtomicInteger() +) : EnvoyControlMetrics { + + override fun errorWatchingServices() { + errorWatchingServices.incrementAndGet() + } + + override fun serviceRemoved() { + servicesRemoved.incrementAndGet() + } + + override fun serviceAdded() { + servicesAdded.incrementAndGet() + } + + override fun instanceChanged() { + instanceChanges.incrementAndGet() + } + + override fun snapshotChanged() { + snapshotChanges.incrementAndGet() + } + + override fun setCacheGroupsCount(count: Int) { + cacheGroupsCount.set(count) + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlProperties.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlProperties.kt new file mode 100644 index 000000000..e328ac8ba --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/EnvoyControlProperties.kt @@ -0,0 +1,22 @@ +@file:Suppress("MagicNumber") + +package pl.allegro.tech.servicemesh.envoycontrol + +import pl.allegro.tech.servicemesh.envoycontrol.server.ServerProperties +import pl.allegro.tech.servicemesh.envoycontrol.snapshot.SnapshotProperties +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.SyncProperties + +class EnvoyControlProperties { + var server = ServerProperties() + var envoy = EnvoyProperties() + var sync = SyncProperties() + var serviceFilters = ServiceFilters() +} + +class EnvoyProperties { + var snapshot = SnapshotProperties() +} + +class ServiceFilters { + var excludedNamesPatterns: List = emptyList() +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/Logger.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/Logger.kt new file mode 100644 index 000000000..8e8de0e06 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/Logger.kt @@ -0,0 +1,14 @@ +package pl.allegro.tech.servicemesh.envoycontrol + +import org.slf4j.Logger +import org.slf4j.LoggerFactory 
+import kotlin.reflect.full.companionObject + +fun R.logger(): Lazy = lazy { LoggerFactory.getLogger(unwrapCompanionClass(this.javaClass).name) } + +fun unwrapCompanionClass(ofClass: Class): Class<*> { + return if (ofClass.enclosingClass != null && ofClass.enclosingClass.kotlin.companionObject?.java == ofClass) + ofClass.enclosingClass + else + ofClass +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/GroupChangeWatcher.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/GroupChangeWatcher.kt new file mode 100644 index 000000000..9927d7cac --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/GroupChangeWatcher.kt @@ -0,0 +1,49 @@ +package pl.allegro.tech.servicemesh.envoycontrol.groups + +import io.envoyproxy.controlplane.cache.ConfigWatcher +import io.envoyproxy.controlplane.cache.Response +import io.envoyproxy.controlplane.cache.SimpleCache +import io.envoyproxy.controlplane.cache.Watch +import io.envoyproxy.envoy.api.v2.DiscoveryRequest +import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControlMetrics +import reactor.core.publisher.Flux +import reactor.core.publisher.FluxSink +import java.util.function.Consumer + +/** + * This class is needed to force snapshot creation in SnapshotUpdater when new group is added. + * Otherwise when Envoy with new group is connected it won't receive the snapshot immediately. + * In this situation, when there are no changes from ServiceChanges we won't send anything to Envoy. + * When Envoy doesn't receive any snapshot from Envoy Control, it is stuck in PRE_INITIALIZING state. + */ +internal class GroupChangeWatcher( + private val cache: SimpleCache, + private val metrics: EnvoyControlMetrics +) : ConfigWatcher { + private val groupAddedFlux: Flux = Flux.create { groupAddedSink = it } + private var groupAddedSink: FluxSink? 
= null + + fun onGroupAdded(): Flux { + return groupAddedFlux + } + + override fun createWatch( + ads: Boolean, + request: DiscoveryRequest, + knownResourceNames: MutableSet, + responseConsumer: Consumer + ): Watch { + val oldGroups = cache.groups() + val watch = cache.createWatch(ads, request, knownResourceNames, responseConsumer) + val groups = cache.groups() + metrics.setCacheGroupsCount(groups.size) + if (oldGroups != groups) { + emitNewGroupsEvent() + } + return watch + } + + private fun emitNewGroupsEvent() { + groupAddedSink?.next(true) + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/Groups.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/Groups.kt new file mode 100644 index 000000000..0fb953281 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/Groups.kt @@ -0,0 +1,27 @@ +package pl.allegro.tech.servicemesh.envoycontrol.groups + +sealed class Group { + abstract val ads: Boolean + abstract val serviceName: String + abstract val proxySettings: ProxySettings + + open fun isGlobalGroup() = false +} + +data class ServicesGroup( + override val ads: Boolean, + override val serviceName: String = "", + override val proxySettings: ProxySettings = ProxySettings() +) : Group() + +data class AllServicesGroup( + override val ads: Boolean, + override val serviceName: String = "", + override val proxySettings: ProxySettings = ProxySettings() +) : Group() { + /** + * Global group is a base group for all other groups. First we generate the global groups from a snapshot, + * then generate all other groups using data from global groups. 
+ */ + override fun isGlobalGroup() = serviceName == "" && proxySettings.isEmpty() +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/MetadataNodeGroup.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/MetadataNodeGroup.kt new file mode 100644 index 000000000..376304d93 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/MetadataNodeGroup.kt @@ -0,0 +1,52 @@ +package pl.allegro.tech.servicemesh.envoycontrol.groups + +import io.envoyproxy.controlplane.cache.NodeGroup +import io.envoyproxy.envoy.api.v2.core.Node + +class MetadataNodeGroup( + val allServicesDependenciesValue: String = "*", + val outgoingPermissions: Boolean, + val incomingPermissions: Boolean = false +) : NodeGroup { + + override fun hash(node: Node): Group { + val ads = node.metadata + .fieldsMap["ads"] + ?.boolValue + ?: false + + return createGroup(node, ads) + } + + private fun createGroup(node: Node, ads: Boolean): Group { + val metadata = NodeMetadata(node.metadata) + val serviceName = serviceName(metadata) + val proxySettings = proxySettings(metadata) + + return when { + hasAllServicesDependencies(metadata) -> + AllServicesGroup(ads, serviceName, proxySettings) + else -> + ServicesGroup(ads, serviceName, proxySettings) + } + } + + private fun hasAllServicesDependencies(metadata: NodeMetadata): Boolean { + return !outgoingPermissions || + metadata.proxySettings.outgoing.containsDependencyForService(allServicesDependenciesValue) + } + + private fun serviceName(metadata: NodeMetadata): String { + return when (incomingPermissions) { + true -> metadata.serviceName.orEmpty() + false -> "" + } + } + + private fun proxySettings(metadata: NodeMetadata): ProxySettings { + return when (incomingPermissions) { + true -> metadata.proxySettings + false -> metadata.proxySettings.withIncomingPermissionsDisabled() + } + } +} diff --git 
a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt new file mode 100644 index 000000000..367a9c096 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadata.kt @@ -0,0 +1,192 @@ +package pl.allegro.tech.servicemesh.envoycontrol.groups + +import com.google.protobuf.Duration +import com.google.protobuf.Struct +import com.google.protobuf.Value +import com.google.protobuf.util.Durations +import io.envoyproxy.controlplane.server.exception.RequestException +import io.grpc.Status +import java.net.URL + +open class NodeMetadataValidationException(message: String) + : RequestException(Status.INVALID_ARGUMENT.withDescription(message)) + +class NodeMetadata(metadata: Struct) { + val serviceName: String? = metadata + .fieldsMap["service_name"] + ?.stringValue + + val proxySettings: ProxySettings = ProxySettings(metadata.fieldsMap["proxy_settings"]) +} + +data class ProxySettings( + val incoming: Incoming = Incoming(), + val outgoing: Outgoing = Outgoing() +) { + constructor(proto: Value?) 
: this( + incoming = proto?.field("incoming").toIncoming(), + outgoing = proto?.field("outgoing").toOutgoing() + ) + + fun isEmpty() = this == ProxySettings() + + fun withIncomingPermissionsDisabled(): ProxySettings = copy( + incoming = incoming.copy( + permissionsEnabled = false, + endpoints = emptyList(), + roles = emptyList() + ) + ) +} + +private fun Value?.toOutgoing(): Outgoing { + return Outgoing( + dependencies = this?.field("dependencies")?.list().orEmpty().map { it.toDependency() } + ) +} + +fun Value.toDependency(): Dependency { + val service = this.field("service")?.stringValue + val domain = this.field("domain")?.stringValue + + return when { + service == null && domain == null || service != null && domain != null -> + throw NodeMetadataValidationException( + "Define either 'service' or 'domain' as an outgoing dependency" + ) + service != null -> ServiceDependency(service) + domain.orEmpty().startsWith("http://") -> DomainDependency(domain.orEmpty()) + domain.orEmpty().startsWith("https://") -> DomainDependency(domain.orEmpty()) + else -> throw NodeMetadataValidationException( + "Unsupported protocol for domain dependency for domain $domain" + ) + } +} + +private fun Value?.toIncoming(): Incoming { + val endpointsField = this?.field("endpoints")?.list() + return Incoming( + endpoints = endpointsField.orEmpty().map { it.toIncomingEndpoint() }, + // if there is no endpoint field defined in metadata, we allow for all traffic + permissionsEnabled = endpointsField != null, + roles = this?.field("roles")?.list().orEmpty().map { Role(it) }, + timeoutPolicy = this?.field("timeoutPolicy").toTimeoutPolicy() + ) +} + +fun Value.toIncomingEndpoint(): IncomingEndpoint { + val pathPrefix = this.field("pathPrefix")?.stringValue + val path = this.field("path")?.stringValue + + if (pathPrefix != null && path != null) { + throw NodeMetadataValidationException("Precisely one of 'path' and 'pathPrefix' field is allowed") + } + + val methods = 
this.field("methods")?.list().orEmpty().map { it.stringValue }.toSet() + val clients = this.field("clients")?.list().orEmpty().map { it.stringValue }.toSet() + + return when { + path != null -> IncomingEndpoint(path, PathMatchingType.PATH, methods, clients) + pathPrefix != null -> IncomingEndpoint(pathPrefix, PathMatchingType.PATH_PREFIX, methods, clients) + else -> throw NodeMetadataValidationException("One of 'path' or 'pathPrefix' field is required") + } +} + +private fun Value?.toTimeoutPolicy(): TimeoutPolicy { + val idleTimeout: Duration? = this?.field("idleTimeout")?.stringValue + ?.takeIf { it.isNotBlank() } + ?.let { Durations.parse(it) } + val responseTimeout: Duration? = this?.field("responseTimeout")?.stringValue + ?.takeIf { it.isNotBlank() } + ?.let { Durations.parse(it) } + + return TimeoutPolicy(idleTimeout, responseTimeout) +} + +data class Incoming( + val endpoints: List = emptyList(), + val permissionsEnabled: Boolean = false, + val roles: List = emptyList(), + val timeoutPolicy: TimeoutPolicy = TimeoutPolicy(idleTimeout = null, responseTimeout = null) +) + +data class Outgoing( + val dependencies: List = emptyList() +) { + fun containsDependencyForService(service: String) = serviceDependencies.containsKey(service) + + // not declared in primary constructor to exclude from equals(), copy(), etc. 
+ private val domainDependencies: Map = dependencies + .filterIsInstance() + .map { it.domain to it } + .toMap() + + private val serviceDependencies: Map = dependencies + .filterIsInstance() + .map { it.service to it } + .toMap() + + fun getDomainDependencies(): Collection = domainDependencies.values + + fun getServiceDependencies(): Collection = serviceDependencies.values +} + +interface Dependency + +data class ServiceDependency(val service: String) : Dependency + +data class DomainDependency(val domain: String) : Dependency { + val uri = URL(domain) + + fun getPort(): Int = uri.port.takeIf { it != -1 } ?: uri.defaultPort + + fun getHost(): String = uri.host + + fun useSsl() = uri.protocol == "https" + + fun getClusterName(): String { + val clusterName = getHost() + ":" + getPort() + return clusterName.replace(".", "_").replace(":", "_") + } + + fun getRouteDomain(): String = if (uri.port != -1) getHost() + ":" + getPort() else getHost() +} + +data class Role( + val name: String?, + val clients: Set +) { + constructor(proto: Value) : this( + name = proto.field("name")?.stringValue, + clients = proto.field("clients")?.list().orEmpty().map { it.stringValue }.toSet() + ) +} + +data class TimeoutPolicy( + val idleTimeout: Duration?, + val responseTimeout: Duration? +) + +data class IncomingEndpoint( + override val path: String = "", + override val pathMatchingType: PathMatchingType = PathMatchingType.PATH, + override val methods: Set = emptySet(), + val clients: Set = emptySet() +) : EndpointBase + +enum class PathMatchingType { + PATH, PATH_PREFIX +} + +interface EndpointBase { + val path: String + val pathMatchingType: PathMatchingType + val methods: Set +} + +// We don't distinguish between absence of the field and the field with explicit null value. +// So we map both cases to the same output - null +private fun Value.field(key: String): Value? 
= this.structValue?.fieldsMap?.get(key) + ?.takeIf { it.kindCase != Value.KindCase.NULL_VALUE } + +private fun Value.list(): List? = this.listValue?.valuesList diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataValidator.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataValidator.kt new file mode 100644 index 000000000..106591e4c --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataValidator.kt @@ -0,0 +1,58 @@ +package pl.allegro.tech.servicemesh.envoycontrol.groups + +import io.envoyproxy.controlplane.server.DiscoveryServerCallbacks +import io.envoyproxy.envoy.api.v2.DiscoveryRequest +import io.envoyproxy.envoy.api.v2.DiscoveryResponse +import io.envoyproxy.envoy.api.v2.core.Node +import pl.allegro.tech.servicemesh.envoycontrol.snapshot.OutgoingPermissionsProperties + +class AllDependenciesValidationException(serviceName: String?) + : NodeMetadataValidationException( + "Blocked service $serviceName from using all dependencies. Only defined services can use all dependencies" +) + +class NodeMetadataValidator( + val outgoingPermissionsProperties: OutgoingPermissionsProperties +) : DiscoveryServerCallbacks { + override fun onStreamClose(streamId: Long, typeUrl: String?) {} + + override fun onStreamCloseWithError(streamId: Long, typeUrl: String?, error: Throwable?) {} + + override fun onStreamOpen(streamId: Long, typeUrl: String?) {} + + override fun onStreamRequest(streamId: Long, request: DiscoveryRequest?) { + request?.node?.let { validateMetadata(it) } + } + + override fun onStreamResponse( + streamId: Long, + request: DiscoveryRequest?, + response: DiscoveryResponse? + ) { + } + + private fun validateMetadata(node: Node) { + // some validation logic is executed when NodeMetadata is created. 
+ // This may throw NodeMetadataValidationException + val metadata = NodeMetadata(node.metadata) + + validateDependencies(metadata) + } + + private fun validateDependencies(metadata: NodeMetadata) { + if (!outgoingPermissionsProperties.enabled) { + return + } + if (hasAllServicesDependencies(metadata) && !isAllowedToHaveAllServiceDependencies(metadata)) { + throw AllDependenciesValidationException(metadata.serviceName) + } + } + + private fun hasAllServicesDependencies(metadata: NodeMetadata) = + metadata.proxySettings.outgoing.containsDependencyForService( + outgoingPermissionsProperties.allServicesDependenciesValue + ) + + private fun isAllowedToHaveAllServiceDependencies(metadata: NodeMetadata) = outgoingPermissionsProperties + .servicesAllowedToUseWildcard.contains(metadata.serviceName) +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/CachedProtoResourcesSerializer.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/CachedProtoResourcesSerializer.kt new file mode 100644 index 000000000..66962c244 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/CachedProtoResourcesSerializer.kt @@ -0,0 +1,26 @@ +package pl.allegro.tech.servicemesh.envoycontrol.server + +import com.google.common.cache.CacheBuilder +import com.google.protobuf.Any +import com.google.protobuf.Message +import io.envoyproxy.controlplane.server.serializer.ProtoResourcesSerializer + +internal class CachedProtoResourcesSerializer : ProtoResourcesSerializer { + + private val cache = CacheBuilder.newBuilder() + .weakValues() + .build, MutableCollection>() + + override fun serialize(resources: MutableCollection): MutableCollection { + return cache.get(resources) { + resources.asSequence() + .map { Any.pack(it) } + .toMutableList() + } + } + + @Suppress("NotImplementedDeclaration") + override fun serialize(resource: Message?): Any { + throw NotImplementedError("Serializing single 
messages is not supported") + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/ServerProperties.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/ServerProperties.kt new file mode 100644 index 000000000..4d3adfe88 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/ServerProperties.kt @@ -0,0 +1,46 @@ +@file:Suppress("MagicNumber") + +package pl.allegro.tech.servicemesh.envoycontrol.server + +import java.time.Duration + +class ServerProperties { + var port = 50000 + var nioEventLoopThreadCount = 0 // if set to 0, default Netty value will be used: number of available processors * 2 + var serverPoolSize = 16 + var serverPoolKeepAlive: Duration = Duration.ofMinutes(10) + var executorGroup = ExecutorProperties() + var netty = NettyProperties() + var snapshotCleanup = SnapshotCleanupProperties() +} + +enum class ExecutorType { + DIRECT, PARALLEL +} + +class ExecutorProperties { + var type = ExecutorType.DIRECT + var parallelPoolSize = 4 +} + +class NettyProperties { + /** + * @see io.grpc.netty.NettyServerBuilder.keepAliveTime + */ + var keepAliveTime: Duration = Duration.ofSeconds(15) + + /** + * @see io.grpc.netty.NettyServerBuilder.permitKeepAliveTime + */ + var permitKeepAliveTime: Duration = Duration.ofSeconds(10) + + /** + * @see io.grpc.netty.NettyServerBuilder.permitKeepAliveWithoutCalls + */ + var permitKeepAliveWithoutCalls = true +} + +class SnapshotCleanupProperties { + var collectAfterMillis: Duration = Duration.ofSeconds(10) + var collectionIntervalMillis: Duration = Duration.ofSeconds(10) +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/CompositeDiscoveryServerCallbacks.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/CompositeDiscoveryServerCallbacks.kt new file mode 100644 index 000000000..2d8e9de09 --- /dev/null +++ 
b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/CompositeDiscoveryServerCallbacks.kt @@ -0,0 +1,63 @@ +package pl.allegro.tech.servicemesh.envoycontrol.server.callbacks + +import io.envoyproxy.controlplane.server.DiscoveryServerCallbacks +import io.envoyproxy.controlplane.server.exception.RequestException +import io.envoyproxy.envoy.api.v2.DiscoveryRequest +import io.envoyproxy.envoy.api.v2.DiscoveryResponse +import io.micrometer.core.instrument.MeterRegistry +import pl.allegro.tech.servicemesh.envoycontrol.logger + +class CompositeException(exceptions: List) : + RuntimeException("Composite exception: " + exceptions.map { it.message }.joinToString(",", "[", "]")) + +class CompositeDiscoveryServerCallbacks( + val meterRegistry: MeterRegistry, + vararg val delegate: DiscoveryServerCallbacks +) : DiscoveryServerCallbacks { + private val logger by logger() + + override fun onStreamCloseWithError(streamId: Long, typeUrl: String?, error: Throwable?) { + runCallbacks { it.onStreamCloseWithError(streamId, typeUrl, error) } + } + + override fun onStreamClose(streamId: Long, typeUrl: String?) { + runCallbacks { it.onStreamClose(streamId, typeUrl) } + } + + override fun onStreamOpen(streamId: Long, typeUrl: String?) { + runCallbacks { it.onStreamOpen(streamId, typeUrl) } + } + + override fun onStreamRequest(streamId: Long, request: DiscoveryRequest?) { + runCallbacks { it.onStreamRequest(streamId, request) } + } + + override fun onStreamResponse( + streamId: Long, + request: DiscoveryRequest?, + response: DiscoveryResponse? 
+ ) { + runCallbacks { it.onStreamResponse(streamId, request, response) } + } + + private fun runCallbacks(fn: (DiscoveryServerCallbacks) -> Unit) { + val exceptions = mutableListOf() + for (callback in delegate) { + try { + fn(callback) + } catch (e: Exception) { + meterRegistry.counter("callbacks.errors").increment() + logger.warn(e.message, e) + when (e) { + // stop callback processing and throw RequestException without wrapping, + // to notify client with proper message + is RequestException -> throw e + else -> exceptions.add(e) + } + } + } + if (exceptions.isNotEmpty()) { + throw CompositeException(exceptions) + } + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/LoggingDiscoveryServerCallbacks.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/LoggingDiscoveryServerCallbacks.kt new file mode 100644 index 000000000..5f2a74bcd --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/LoggingDiscoveryServerCallbacks.kt @@ -0,0 +1,34 @@ +package pl.allegro.tech.servicemesh.envoycontrol.server.callbacks + +import io.envoyproxy.envoy.api.v2.DiscoveryRequest +import io.envoyproxy.envoy.api.v2.DiscoveryResponse +import io.envoyproxy.controlplane.server.DiscoveryServerCallbacks +import org.slf4j.LoggerFactory + +class LoggingDiscoveryServerCallbacks : DiscoveryServerCallbacks { + private val logger = LoggerFactory.getLogger(LoggingDiscoveryServerCallbacks::class.java) + + override fun onStreamClose(streamId: Long, typeUrl: String?) { + logger.debug("onStreamClose streamId: {} typeUrl: {}", streamId, typeUrl) + } + + override fun onStreamCloseWithError(streamId: Long, typeUrl: String?, error: Throwable?) { + logger.debug("onStreamCloseWithError streamId: {}, typeUrl: {}", streamId, typeUrl, error) + } + + override fun onStreamOpen(streamId: Long, typeUrl: String?) 
{ + logger.debug("onStreamOpen streamId: {}, typeUrl: {}", streamId, typeUrl) + } + + override fun onStreamRequest(streamId: Long, request: DiscoveryRequest?) { + logger.debug("onStreamRequest streamId: {} request: {}", streamId, request) + } + + override fun onStreamResponse( + streamId: Long, + request: DiscoveryRequest?, + response: DiscoveryResponse? + ) { + logger.debug("onStreamResponse streamId: {}, request: {}, response: {}", streamId, request, response) + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/MeteredConnectionsCallbacks.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/MeteredConnectionsCallbacks.kt new file mode 100644 index 000000000..3ce889442 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/server/callbacks/MeteredConnectionsCallbacks.kt @@ -0,0 +1,52 @@ +package pl.allegro.tech.servicemesh.envoycontrol.server.callbacks + +import io.envoyproxy.controlplane.cache.Resources +import io.envoyproxy.controlplane.server.DiscoveryServerCallbacks +import java.util.concurrent.atomic.AtomicInteger + +class MeteredConnectionsCallbacks( + val connections: AtomicInteger = AtomicInteger() +) : DiscoveryServerCallbacks { + + private val connectionsByType: Map + + enum class MetricsStreamType { + CDS, EDS, LDS, RDS, SDS, ADS, UNKNOWN + } + + init { + connectionsByType = MetricsStreamType.values() + .map { type -> type to AtomicInteger(0) } + .toMap() + } + + override fun onStreamOpen(streamId: Long, typeUrl: String?) { + connections.incrementAndGet() + connectionsByType(typeUrl).incrementAndGet() + } + + override fun onStreamClose(streamId: Long, typeUrl: String?) { + connections.decrementAndGet() + connectionsByType(typeUrl).decrementAndGet() + } + + override fun onStreamCloseWithError(streamId: Long, typeUrl: String?, error: Throwable?) 
{ + connections.decrementAndGet() + connectionsByType(typeUrl).decrementAndGet() + } + + fun connections(type: MetricsStreamType): AtomicInteger = connectionsByType[type]!! + + private fun connectionsByType(typeUrl: String?): AtomicInteger { + val type = when (typeUrl) { + Resources.CLUSTER_TYPE_URL -> MetricsStreamType.CDS + Resources.ENDPOINT_TYPE_URL -> MetricsStreamType.EDS + Resources.LISTENER_TYPE_URL -> MetricsStreamType.LDS + Resources.ROUTE_TYPE_URL -> MetricsStreamType.RDS + Resources.SECRET_TYPE_URL -> MetricsStreamType.SDS + "" -> MetricsStreamType.ADS // ads is when the type url is empty + else -> MetricsStreamType.UNKNOWN + } + return connectionsByType[type]!! + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyClustersFactory.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyClustersFactory.kt new file mode 100644 index 000000000..e0c560a30 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyClustersFactory.kt @@ -0,0 +1,159 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import com.google.protobuf.UInt32Value +import com.google.protobuf.util.Durations +import io.envoyproxy.controlplane.cache.Snapshot +import io.envoyproxy.envoy.api.v2.Cluster +import io.envoyproxy.envoy.api.v2.ClusterLoadAssignment +import io.envoyproxy.envoy.api.v2.auth.CertificateValidationContext +import io.envoyproxy.envoy.api.v2.auth.CommonTlsContext +import io.envoyproxy.envoy.api.v2.auth.UpstreamTlsContext +import io.envoyproxy.envoy.api.v2.cluster.OutlierDetection +import io.envoyproxy.envoy.api.v2.core.Address +import io.envoyproxy.envoy.api.v2.core.AggregatedConfigSource +import io.envoyproxy.envoy.api.v2.core.ApiConfigSource +import io.envoyproxy.envoy.api.v2.core.ConfigSource +import io.envoyproxy.envoy.api.v2.core.DataSource +import io.envoyproxy.envoy.api.v2.core.GrpcService +import 
io.envoyproxy.envoy.api.v2.core.SocketAddress +import io.envoyproxy.envoy.api.v2.endpoint.Endpoint +import io.envoyproxy.envoy.api.v2.endpoint.LbEndpoint +import io.envoyproxy.envoy.api.v2.endpoint.LocalityLbEndpoints +import pl.allegro.tech.servicemesh.envoycontrol.groups.AllServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.Group +import pl.allegro.tech.servicemesh.envoycontrol.groups.ServicesGroup + +internal class EnvoyClustersFactory( + private val properties: SnapshotProperties +) { + fun getClustersForServices(services: List, ads: Boolean): List { + return services.map { edsCluster(it, ads) } + } + + fun getClustersForGroup(group: Group, globalSnapshot: Snapshot): List = + getEdsClustersForGroup(group, globalSnapshot) + getStrictDnsClustersForGroup(group) + + private fun getEdsClustersForGroup(group: Group, globalSnapshot: Snapshot): List { + return when (group) { + is ServicesGroup -> group.proxySettings.outgoing.getServiceDependencies() + .mapNotNull { globalSnapshot.clusters().resources().get(it.service) } + is AllServicesGroup -> globalSnapshot.clusters().resources().map { it.value } + } + } + + private fun getStrictDnsClustersForGroup(group: Group): List { + return group.proxySettings.outgoing.getDomainDependencies().map { + strictDnsCluster(it.getClusterName(), it.getHost(), it.getPort(), it.useSsl()) + } + } + + private fun strictDnsCluster(clusterName: String, host: String, port: Int, ssl: Boolean): Cluster { + var clusterBuilder = Cluster.newBuilder() + + if (properties.clusterOutlierDetection.enabled) { + configureOutlierDetection(clusterBuilder) + } + + clusterBuilder = clusterBuilder.setName(clusterName) + .setType(Cluster.DiscoveryType.STRICT_DNS) + .setConnectTimeout(Durations.fromMillis(properties.staticClusterConnectionTimeout.toMillis())) + /* + Default policy for resolving DNS names in Envoy resolves IPV6 addresses in first place + (IPV4 addresses are ignored if IPV6 are available from domain). 
+ There is no policy in Envoy that works in reverse order - this is the reason we are forced to ignore + IPV6 completely by setting policy that resolves only IPV4 addresses. + */ + .setDnsLookupFamily(Cluster.DnsLookupFamily.V4_ONLY) + .setLoadAssignment( + ClusterLoadAssignment.newBuilder().setClusterName(clusterName).addEndpoints( + LocalityLbEndpoints.newBuilder().addLbEndpoints( + LbEndpoint.newBuilder().setEndpoint( + Endpoint.newBuilder().setAddress( + Address.newBuilder().setSocketAddress( + SocketAddress.newBuilder().setAddress(host).setPortValue(port) + ) + ) + ) + ) + ) + ) + .setLbPolicy(Cluster.LbPolicy.LEAST_REQUEST) + + if (ssl) { + var tlsContextBuilder = UpstreamTlsContext.newBuilder() + tlsContextBuilder = tlsContextBuilder.setCommonTlsContext( + CommonTlsContext.newBuilder() + .setValidationContext( + CertificateValidationContext.newBuilder().setTrustedCa( + // TODO: GITHUB-ISSUE + DataSource.newBuilder().setFilename(properties.trustedCaFile).build() + ) + ) + ) + clusterBuilder = clusterBuilder.setTlsContext(tlsContextBuilder.build()) + } + return clusterBuilder.build() + } + + private fun edsCluster(clusterName: String, ads: Boolean): Cluster { + val clusterBuilder = Cluster.newBuilder() + + if (properties.clusterOutlierDetection.enabled) { + configureOutlierDetection(clusterBuilder) + } + + return clusterBuilder.setName(clusterName) + .setType(Cluster.DiscoveryType.EDS) + .setConnectTimeout(Durations.fromMillis(properties.edsConnectionTimeout.toMillis())) + .setEdsClusterConfig( + Cluster.EdsClusterConfig.newBuilder().setEdsConfig( + if (ads) { + ConfigSource.newBuilder().setAds(AggregatedConfigSource.newBuilder()) + } else { + ConfigSource.newBuilder().setApiConfigSource( + ApiConfigSource.newBuilder().setApiType(ApiConfigSource.ApiType.GRPC) + .addGrpcServices(0, GrpcService.newBuilder().setEnvoyGrpc( + GrpcService.EnvoyGrpc.newBuilder() + .setClusterName(properties.xdsClusterName) + ) + ) + ) + } + ).setServiceName(clusterName) + ) + 
.setLbPolicy(Cluster.LbPolicy.LEAST_REQUEST) + .build() + } + + private fun configureOutlierDetection(clusterBuilder: Cluster.Builder) { + clusterBuilder + .setOutlierDetection( + OutlierDetection.newBuilder() + .setConsecutive5Xx(UInt32Value.of(properties.clusterOutlierDetection.consecutive5xx)) + .setInterval(Durations.fromMillis(properties.clusterOutlierDetection.interval.toMillis())) + .setMaxEjectionPercent(UInt32Value.of(properties.clusterOutlierDetection.maxEjectionPercent)) + .setEnforcingSuccessRate(UInt32Value.of(properties.clusterOutlierDetection.enforcingSuccessRate)) + .setBaseEjectionTime(Durations.fromMillis( + properties.clusterOutlierDetection.baseEjectionTime.toMillis()) + ) + .setEnforcingConsecutive5Xx( + UInt32Value.of(properties.clusterOutlierDetection.enforcingConsecutive5xx) + ) + .setSuccessRateMinimumHosts( + UInt32Value.of(properties.clusterOutlierDetection.successRateMinimumHosts) + ) + .setSuccessRateRequestVolume( + UInt32Value.of(properties.clusterOutlierDetection.successRateRequestVolume) + ) + .setSuccessRateStdevFactor( + UInt32Value.of(properties.clusterOutlierDetection.successRateStdevFactor) + ) + .setConsecutiveGatewayFailure( + UInt32Value.of(properties.clusterOutlierDetection.consecutiveGatewayFailure) + ) + .setEnforcingConsecutiveGatewayFailure( + UInt32Value.of(properties.clusterOutlierDetection.enforcingConsecutiveGatewayFailure) + ) + ) + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyEgressRoutesFactory.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyEgressRoutesFactory.kt new file mode 100644 index 000000000..7ac7d2f65 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyEgressRoutesFactory.kt @@ -0,0 +1,94 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import io.envoyproxy.controlplane.cache.TestResources +import 
io.envoyproxy.envoy.api.v2.RouteConfiguration +import io.envoyproxy.envoy.api.v2.core.HeaderValue +import io.envoyproxy.envoy.api.v2.core.HeaderValueOption +import io.envoyproxy.envoy.api.v2.route.DirectResponseAction +import io.envoyproxy.envoy.api.v2.route.Route +import io.envoyproxy.envoy.api.v2.route.RouteAction +import io.envoyproxy.envoy.api.v2.route.RouteMatch +import io.envoyproxy.envoy.api.v2.route.VirtualHost + +internal class EnvoyEgressRoutesFactory( + private val properties: SnapshotProperties +) { + + /** + * By default envoy doesn't proxy requests to provided IP address. We created cluster: envoy-original-destination + * which allows direct calls to IP address extracted from x-envoy-original-dst-host header for calls to + * envoy-original-destination cluster. + */ + private val originalDestinationRoute = VirtualHost.newBuilder() + .setName("original-destination-route") + .addDomains("envoy-original-destination") + .addRoutes( + Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix("/") + ) + .setRoute( + RouteAction.newBuilder() + .setCluster("envoy-original-destination") + ) + ) + .build() + + private val wildcardRoute = VirtualHost.newBuilder() + .setName("wildcard-route") + .addDomains("*") + .addRoutes( + Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix("/") + ) + .setDirectResponse( + DirectResponseAction.newBuilder() + .setStatus(properties.egress.clusterNotFoundStatusCode) + ) + ) + .build() + + /** + * @see TestResources.createRoute + */ + fun createEgressRouteConfig(serviceName: String, routesMap: Map): RouteConfiguration { + val virtualHosts = routesMap.map { (clusterName, routeDomains) -> + VirtualHost.newBuilder() + .setName(clusterName) + .addDomains(routeDomains) + .addRoutes( + Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix("/") + ) + .setRoute( + RouteAction.newBuilder() + .setCluster(clusterName) + ) + ) + .build() + } + + return RouteConfiguration.newBuilder() + 
.setName("default_routes") + .addAllVirtualHosts( + virtualHosts + originalDestinationRoute + wildcardRoute + ).also { + if (properties.incomingPermissions.enabled) { + it.addRequestHeadersToAdd( + HeaderValueOption.newBuilder() + .setHeader( + HeaderValue.newBuilder() + .setKey(properties.incomingPermissions.clientIdentityHeader) + .setValue(serviceName) + ) + ) + } + } + .build() + } +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyIngressRoutesFactory.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyIngressRoutesFactory.kt new file mode 100644 index 000000000..25f2dba8e --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyIngressRoutesFactory.kt @@ -0,0 +1,247 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import com.google.protobuf.Duration +import com.google.protobuf.UInt32Value +import com.google.protobuf.util.Durations +import io.envoyproxy.envoy.api.v2.RouteConfiguration +import io.envoyproxy.envoy.api.v2.core.DataSource +import io.envoyproxy.envoy.api.v2.route.DirectResponseAction +import io.envoyproxy.envoy.api.v2.route.HeaderMatcher +import io.envoyproxy.envoy.api.v2.route.RetryPolicy +import io.envoyproxy.envoy.api.v2.route.Route +import io.envoyproxy.envoy.api.v2.route.RouteAction +import io.envoyproxy.envoy.api.v2.route.RouteMatch +import io.envoyproxy.envoy.api.v2.route.VirtualCluster +import io.envoyproxy.envoy.api.v2.route.VirtualHost +import pl.allegro.tech.servicemesh.envoycontrol.groups.IncomingEndpoint +import pl.allegro.tech.servicemesh.envoycontrol.groups.PathMatchingType +import pl.allegro.tech.servicemesh.envoycontrol.groups.ProxySettings +import pl.allegro.tech.servicemesh.envoycontrol.groups.Role + +internal class EnvoyIngressRoutesFactory( + private val properties: SnapshotProperties +) { + enum class HttpMethod { + GET, PUT, POST, DELETE, HEAD + } + + private fun 
localClusterRouteAction( + responseTimeout: Duration?, + idleTimeout: Duration? + ): RouteAction.Builder { + val timeoutResponse = responseTimeout ?: Durations.fromMillis( + properties.localService.responseTimeout.toMillis() + ) + val timeoutIdle = idleTimeout ?: Durations.fromMillis(properties.localService.idleTimeout.toMillis()) + return RouteAction.newBuilder() + .setCluster("local_service") + .setTimeout(timeoutResponse) + .setIdleTimeout(timeoutIdle) + } + + private val metricsRouteAction = RouteAction.newBuilder() + .setCluster("this_admin") + .setPrefixRewrite("/stats/prometheus") + + private val metricsRoute = Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix(properties.routes.metrics.pathPrefix) + .addHeaders(httpMethodMatcher(HttpMethod.GET)) + ) + .setRoute(metricsRouteAction) + .build() + + private val statusPathPattern = properties.routes.status.pathPrefix + ".*" + + private fun statusRoute(localRouteAction: RouteAction.Builder): Route? { + return Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix(properties.routes.status.pathPrefix) + .addHeaders(httpMethodMatcher(HttpMethod.GET)) + ) + .setRoute(localRouteAction) + .build() + } + + private val fallbackIngressRoute = Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix("/") + ) + .setDirectResponse( + DirectResponseAction.newBuilder() + .setStatus(properties.incomingPermissions.endpointUnavailableStatusCode) + .setBody( + DataSource.newBuilder() + .setInlineString("Requested resource is unavailable or client not permitted") + ) + ) + .build() + + private fun retryPolicy(retryProps: RetryPolicyProperties): RetryPolicy = RetryPolicy.newBuilder().apply { + retryOn = retryProps.retryOn.joinToString(separator = ",") + numRetries = UInt32Value.of(retryProps.numRetries) + if (!retryProps.perTryTimeout.isZero) { + perTryTimeout = Durations.fromMillis(retryProps.perTryTimeout.toMillis()) + } + hostSelectionRetryMaxAttempts = 
retryProps.hostSelectionRetryMaxAttempts + addAllRetriableStatusCodes(retryProps.retriableStatusCodes) + }.build() + + val defaultRetryPolicy: RetryPolicy = retryPolicy(properties.localService.retryPolicy.default) + val perMethodRetryPolicies: Map = properties.localService.retryPolicy.perHttpMethod + .filter { it.value.enabled } + .map { HttpMethod.valueOf(it.key) to retryPolicy(it.value) } + .toMap() + + private fun allOpenIngressRoutes(localRouteAction: RouteAction.Builder): List { + val nonRetryRoute = Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .setPrefix("/") + ) + .setRoute(localRouteAction) + .build() + val retryRoutes = perMethodRetryPolicies + .map { (method, retryPolicy) -> + Route.newBuilder() + .setMatch( + RouteMatch.newBuilder() + .addHeaders(httpMethodMatcher(method)) + .setPrefix("/") + ) + .setRoute(localClusterRouteActionWithRetryPolicy(retryPolicy, localRouteAction)) + .build() + } + return retryRoutes + nonRetryRoute + } + + private fun localClusterRouteActionWithRetryPolicy(method: HttpMethod, localRouteAction: RouteAction.Builder): + RouteAction.Builder = perMethodRetryPolicies[method] + ?.let { localClusterRouteActionWithRetryPolicy(it, localRouteAction) } + ?: localRouteAction + + private fun localClusterRouteActionWithRetryPolicy( + retryPolicy: RetryPolicy, + localRouteAction: RouteAction.Builder + ) = localRouteAction.clone().setRetryPolicy(retryPolicy) + + private fun permissionsDisabledRoutes(localRouteAction: RouteAction.Builder): List { + return if (!properties.routes.metrics.enabled) allOpenIngressRoutes(localRouteAction) + else listOf(metricsRoute) + allOpenIngressRoutes(localRouteAction) + } + + fun createSecuredIngressRouteConfig(proxySettings: ProxySettings): RouteConfiguration { + val virtualClusters = when (statusRouteVirtualClusterEnabled()) { + true -> + listOf( + VirtualCluster.newBuilder() + .setPattern(statusPathPattern) + .setName("status") + .build(), + VirtualCluster.newBuilder() + .setPattern("/.*") + 
.setName("endpoints") + .build() + ) + false -> + emptyList() + } + + val virtualHost = VirtualHost.newBuilder() + .setName("secured_local_service") + .addDomains("*") + .addAllVirtualClusters(virtualClusters) + .addAllRoutes(generateSecuredIngressRoutes(proxySettings)) + .also { + if (properties.localService.retryPolicy.default.enabled) { + it.retryPolicy = defaultRetryPolicy + } + } + + return RouteConfiguration.newBuilder() + .setName("ingress_secured_routes") + .addVirtualHosts(virtualHost) + .build() + } + + private fun generateSecuredIngressRoutes(proxySettings: ProxySettings): List { + val localRouteAction = localClusterRouteAction( + proxySettings.incoming.timeoutPolicy.responseTimeout, + proxySettings.incoming.timeoutPolicy.idleTimeout + ) + + if (!proxySettings.incoming.permissionsEnabled) { + return permissionsDisabledRoutes(localRouteAction) + } + val rolesByName = proxySettings.incoming.roles.associateBy { it.name.orEmpty() } + + val applicationRoutes = proxySettings.incoming.endpoints + .flatMap { toRoutes(it, rolesByName, localRouteAction) } + + return listOfNotNull( + metricsRoute.takeIf { properties.routes.metrics.enabled }, + statusRoute(localRouteAction) + .takeIf { properties.routes.status.enabled } + ) + applicationRoutes + fallbackIngressRoute + } + + private fun toRoutes( + endpoint: IncomingEndpoint, + roles: Map, + localRouteAction: RouteAction.Builder + ): List { + val routeMatch = RouteMatch.newBuilder() + + when (endpoint.pathMatchingType) { + PathMatchingType.PATH -> routeMatch.path = endpoint.path + PathMatchingType.PATH_PREFIX -> routeMatch.prefix = endpoint.path + } + + val clients = endpoint.clients + .flatMap { roles[it]?.clients ?: listOf(it) } + .distinct() + + val methods = endpoint.methods + .takeIf { it.isNotEmpty() } + ?.map { HttpMethod.valueOf(it) } + ?: perMethodRetryPolicies.keys + + return clients.flatMap { client -> + val routesForMethods = methods.map { method -> + val match = routeMatch.clone() + 
.addHeaders(clientNameMatcher(client)) + .addHeaders(httpMethodMatcher(method)) + Route.newBuilder() + .setMatch(match) + .setRoute(localClusterRouteActionWithRetryPolicy(method, localRouteAction)) + .build() + } + if (endpoint.methods.isEmpty()) { + val match = routeMatch.clone() + .addHeaders(clientNameMatcher(client)) + routesForMethods + Route.newBuilder() + .setMatch(match) + .setRoute(localRouteAction) + .build() + } else routesForMethods + } + } + + private fun clientNameMatcher(clientName: String): HeaderMatcher.Builder = + HeaderMatcher.newBuilder() + .setName(properties.incomingPermissions.clientIdentityHeader) + .setExactMatch(clientName) + + private fun httpMethodMatcher(method: HttpMethod): HeaderMatcher = exactHeader(":method", method.name) + + private fun exactHeader(name: String, value: String): HeaderMatcher = HeaderMatcher.newBuilder() + .setName(name) + .setExactMatch(value) + .build() + + private fun statusRouteVirtualClusterEnabled() = + properties.routes.status.enabled && properties.routes.status.createVirtualCluster +} diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoySnapshotFactory.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoySnapshotFactory.kt new file mode 100644 index 000000000..c303d4f60 --- /dev/null +++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoySnapshotFactory.kt @@ -0,0 +1,202 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import io.envoyproxy.controlplane.cache.Snapshot +import io.envoyproxy.envoy.api.v2.Cluster +import io.envoyproxy.envoy.api.v2.ClusterLoadAssignment +import io.envoyproxy.envoy.api.v2.Listener +import io.envoyproxy.envoy.api.v2.RouteConfiguration +import io.envoyproxy.envoy.api.v2.auth.Secret +import io.envoyproxy.envoy.api.v2.core.Address +import io.envoyproxy.envoy.api.v2.core.Locality +import io.envoyproxy.envoy.api.v2.core.SocketAddress +import 
io.envoyproxy.envoy.api.v2.endpoint.Endpoint +import io.envoyproxy.envoy.api.v2.endpoint.LbEndpoint +import io.envoyproxy.envoy.api.v2.endpoint.LocalityLbEndpoints +import pl.allegro.tech.servicemesh.envoycontrol.groups.AllServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.Group +import pl.allegro.tech.servicemesh.envoycontrol.groups.ProxySettings +import pl.allegro.tech.servicemesh.envoycontrol.groups.ServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstance +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances +import pl.allegro.tech.servicemesh.envoycontrol.services.Locality as LocalityEnum + +internal class EnvoySnapshotFactory( + private val ingressRoutesFactory: EnvoyIngressRoutesFactory, + private val egressRoutesFactory: EnvoyEgressRoutesFactory, + private val clustersFactory: EnvoyClustersFactory, + private val snapshotsVersions: SnapshotsVersions +) { + fun newSnapshot(servicesStates: List, ads: Boolean): Snapshot { + val serviceNames = servicesStates.flatMap { it.servicesState.serviceNames() }.distinct() + + val clusters: List = clustersFactory.getClustersForServices(serviceNames, ads) + + val endpoints: List = createLoadAssignment(servicesStates) + val routes = listOf( + egressRoutesFactory.createEgressRouteConfig("", serviceNames.map { it to it }.toMap()), + ingressRoutesFactory.createSecuredIngressRouteConfig(ProxySettings()) + ) + + val version = snapshotsVersions.version(AllServicesGroup(ads), clusters, endpoints) + + return createSnapshot( + clusters = clusters, + clustersVersion = version.clusters, + endpoints = endpoints, + endpointsVersions = version.endpoints, + routes = routes, + routesVersion = RoutesVersion(version.clusters.value) + ) + } + + fun getSnapshotForGroup(group: Group, globalSnapshot: Snapshot): Snapshot { + if (group.isGlobalGroup()) { + return globalSnapshot + } + return 
newSnapshotForGroup(group, globalSnapshot) + } + + private fun getServiceNamesForGroup(group: Group, globalSnapshot: Snapshot): List { + return when (group) { + is ServicesGroup -> group.proxySettings.outgoing.getServiceDependencies().map { it.service } + is AllServicesGroup -> globalSnapshot.clusters().resources().map { it.key } + } + } + + private fun getEgressRouteMap(group: Group, globalSnapshot: Snapshot): Map { + return getServiceNamesForGroup(group, globalSnapshot).map { it to it }.toMap() + + group.proxySettings.outgoing.getDomainDependencies().map { + it.getClusterName() to it.getRouteDomain() + }.toMap() + } + + private fun getServicesEndpointsForGroup(group: Group, globalSnapshot: Snapshot): List { + return getServiceNamesForGroup(group, globalSnapshot) + .mapNotNull { globalSnapshot.endpoints().resources().get(it) } + } + + private fun newSnapshotForGroup( + group: Group, + globalSnapshot: Snapshot + ): Snapshot { + + val clusters: List = + clustersFactory.getClustersForGroup(group, globalSnapshot) + + val routes = listOf( + egressRoutesFactory.createEgressRouteConfig( + group.serviceName, getEgressRouteMap(group, globalSnapshot) + ), + ingressRoutesFactory.createSecuredIngressRouteConfig(group.proxySettings) + ) + + if (clusters.isEmpty()) { + return createSnapshot(routes = routes) + } + + val endpoints = getServicesEndpointsForGroup(group, globalSnapshot) + + val version = snapshotsVersions.version(group, clusters, endpoints) + + return createSnapshot( + clusters = clusters, + clustersVersion = version.clusters, + endpoints = endpoints, + endpointsVersions = version.endpoints, + routes = routes, + // we assume, that routes don't change during Envoy lifecycle unless clusters change + routesVersion = RoutesVersion(version.clusters.value) + ) + } + + private fun createEndpointsGroup( + serviceInstances: ServiceInstances, + zone: String, + priority: Int + ): LocalityLbEndpoints = + LocalityLbEndpoints.newBuilder() + 
.setLocality(Locality.newBuilder().setZone(zone).build())
+            .addAllLbEndpoints(serviceInstances.instances .map { createLbEndpoint(it) })
+            .setPriority(priority)
+            .build()
+
+    /** Wraps the endpoint built for [serviceInstance] in an [LbEndpoint]. */
+    private fun createLbEndpoint(serviceInstance: ServiceInstance): LbEndpoint {
+        return LbEndpoint.newBuilder()
+            .setEndpoint(
+                buildEndpoint(serviceInstance)
+            )
+            .build()
+    }
+
+    /** Returns an unbuilt [Endpoint.Builder] whose address comes from [buildAddress]. */
+    private fun buildEndpoint(serviceInstance: ServiceInstance): Endpoint.Builder {
+        return Endpoint.newBuilder()
+            .setAddress(
+                buildAddress(serviceInstance)
+            )
+    }
+
+    /** Returns an unbuilt [Address.Builder] holding the socket address from [buildSocketAddress]. */
+    private fun buildAddress(serviceInstance: ServiceInstance): Address.Builder {
+        return Address.newBuilder()
+            .setSocketAddress(
+                buildSocketAddress(serviceInstance)
+            )
+    }
+
+    /** Copies the instance's address and port into a socket address builder; the protocol is always TCP. */
+    private fun buildSocketAddress(serviceInstance: ServiceInstance): SocketAddress.Builder {
+        return SocketAddress.newBuilder()
+            .setAddress(serviceInstance.address)
+            .setPortValue(serviceInstance.port)
+            .setProtocol(SocketAddress.Protocol.TCP)
+    }
+
+    /** Maps a locality to the priority value used for its endpoints: 0 for LOCAL, 1 for anything else. */
+    private fun toEnvoyPriority(locality: LocalityEnum): Int = if (locality == LocalityEnum.LOCAL) 0 else 1
+
+    /**
+     * Builds one [ClusterLoadAssignment] per service name.
+     *
+     * Every state contributes a (serviceName, endpoints group) pair for each of its services; the pairs are then
+     * grouped by service name, so a service present in several states gets all of its groups in one assignment.
+     */
+    private fun createLoadAssignment(
+        localityAwareServicesStates: List
+    ): List {
+        return localityAwareServicesStates
+            .flatMap {
+                val locality = it.locality
+                val zone = it.zone
+
+                it.servicesState.serviceNameToInstances.map { (serviceName, serviceInstances) ->
+                    serviceName to createEndpointsGroup(serviceInstances, zone, toEnvoyPriority(locality))
+                }
+            }
+            .groupBy { (serviceName) ->
+                serviceName
+            }
+            .map { (serviceName, serviceNameLocalityLbEndpointsPairs) ->
+                // drop the service name from each pair, keeping only the endpoints group
+                val localityLbEndpoints = serviceNameLocalityLbEndpointsPairs.map { (_, localityLbEndpoint) ->
+                    localityLbEndpoint
+                }
+
+                ClusterLoadAssignment.newBuilder()
+                    .setClusterName(serviceName)
+                    .addAllEndpoints(localityLbEndpoints)
+                    .build()
+            }
+    }
+
+    /**
+     * Creates a [Snapshot] from the given clusters, endpoints and routes with their versions.
+     *
+     * Every parameter defaults to an empty list / EMPTY_VERSION. The two remaining resource lists passed to
+     * [Snapshot.create] are always empty and carry the listeners and secrets EMPTY_VERSION values.
+     */
+    private fun createSnapshot(
+        clusters: List = emptyList(),
+        clustersVersion: ClustersVersion = ClustersVersion.EMPTY_VERSION,
+        endpoints: List = emptyList(),
+        endpointsVersions: EndpointsVersion = EndpointsVersion.EMPTY_VERSION,
+        routes: List = emptyList(),
+        routesVersion: RoutesVersion = RoutesVersion.EMPTY_VERSION
+    ): Snapshot =
+        Snapshot.create(
+            clusters,
+            clustersVersion.value,
+            endpoints,
+            endpointsVersions.value,
+            emptyList(),
+            ListenersVersion.EMPTY_VERSION.value,
+            routes,
+            routesVersion.value,
+            emptyList(),
+            SecretsVersion.EMPTY_VERSION.value
+        )
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt
new file mode 100644
index 000000000..9b0fc3aac
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotProperties.kt
@@ -0,0 +1,89 @@
+@file:Suppress("MagicNumber")
+
+package pl.allegro.tech.servicemesh.envoycontrol.snapshot
+
+import java.time.Duration
+
+/** Root holder of the snapshot settings; every property is mutable and initialised with a default. */
+class SnapshotProperties {
+    var routes = RoutesProperties()
+    var localService = LocalServiceProperties()
+    var egress = EgressProperties()
+    var incomingPermissions = IncomingPermissionsProperties()
+    var outgoingPermissions = OutgoingPermissionsProperties()
+    var clusterOutlierDetection = ClusterOutlierDetectionProperties()
+    var xdsClusterName = "envoy-control-xds"
+    var edsConnectionTimeout: Duration = Duration.ofSeconds(2)
+    var stateSampleDuration: Duration = Duration.ofSeconds(1)
+    var staticClusterConnectionTimeout: Duration = Duration.ofSeconds(2)
+    var trustedCaFile = "/etc/ssl/certs/ca-certificates.crt"
+}
+
+/** Outgoing-permissions settings: disabled by default, "*" as the all-services value, empty wildcard allow-list. */
+class OutgoingPermissionsProperties {
+    var enabled = false
+    var allServicesDependenciesValue = "*"
+    var servicesAllowedToUseWildcard: MutableSet = mutableSetOf()
+}
+
+/** Incoming-permissions settings; disabled by default. */
+class IncomingPermissionsProperties {
+    var enabled = false
+    /**
+     * unavailable = not found || unauthorized
+     */
+    var endpointUnavailableStatusCode = 503
+    var clientIdentityHeader = "x-service-name"
+}
+
+/** Groups the settings of the metrics route and the status route. */
+class RoutesProperties {
+    var metrics = MetricsRouteProperties()
+    var status = StatusRouteProperties()
+}
+
+/** Outlier detection settings; disabled by default. */
+class ClusterOutlierDetectionProperties {
+    var enabled = false
+    var consecutive5xx = 5
+    var interval: Duration = Duration.ofSeconds(10)
+    var baseEjectionTime: Duration = Duration.ofSeconds(30)
+    var maxEjectionPercent = 10
+    var enforcingConsecutive5xx = 100
+    var enforcingSuccessRate = 100
+    var successRateMinimumHosts = 5
+    var successRateRequestVolume = 100
+    var successRateStdevFactor = 1900
+    var consecutiveGatewayFailure = 5
+    var enforcingConsecutiveGatewayFailure = 0
+}
+
+/** Metrics route settings; disabled by default. */
+class MetricsRouteProperties {
+    var enabled = false
+    var pathPrefix = "/status/envoy/stats/prometheus"
+}
+
+/** Status route settings; both the route and its virtual cluster are disabled by default. */
+class StatusRouteProperties {
+    var enabled = false
+    var pathPrefix = "/status/"
+    var createVirtualCluster = false
+}
+
+/** Local service settings: 60 s idle timeout, 15 s response timeout and the retry policies. */
+class LocalServiceProperties {
+    var idleTimeout: Duration = Duration.ofSeconds(60)
+    var responseTimeout: Duration = Duration.ofSeconds(15)
+    var retryPolicy: RetryPoliciesProperties = RetryPoliciesProperties()
+}
+
+/** A default retry policy plus a map of policies, empty by default, named per HTTP method. */
+class RetryPoliciesProperties {
+    var default: RetryPolicyProperties = RetryPolicyProperties()
+    var perHttpMethod: MutableMap = mutableMapOf()
+}
+
+/** A single retry policy; disabled by default, with one retry and empty retry-on / status-code sets. */
+class RetryPolicyProperties {
+    var enabled = false
+    var retryOn: MutableSet = mutableSetOf()
+    var numRetries: Int = 1
+    var perTryTimeout: Duration = Duration.ofMillis(0)
+    var hostSelectionRetryMaxAttempts: Long = 1
+    var retriableStatusCodes: MutableSet = mutableSetOf()
+}
+
+/** Egress settings: the status code used when a cluster is not found (503 by default). */
+class EgressProperties {
+    var clusterNotFoundStatusCode = 503
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdater.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdater.kt
new file mode 100644
index 000000000..ea6f782bf
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdater.kt
@@ -0,0 +1,51 @@
+package pl.allegro.tech.servicemesh.envoycontrol.snapshot
+
+import io.envoyproxy.controlplane.cache.Snapshot
+import
io.envoyproxy.controlplane.cache.SnapshotCache
+import pl.allegro.tech.servicemesh.envoycontrol.groups.Group
+import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState
+import reactor.core.publisher.Flux
+import reactor.core.scheduler.Scheduler
+import java.util.function.BiFunction
+
+/**
+ * Rebuilds the snapshots stored in [cache] whenever the service states change or a group is added.
+ *
+ * Two base snapshots are created per update (one with `ads = false`, one with `ads = true`); each group known to
+ * the cache then receives a snapshot derived from the base matching its own `ads` flag.
+ */
+class SnapshotUpdater(
+    private val cache: SnapshotCache,
+    private val properties: SnapshotProperties,
+    private val scheduler: Scheduler,
+    private val onGroupAdded: Flux
+) {
+    private val versions = SnapshotsVersions()
+    private val snapshotFactory = EnvoySnapshotFactory(
+        ingressRoutesFactory = EnvoyIngressRoutesFactory(properties),
+        egressRoutesFactory = EnvoyEgressRoutesFactory(properties),
+        clustersFactory = EnvoyClustersFactory(properties),
+        snapshotsVersions = versions
+    )
+
+    /**
+     * Assembles the update pipeline and returns it; nothing in this method subscribes to it.
+     *
+     * The latest value of [changes] is re-emitted whenever either [changes] or `onGroupAdded` emits, the result is
+     * sampled every `properties.stateSampleDuration` and handled on [scheduler]. For each sampled value the stored
+     * versions are first pruned to the groups currently in the cache, then all snapshots are updated.
+     */
+    fun start(changes: Flux>): Flux> {
+        // see GroupChangeWatcher
+        return Flux.combineLatest(
+            onGroupAdded,
+            changes,
+            // the group event only acts as a trigger; its value is discarded
+            BiFunction, List> { _, states -> states }
+        )
+            .sample(properties.stateSampleDuration)
+            .publishOn(scheduler)
+            .doOnNext { states ->
+                versions.retainGroups(cache.groups())
+                updateSnapshots(states)
+            }
+    }
+
+    /** Builds the non-ADS and ADS base snapshots once and applies the matching one to every group in the cache. */
+    private fun updateSnapshots(states: List) {
+        val snapshot = snapshotFactory.newSnapshot(states, ads = false)
+        val adsSnapshot = snapshotFactory.newSnapshot(states, ads = true)
+
+        cache.groups().forEach { group -> updateSnapshotForGroup(group, if (group.ads) adsSnapshot else snapshot) }
+    }
+
+    /** Derives the group-specific snapshot from [snapshot] and stores it in the cache under [group]. */
+    private fun updateSnapshotForGroup(group: Group, snapshot: Snapshot) {
+        val groupSnapshot = snapshotFactory.getSnapshotForGroup(group, snapshot)
+        cache.setSnapshot(group, groupSnapshot)
+    }
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotsVersions.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotsVersions.kt
new file mode 100644
index 000000000..392d38f9d
--- /dev/null
+++
b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotsVersions.kt
@@ -0,0 +1,97 @@
+package pl.allegro.tech.servicemesh.envoycontrol.snapshot
+
+import io.envoyproxy.envoy.api.v2.Cluster
+import io.envoyproxy.envoy.api.v2.ClusterLoadAssignment
+import pl.allegro.tech.servicemesh.envoycontrol.groups.Group
+import java.util.UUID
+import java.util.concurrent.ConcurrentHashMap
+
+/**
+ * We leverage the fact that when Envoy connects to xDS it starts with an empty version, therefore we don't have to
+ * maintain consistent versioning between Envoy Control instances.
+ *
+ * We have to generate a new version by comparing the data with what was previously sent. We cannot use hashes of
+ * the data, because a hash collision would result in a change in discovery that is never sent to Envoys.
+ *
+ * Calls to the version method are thread safe.
+ * Concurrent execution of the version and retainGroups methods can lead to a situation where a group is still
+ * present after retainGroups was invoked. This is fine, it will be removed on the next retainGroups invocation.
+ * We don't need strong consistency there.
+ */
+internal class SnapshotsVersions {
+
+    // last versions handed out per group, together with the data they were computed for
+    private val versions = ConcurrentHashMap()
+
+    /**
+     * Returns the versions to use for [group] given its current [clusters] and [endpoints].
+     *
+     * A group seen for the first time gets fresh versions for both. Otherwise each version is kept when the
+     * corresponding list equals the one stored on the previous call, and replaced with a fresh one when it differs.
+     * The new versions and data are stored as part of the same `compute` call.
+     */
+    fun version(group: Group, clusters: List, endpoints: List): Version {
+        val versionsWithData = versions.compute(group) { _, previous ->
+            val version = when (previous) {
+                null -> Version(clusters = ClustersVersion(newVersion()), endpoints = EndpointsVersion(newVersion()))
+                else -> Version(
+                    clusters = selectClusters(previous, clusters),
+                    endpoints = selectEndpoints(previous, endpoints)
+                )
+            }
+            VersionsWithData(version, clusters, endpoints)
+        }
+        // the remapping lambda above never returns null, so a value is always present here
+        return versionsWithData!!.version
+    }
+
+    /** Keeps the previous endpoints version when the endpoints are equal to the stored ones, else creates a new one. */
+    private fun selectEndpoints(
+        previous: VersionsWithData,
+        endpoints: List
+    ) = if (previous.endpoints == endpoints) previous.version.endpoints else EndpointsVersion(newVersion())
+
+    /** Keeps the previous clusters version when the clusters are equal to the stored ones, else creates a new one. */
+    private fun selectClusters(
+        previous: VersionsWithData,
+        clusters: List
+    ) = if (previous.clusters == clusters) previous.version.clusters else ClustersVersion(newVersion())
+
+    /**
+     * This should be called before setting a new snapshot to the cache. The cache cleans up unused groups by using
+     * SnapshotCollectingCallback. This should be executed so we won't store versions for stale groups.
+     */
+    fun retainGroups(groups: Iterable) {
+        val toRemove = versions.keys - groups
+        toRemove.forEach { group -> versions.remove(group) }
+    }
+
+    /** Generates a new version string: a random UUID with the dashes removed. */
+    private fun newVersion(): String = UUID.randomUUID().toString().replace("-", "")
+
+    /** The versions handed out for a group together with the data they correspond to. */
+    private data class VersionsWithData(
+        val version: Version,
+        val clusters: List,
+        val endpoints: List
+    )
+
+    /** Pair of the clusters version and the endpoints version of one group. */
+    internal data class Version(val clusters: ClustersVersion, val endpoints: EndpointsVersion)
+}
+
+/** Typed wrapper for a clusters version string. */
+data class ClustersVersion(val value: String) {
+    companion object {
+        val EMPTY_VERSION = ClustersVersion("empty")
+    }
+}
+
+/** Typed wrapper for an endpoints version string. */
+data class EndpointsVersion(val value: String) {
+    companion object {
+        val EMPTY_VERSION = EndpointsVersion("empty")
+    }
+}
+
+/** Typed wrapper for a routes version string. */
+data class RoutesVersion(val value: String) {
+    companion object {
+        val EMPTY_VERSION = RoutesVersion("empty")
+    }
+}
+
+/** Typed wrapper for a listeners version string. */
+data class ListenersVersion(val value: String) {
+    companion object {
+        val EMPTY_VERSION = ListenersVersion("empty")
+    }
+}
+
+/** Typed wrapper for a secrets version string. */
+data class SecretsVersion(val value: String) {
+    companion object {
+        val EMPTY_VERSION = SecretsVersion("empty")
+    }
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncControlPlaneClient.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncControlPlaneClient.kt
new file mode 100644
index 000000000..e30935abc
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/AsyncControlPlaneClient.kt
@@ -0,0 +1,9 @@
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState
+import reactor.core.publisher.Mono
+import java.net.URI
+
+/** Asynchronous client used to fetch the state exposed under a given URI. */
+interface AsyncControlPlaneClient {
+    fun getState(uri: URI): Mono
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/ControlPlaneInstanceFetcher.kt
b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/ControlPlaneInstanceFetcher.kt
new file mode 100644
index 000000000..c3856d2e2
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/ControlPlaneInstanceFetcher.kt
@@ -0,0 +1,7 @@
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import java.net.URI
+
+/** Looks up the instances available in the given DC. */
+interface ControlPlaneInstanceFetcher {
+    fun instances(dc: String): List
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServiceChanges.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServiceChanges.kt
new file mode 100644
index 000000000..49ecad358
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServiceChanges.kt
@@ -0,0 +1,17 @@
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControlProperties
+import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState
+import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceChanges
+import reactor.core.publisher.Flux
+
+/**
+ * [ServiceChanges] backed by [CrossDcServices].
+ *
+ * The stream starts with an empty set and suppresses consecutive duplicates, so subscribers get an initial value
+ * immediately and are only notified again when the polled state differs from the previous emission.
+ */
+class CrossDcServiceChanges(
+    val properties: EnvoyControlProperties,
+    val crossDcService: CrossDcServices
+) : ServiceChanges {
+    override fun stream(): Flux> =
+        crossDcService
+            .getChanges(properties.sync.pollingInterval)
+            .startWith(emptySet())
+            .distinctUntilChanged()
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServices.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServices.kt
new file mode 100644
index 000000000..18352bf1f
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServices.kt
@@ -0,0 +1,77 @@
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import io.micrometer.core.instrument.MeterRegistry
+import pl.allegro.tech.servicemesh.envoycontrol.logger
+import pl.allegro.tech.servicemesh.envoycontrol.services.Locality
+import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState
+import reactor.core.publisher.Flux
+import reactor.core.publisher.Mono
+import java.net.URI
+import java.time.Duration
+import java.util.concurrent.ConcurrentHashMap
+import java.util.stream.Collectors
+
+/**
+ * Periodically fetches the services state of every DC in `remoteDcs` from one of that DC's instances.
+ *
+ * The last successfully fetched state of each DC is cached and used as a fallback when a later fetch fails.
+ */
+class CrossDcServices(
+    private val controlPlaneClient: AsyncControlPlaneClient,
+    private val meterRegistry: MeterRegistry,
+    private val controlPlaneInstanceFetcher: ControlPlaneInstanceFetcher,
+    private val remoteDcs: List
+) {
+    private val logger by logger()
+    // last successfully fetched state per DC, served when a fetch fails
+    private val dcServicesCache = ConcurrentHashMap()
+
+    /**
+     * Emits the set of remote states collected on each tick.
+     *
+     * @param interval polling period in seconds; the first tick fires without delay
+     * @return a stream of non-empty sets; a tick that produced no state at all emits nothing
+     */
+    fun getChanges(interval: Long): Flux> {
+        return Flux
+            .interval(Duration.ofSeconds(0), Duration.ofSeconds(interval))
+            // Cross DC sync is not a backpressure compatible stream. If running cross dc sync is slower than interval
+            // we have to drop interval events and run another cross dc on another interval tick.
+            .onBackpressureDrop()
+            .flatMap {
+                Flux.fromIterable(remoteDcs)
+                    .map { dc -> dcWithControlPlaneInstances(dc) }
+                    // DCs without any known instance are skipped on this tick
+                    .filter { (_, instances) -> instances.isNotEmpty() }
+                    .flatMap { (dc, instances) -> servicesStateFromDc(dc, instances) }
+                    .collect(Collectors.toSet())
+            }
+            .filter {
+                it.isNotEmpty()
+            }
+            .doOnCancel {
+                meterRegistry.counter("cross-dc-synchronization.cancelled").increment()
+                logger.warn("Cancelling cross dc sync")
+            }
+    }
+
+    /**
+     * Pairs [dc] with its instances. A failure of the fetcher is counted and logged, and yields an empty list
+     * instead of propagating the exception.
+     */
+    private fun dcWithControlPlaneInstances(dc: String): Pair> {
+        return try {
+            val instances = controlPlaneInstanceFetcher.instances(dc)
+            dc to instances
+        } catch (e: Exception) {
+            meterRegistry.counter("cross-dc-synchronization.$dc.instance-fetcher.errors").increment()
+            logger.warn("Failed fetching instances from $dc", e)
+            dc to emptyList()
+        }
+    }
+
+    /**
+     * Fetches the state of [dc] from one randomly chosen instance and tags it as REMOTE.
+     *
+     * On success the result is stored in the cache. On error the failure is counted and logged, and the cached
+     * state of that DC is returned if there is one; otherwise the returned Mono completes empty.
+     */
+    private fun servicesStateFromDc(
+        dc: String,
+        instances: List
+    ): Mono {
+        val instance = chooseInstance(instances)
+        return controlPlaneClient
+            .getState(instance)
+            .map {
+                LocalityAwareServicesState(it, Locality.REMOTE, dc)
+            }
+            // NOTE(review): Mono.doOnSuccess may be invoked with null when the source completes empty, and
+            // ConcurrentHashMap rejects null values - confirm getState never completes without a value.
+            .doOnSuccess {
+                dcServicesCache += dc to it
+            }
+            .onErrorResume { exception ->
+                meterRegistry.counter("cross-dc-synchronization.$dc.state-fetcher.errors").increment()
+                logger.warn("Error synchronizing instances ${exception.message}", exception)
+                Mono.justOrEmpty(dcServicesCache[dc])
+            }
+    }
+
+    /** Picks a random instance; the caller only passes non-empty lists (see the filter in getChanges). */
+    private fun chooseInstance(serviceInstances: List): URI = serviceInstances.random()
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/GlobalServiceChanges.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/GlobalServiceChanges.kt
new file mode 100644
index 000000000..15f524567
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/GlobalServiceChanges.kt
@@ -0,0 +1,19 @@
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import
pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState
+import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceChanges
+import reactor.core.publisher.Flux
+
+/** Merges the streams of all given [ServiceChanges] sources into a single stream of combined states. */
+class GlobalServiceChanges(
+    private val serviceChanges: Array
+) {
+    /**
+     * Combines the latest emission of every source stream and flattens them into one list.
+     * With combineLatest nothing is emitted until every source has produced at least one value.
+     */
+    fun combined(): Flux> {
+        val serviceStatesStreams: List>> = serviceChanges.map { it.stream() }
+
+        return Flux.combineLatest(serviceStatesStreams) { statesArray ->
+            // combineLatest hands over an untyped array, hence the cast
+            (statesArray.asSequence() as Sequence>)
+                .flatten()
+                .toList()
+        }
+    }
+}
diff --git a/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/SyncProperties.kt b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/SyncProperties.kt
new file mode 100644
index 000000000..294096b60
--- /dev/null
+++ b/envoy-control/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/SyncProperties.kt
@@ -0,0 +1,13 @@
+@file:Suppress("MagicNumber")
+
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import java.time.Duration
+
+/** Synchronization settings; disabled by default. */
+class SyncProperties {
+    var enabled = false
+    // NOTE(review): unit is not stated here - presumably seconds; confirm against the code consuming it
+    var pollingInterval: Long = 1
+    var connectionTimeout: Duration = Duration.ofMillis(1000)
+    var readTimeout: Duration = Duration.ofMillis(500)
+    var envoyControlAppName = "envoy-control"
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/MetadataNodeGroupTest.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/MetadataNodeGroupTest.kt
new file mode 100644
index 000000000..63f26f251
--- /dev/null
+++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/MetadataNodeGroupTest.kt
@@ -0,0 +1,186 @@
+package pl.allegro.tech.servicemesh.envoycontrol.groups
+
+import com.google.protobuf.util.Durations
+import io.envoyproxy.envoy.api.v2.core.Node
+import org.assertj.core.api.Assertions.assertThat
+import org.junit.jupiter.api.Test
+
+/** Checks which group `MetadataNodeGroup.hash` assigns to a node for various permission and metadata setups. */
+class MetadataNodeGroupTest {
+    @Test
+    fun `should assign to group with all dependencies`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(allServicesDependenciesValue = "*", outgoingPermissions = true)
+        val node = node(serviceDependencies = setOf("*", "a", "b", "c"), ads = false)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(AllServicesGroup(
+            // we have to preserve all services even if wildcard is present,
+            // because service may define different settings for different dependencies (for example endpoints, which
+            // will be implemented in GITHUB-ISSUE)
+            proxySettings = ProxySettings().with(serviceDependencies = setOf("*", "a", "b", "c")),
+            ads = false)
+        )
+    }
+
+    @Test
+    fun `should assign to group with no dependencies`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true)
+
+        // when
+        val group = nodeGroup.hash(Node.newBuilder().build())
+
+        // then
+        assertThat(group).isEqualTo(
+            ServicesGroup(proxySettings = ProxySettings().with(serviceDependencies = setOf()), ads = false)
+        )
+    }
+
+    @Test
+    fun `should assign to group with listed dependencies`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true)
+        val node = node(serviceDependencies = setOf("a", "b", "c"), ads = false)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(
+            ServicesGroup(proxySettings = ProxySettings().with(serviceDependencies = setOf("a", "b", "c")), ads = false)
+        )
+    }
+
+    @Test
+    fun `should assign to group with all dependencies on ads`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(allServicesDependenciesValue = "*", outgoingPermissions = true)
+        val node = node(serviceDependencies = setOf("*"), ads = true)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(
+            AllServicesGroup(proxySettings = ProxySettings().with(serviceDependencies = setOf("*")), ads = true)
+        )
+    }
+
+    @Test
+    fun `should assign to group with listed dependencies on ads`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true)
+        val node = node(serviceDependencies = setOf("a", "b", "c"), ads = true)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(
+            ServicesGroup(proxySettings = ProxySettings().with(serviceDependencies = setOf("a", "b", "c")), ads = true)
+        )
+    }
+
+    @Test
+    fun `should assign to group with all dependencies when outgoing-permissions is not enabled`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = false)
+        val node = node(serviceDependencies = setOf("a", "b", "c"), ads = true)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(AllServicesGroup(
+            // we have to preserve all services even if outgoingPermissions is disabled,
+            // because service may define different settings for different dependencies (for example retry config)
+            proxySettings = ProxySettings().with(serviceDependencies = setOf("a", "b", "c")),
+            ads = true
+        ))
+    }
+
+    @Test
+    fun `should not include service settings when incoming permissions are disabled`() {
+        // given
+        // NOTE(review): incomingPermissions is not passed here, so this relies on its default - presumably false
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true)
+        val node = node(
+            serviceDependencies = setOf("a", "b", "c"),
+            ads = false, serviceName = "app1",
+            incomingSettings = true
+        )
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(
+            ServicesGroup(proxySettings = ProxySettings().with(serviceDependencies = setOf("a", "b", "c")), ads = false)
+        )
+    }
+
+    @Test
+    fun `should not include service settings when incoming permissions are disabled for all dependencies`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true, incomingPermissions = false)
+        val node = node(serviceDependencies = setOf("*"), ads = false, serviceName = "app1", incomingSettings = true)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group.proxySettings.incoming).isEqualTo(Incoming())
+    }
+
+    @Test
+    fun `should include service settings when incoming permissions are enabled`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true, incomingPermissions = true)
+        val node = node(serviceDependencies = setOf("a", "b"), ads = true, serviceName = "app1", incomingSettings = true)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(ServicesGroup(
+            ads = true,
+            serviceName = "app1",
+            proxySettings = addedProxySettings.with(serviceDependencies = setOf("a", "b"))
+        ))
+    }
+
+    @Test
+    fun `should include service settings when incoming permissions are enabled for all dependencies`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(outgoingPermissions = true, incomingPermissions = true)
+        val node = node(serviceDependencies = setOf("*"), ads = false, serviceName = "app1", incomingSettings = true)
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group).isEqualTo(AllServicesGroup(
+            ads = false,
+            serviceName = "app1",
+            proxySettings = addedProxySettings.with(serviceDependencies = setOf("*"))
+        ))
+    }
+
+    @Test
+    fun `should parse proto incoming timeout policy`() {
+        // given
+        val nodeGroup = MetadataNodeGroup(allServicesDependenciesValue = "*", outgoingPermissions = true)
+        val node = node(serviceDependencies = setOf("*"), ads = true, incomingSettings = true,
+            responseTimeout = "777s", idleTimeout = "13.33s")
+
+        // when
+        val group = nodeGroup.hash(node)
+
+        // then
+        assertThat(group.proxySettings.incoming.timeoutPolicy.responseTimeout?.seconds).isEqualTo(777)
+        assertThat(group.proxySettings.incoming.timeoutPolicy.idleTimeout).isEqualTo(Durations.parse("13.33s")
+        )
+    }
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt
new file mode 100644
index 000000000..37bc06aaa
--- /dev/null
+++
b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataTest.kt
@@ -0,0 +1,170 @@
+package pl.allegro.tech.servicemesh.envoycontrol.groups
+
+import io.grpc.Status
+import org.assertj.core.api.Assertions.assertThat
+import org.junit.jupiter.api.Test
+import org.junit.jupiter.api.assertThrows
+
+/**
+ * Tests the conversion of node metadata protos into incoming endpoints and outgoing dependencies, including the
+ * validation errors (status description and code) reported for malformed input.
+ */
+class NodeMetadataTest {
+
+    @Test
+    fun `should reject endpoint with both path and pathPrefix defined`() {
+        // given
+        val proto = incomingEndpointProto(path = "/path", pathPrefix = "/prefix")
+
+        // expects
+        val exception = assertThrows { proto.toIncomingEndpoint() }
+        assertThat(exception.status.description).isEqualTo("Precisely one of 'path' and 'pathPrefix' field is allowed")
+        assertThat(exception.status.code).isEqualTo(Status.Code.INVALID_ARGUMENT)
+    }
+
+    @Test
+    fun `should reject endpoint with no path or pathPrefix defined`() {
+        // given
+        val proto = incomingEndpointProto(path = null, pathPrefix = null)
+
+        // expects
+        val exception = assertThrows { proto.toIncomingEndpoint() }
+        assertThat(exception.status.description).isEqualTo("One of 'path' or 'pathPrefix' field is required")
+        assertThat(exception.status.code).isEqualTo(Status.Code.INVALID_ARGUMENT)
+    }
+
+    @Test
+    fun `should accept endpoint with both path and pathPrefix defined but prefix is null`() {
+        // given
+        // includeNullFields = true presumably keeps the null-valued field in the proto - confirm in the helper
+        val proto = incomingEndpointProto(path = "/path", pathPrefix = null, includeNullFields = true)
+
+        // when
+        val result = proto.toIncomingEndpoint()
+
+        // then
+        // no exception thrown
+        assertThat(result.path).isEqualTo("/path")
+        assertThat(result.pathMatchingType).isEqualTo(PathMatchingType.PATH)
+    }
+
+    @Test
+    fun `should accept endpoint with both path and pathPrefix defined but path is null`() {
+        // given
+        val proto = incomingEndpointProto(path = null, pathPrefix = "/prefix", includeNullFields = true)
+
+        // when
+        val result = proto.toIncomingEndpoint()
+
+        // then
+        // no exception thrown
+        assertThat(result.path).isEqualTo("/prefix")
+        assertThat(result.pathMatchingType).isEqualTo(PathMatchingType.PATH_PREFIX)
+    }
+
+    @Test
+    fun `should reject dependency with neither service nor domain field defined`() {
+        // given
+        val proto = outgoingDependencyProto()
+
+        // expects
+        val exception = assertThrows { proto.toDependency() }
+        assertThat(exception.status.description)
+            .isEqualTo("Define either 'service' or 'domain' as an outgoing dependency")
+        assertThat(exception.status.code).isEqualTo(Status.Code.INVALID_ARGUMENT)
+    }
+
+    @Test
+    fun `should reject dependency with both service and domain fields defined`() {
+        // given
+        val proto = outgoingDependencyProto(service = "service", domain = "http://domain")
+
+        // expects
+        val exception = assertThrows { proto.toDependency() }
+        assertThat(exception.status.description)
+            .isEqualTo("Define either 'service' or 'domain' as an outgoing dependency")
+        assertThat(exception.status.code).isEqualTo(Status.Code.INVALID_ARGUMENT)
+    }
+
+    @Test
+    fun `should reject dependency with unsupported protocol in domain field `() {
+        // given
+        val proto = outgoingDependencyProto(domain = "ftp://domain")
+
+        // expects
+        val exception = assertThrows { proto.toDependency() }
+        assertThat(exception.status.description)
+            .isEqualTo("Unsupported protocol for domain dependency for domain ftp://domain")
+        assertThat(exception.status.code).isEqualTo(Status.Code.INVALID_ARGUMENT)
+    }
+
+    @Test
+    fun `should check if dependency for service is defined`() {
+        // given
+        val outgoing = Outgoing(listOf(ServiceDependency("service-first")))
+
+        // expects
+        assertThat(outgoing.containsDependencyForService("service-first")).isTrue()
+        assertThat(outgoing.containsDependencyForService("service-second")).isFalse()
+    }
+
+    @Test
+    fun `should accept domain dependency`() {
+        // given
+        val proto = outgoingDependencyProto(domain = "http://domain")
+
+        // expects
+        val dependency = proto.toDependency()
+        assertThat(dependency).isInstanceOf(DomainDependency::class.java)
+        assertThat((dependency as DomainDependency).domain).isEqualTo("http://domain")
+    }
+
+    @Test
+    fun `should accept service dependency`() {
+        // given
+        val proto = outgoingDependencyProto(service = "my-service")
+
+        // expects
+        val dependency = proto.toDependency()
+        assertThat(dependency).isInstanceOf(ServiceDependency::class.java)
+        assertThat((dependency as ServiceDependency).service).isEqualTo("my-service")
+    }
+
+    @Test
+    fun `should return correct host and default port for domain dependency`() {
+        // given
+        val proto = outgoingDependencyProto(domain = "http://domain")
+        val dependency = proto.toDependency() as DomainDependency
+
+        // expects
+        assertThat(dependency.getHost()).isEqualTo("domain")
+        assertThat(dependency.getPort()).isEqualTo(80)
+    }
+
+    @Test
+    fun `should return custom port for domain dependency if it was defined`() {
+        // given
+        val proto = outgoingDependencyProto(domain = "http://domain:1234")
+        val dependency = proto.toDependency() as DomainDependency
+
+        // expects
+        assertThat(dependency.getPort()).isEqualTo(1234)
+    }
+
+    @Test
+    fun `should return correct names for domain dependency without port specified`() {
+        // given
+        val proto = outgoingDependencyProto(domain = "http://domain.pl")
+        val dependency = proto.toDependency() as DomainDependency
+
+        // expects
+        // the cluster name carries the default port, while the route domain stays as written in the URL
+        assertThat(dependency.getClusterName()).isEqualTo("domain_pl_80")
+        assertThat(dependency.getRouteDomain()).isEqualTo("domain.pl")
+    }
+
+    @Test
+    fun `should return correct names for domain dependency with port specified`() {
+        // given
+        val proto = outgoingDependencyProto(domain = "http://domain.pl:80")
+        val dependency = proto.toDependency() as DomainDependency
+
+        // expects
+        assertThat(dependency.getClusterName()).isEqualTo("domain_pl_80")
+        assertThat(dependency.getRouteDomain()).isEqualTo("domain.pl:80")
+    }
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataValidatorTest.kt
b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataValidatorTest.kt
new file mode 100644
index 000000000..fac9c1f47
--- /dev/null
+++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/NodeMetadataValidatorTest.kt
@@ -0,0 +1,72 @@
+package pl.allegro.tech.servicemesh.envoycontrol.groups
+
+import io.envoyproxy.envoy.api.v2.DiscoveryRequest
+import io.grpc.Status
+import org.assertj.core.api.Assertions.assertThat
+import org.assertj.core.api.Assertions.assertThatExceptionOfType
+import org.junit.jupiter.api.Test
+import pl.allegro.tech.servicemesh.envoycontrol.snapshot.OutgoingPermissionsProperties
+
+/**
+ * Tests that [NodeMetadataValidator] rejects the "*" dependency for services outside
+ * `servicesAllowedToUseWildcard`, and accepts it for listed services or when outgoing permissions are disabled.
+ */
+class NodeMetadataValidatorTest {
+    val validator = NodeMetadataValidator(OutgoingPermissionsProperties().apply {
+        enabled = true
+        servicesAllowedToUseWildcard = mutableSetOf("vis-1", "vis-2")
+    })
+
+    @Test
+    fun `should fail if service has no privilege to use wildcard`() {
+        // given
+        val node = node(
+            serviceDependencies = setOf("*", "a", "b", "c"),
+            serviceName = "regular-1"
+        )
+        val request = DiscoveryRequest.newBuilder().setNode(node).build()
+
+        // expects
+        assertThatExceptionOfType(AllDependenciesValidationException::class.java)
+            .isThrownBy { validator.onStreamRequest(streamId = 123, request = request) }
+            .satisfies {
+                assertThat(it.status.description).isEqualTo(
+                    "Blocked service regular-1 from using all dependencies. Only defined services can use all dependencies"
+                )
+                assertThat(it.status.code).isEqualTo(Status.Code.INVALID_ARGUMENT)
+            }
+    }
+
+    @Test
+    fun `should not fail if service has privilege to use wildcard`() {
+        // given
+        val node = node(
+            serviceDependencies = setOf("*", "a", "b", "c"),
+            serviceName = "vis-1"
+        )
+
+        val request = DiscoveryRequest.newBuilder().setNode(node).build()
+
+        // when
+        validator.onStreamRequest(123, request = request)
+
+        // then
+        // no exception thrown
+    }
+
+    @Test
+    fun `should not fail if outgoing-permissions is disabled`() {
+        // given
+        val permissionsDisabledValidator = NodeMetadataValidator(OutgoingPermissionsProperties().apply {
+            enabled = false
+            servicesAllowedToUseWildcard = mutableSetOf("vis-1", "vis-2")
+        })
+        val node = node(
+            serviceDependencies = setOf("*", "a", "b", "c"),
+            serviceName = "regular-1"
+        )
+        val request = DiscoveryRequest.newBuilder().setNode(node).build()
+
+        // when
+        permissionsDisabledValidator.onStreamRequest(123, request = request)
+
+        // then
+        // no exception thrown
+    }
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/RoutesAssertions.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/RoutesAssertions.kt
new file mode 100644
index 000000000..4dd9c3aba
--- /dev/null
+++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/RoutesAssertions.kt
@@ -0,0 +1,165 @@
+package pl.allegro.tech.servicemesh.envoycontrol.groups
+
+import com.google.protobuf.Duration
+import io.envoyproxy.envoy.api.v2.RouteConfiguration
+import io.envoyproxy.envoy.api.v2.route.DirectResponseAction
+import io.envoyproxy.envoy.api.v2.route.RetryPolicy
+import io.envoyproxy.envoy.api.v2.route.Route
+import io.envoyproxy.envoy.api.v2.route.VirtualCluster
+import io.envoyproxy.envoy.api.v2.route.VirtualHost
+import org.assertj.core.api.Assertions.assertThat
+import pl.allegro.tech.servicemesh.envoycontrol.snapshot.RetryPolicyProperties
+
+/** Asserts that there is exactly one virtual host and runs [condition] against it. Returns the receiver. */
+fun RouteConfiguration.hasSingleVirtualHostThat(condition: VirtualHost.() -> Unit): RouteConfiguration {
+    assertThat(this.virtualHostsList).hasSize(1)
+    condition(this.virtualHostsList[0])
+    return this
+}
+
+/** Asserts that at least one request header to add has the given [key] and [value]. Returns the receiver. */
+fun RouteConfiguration.hasHeaderToAdd(key: String, value: String): RouteConfiguration {
+    assertThat(this.requestHeadersToAddList).anySatisfy {
+        assertThat(it.header.key).isEqualTo(key)
+        assertThat(it.header.value).isEqualTo(value)
+    }
+    return this
+}
+
+/** Asserts that no request header to add has the given [key]. Returns the receiver. */
+fun RouteConfiguration.hasNoHeaderToAdd(key: String): RouteConfiguration {
+    assertThat(this.requestHeadersToAddList).noneSatisfy {
+        assertThat(it.header.key).isEqualTo(key)
+    }
+    return this
+}
+
+/** Asserts that the first two virtual clusters are "status" (`/status/.*`) and "endpoints" (`/.*`), in that order. */
+fun VirtualHost.hasStatusVirtualClusters(): VirtualHost {
+    return this.hasVirtualClustersInOrder(
+        { it.pattern == "/status/.*" && it.name == "status" },
+        { it.pattern == "/.*" && it.name == "endpoints" }
+    )
+}
+
+/**
+ * Asserts that the i-th virtual cluster satisfies the i-th condition for every given condition.
+ * Virtual clusters beyond the number of conditions are not checked, because `zip` stops at the shorter input.
+ */
+fun VirtualHost.hasVirtualClustersInOrder(vararg conditions: (VirtualCluster) -> Boolean): VirtualHost {
+    assertThat(
+        this.virtualClustersList.zip(conditions)
+            .filter { (cluster, condition) -> condition(cluster) }
+    ).hasSameSizeAs(conditions)
+    return this
+}
+
+/** Asserts that the virtual host has exactly one domain and that it equals [domain]. Returns the receiver. */
+fun VirtualHost.hasOneDomain(domain: String): VirtualHost {
+    assertThat(this.domainsList).hasSize(1).allMatch { it == domain }
+    return this
+}
+
+/**
+ * Asserts that the virtual host has exactly as many routes as there are [conditions] and runs the i-th condition
+ * against the i-th route. The conditions are expected to perform their own assertions.
+ */
+fun VirtualHost.hasOnlyRoutesInOrder(vararg conditions: Route.() -> Unit): VirtualHost {
+    assertThat(this.routesList).hasSameSizeAs(conditions)
+    // the sizes are equal at this point, so zip pairs every route with its condition
+    this.routesList.zip(conditions).forEach { (route, condition) -> condition(route) }
+    return this
+}
+
+/** Asserts that the route matches on exactly [prefix] and has no exact path set. Returns the receiver. */
+fun Route.matchingOnPrefix(prefix: String): Route {
+    assertThat(this.match).matches { it.prefix == prefix && it.path == "" }
+    return this
+}
+
+/** Asserts that the route matches on exactly [path] and has no prefix set. Returns the receiver. */
+fun Route.matchingOnPath(path: String): Route {
+    assertThat(this.match).matches { it.path == path && it.prefix == "" }
+    return this
+}
+
+fun Route.matchingOnMethod(method: String):
Route {
+    assertThat(this.match.headersList).anyMatch {
+        it.name == ":method" && it.exactMatch == method
+    }
+    return this
+}
+
+fun Route.matchingOnAnyMethod(): Route {
+    assertThat(this.match.headersList).noneMatch { it.name == ":method" }
+    return this
+}
+
+fun Route.publicAccess(): Route {
+    assertThat(this.match.headersList).allMatch { it.name != "x-service-name" }
+    return this
+}
+
+fun Route.accessOnlyForClient(client: String): Route {
+    assertThat(this.match.headersList.filter { it.name == "x-service-name" })
+        .hasSize(1)
+        .allMatch { it.exactMatch == client }
+    return this
+}
+
+fun Route.toCluster(cluster: String): Route {
+    assertThat(this.route.cluster).isEqualTo(cluster)
+    return this
+}
+
+fun Route.directResponse(condition: (DirectResponseAction) -> Boolean) {
+    assertThat(this.directResponse).matches { condition(it) } // matches() fails when condition is false
+}
+
+fun Route.matchingRetryPolicy(properties: RetryPolicyProperties) {
+    matchingRetryPolicy(this.route.retryPolicy, properties)
+}
+
+fun VirtualHost.matchingRetryPolicy(properties: RetryPolicyProperties) {
+    matchingRetryPolicy(this.retryPolicy, properties)
+}
+
+fun matchingRetryPolicy(retryPolicy: RetryPolicy, properties: RetryPolicyProperties) = retryPolicy.run {
+    assertThat(retryOn).isEqualTo(properties.retryOn.joinToString(separator = ","))
+    assertThat(numRetries.value).isEqualTo(properties.numRetries)
+    assertThat(perTryTimeout.seconds).isEqualTo(properties.perTryTimeout.seconds)
+    assertThat(hostSelectionRetryMaxAttempts).isEqualTo(properties.hostSelectionRetryMaxAttempts)
+    assertThat(retriableStatusCodesList).containsExactlyInAnyOrderElementsOf(properties.retriableStatusCodes)
+}
+
+fun Route.matchingOnResponseTimeout(responseTimeout: Duration): Route {
+    assertThat(this.route.timeout.seconds).isEqualTo(responseTimeout.seconds)
+    return this
+}
+fun Route.matchingOnIdleTimeout(idleTimeout: Duration): Route {
+    assertThat(this.route.idleTimeout.seconds).isEqualTo(idleTimeout.seconds)
+    return this
+}
+
+fun
Route.hasNoRetryPolicy() {
+    assertThat(this.route.retryPolicy).isEqualTo(RetryPolicy.newBuilder().build())
+}
+
+fun metricsRoute(): (Route) -> Unit = {
+    it.matchingOnPrefix("/status/envoy/stats/prometheus")
+        .matchingOnMethod("GET")
+        .publicAccess()
+        .toCluster("this_admin")
+}
+
+fun Route.allOpenIngressRoute() {
+    this.matchingOnPrefix("/")
+        .publicAccess()
+        .toCluster("local_service")
+}
+
+fun fallbackIngressRoute(): (Route) -> Unit = {
+    it.matchingOnPrefix("/")
+        .publicAccess()
+        .directResponse { it.status == 503 }
+}
+
+fun statusRoute(idleTimeout: Duration? = null, responseTimeout: Duration? = null): (Route) -> Unit = {
+    it.matchingOnPrefix("/status/")
+        .matchingOnMethod("GET")
+        .publicAccess()
+        .toCluster("local_service")
+    if (responseTimeout != null) {
+        it.matchingOnResponseTimeout(responseTimeout)
+    }
+    if (idleTimeout != null) {
+        it.matchingOnIdleTimeout(idleTimeout)
+    }
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt
new file mode 100644
index 000000000..509b59d43
--- /dev/null
+++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/groups/TestNodeFactory.kt
@@ -0,0 +1,136 @@
+package pl.allegro.tech.servicemesh.envoycontrol.groups
+
+import com.google.protobuf.ListValue
+import com.google.protobuf.NullValue
+import com.google.protobuf.Struct
+import com.google.protobuf.Value
+import io.envoyproxy.envoy.api.v2.core.Node
+
+fun node(
+    serviceDependencies: Set<String> = emptySet(),
+    ads: Boolean? = null,
+    serviceName: String? = null,
+    incomingSettings: Boolean = false,
+    idleTimeout: String? = null,
+    responseTimeout: String? = null
+): Node {
+    val meta = Node.newBuilder().metadataBuilder
+
+    serviceName?.let {
+        meta.putFields("service_name", string(serviceName))
+    }
+
+    ads?.let {
+        meta.putFields("ads", Value.newBuilder().setBoolValue(ads).build())
+    }
+
+    if (incomingSettings || serviceDependencies.isNotEmpty()) { // no proxy_settings field when both are unset
+        meta.putFields(
+            "proxy_settings",
+            proxySettingsProto(
+                path = "/endpoint",
+                serviceDependencies = serviceDependencies,
+                incomingSettings = incomingSettings,
+                idleTimeout = idleTimeout,
+                responseTimeout = responseTimeout
+            )
+        )
+    }
+
+    return Node.newBuilder()
+        .setMetadata(meta)
+        .build()
+}
+
+val addedProxySettings = ProxySettings(Incoming(
+    endpoints = listOf(IncomingEndpoint(
+        path = "/endpoint",
+        clients = setOf("client1")
+    )),
+    permissionsEnabled = true
+))
+
+fun ProxySettings.with(serviceDependencies: Set<String> = emptySet(), domainDependencies: Set<String> = emptySet()) = copy(
+    outgoing = Outgoing(dependencies = serviceDependencies.map { ServiceDependency(service = it) } +
+        domainDependencies.map { DomainDependency(domain = it) })
+)
+
+fun proxySettingsProto(
+    incomingSettings: Boolean,
+    path: String? = null,
+    serviceDependencies: Set<String> = emptySet(),
+    idleTimeout: String? = null,
+    responseTimeout: String? = null
+): Value = struct {
+    if (incomingSettings) {
+        putFields("incoming", struct {
+            putFields("endpoints", list {
+                addValues(incomingEndpointProto(path = path))
+            })
+            putFields("timeoutPolicy", struct {
+                idleTimeout?.let {
+                    putFields("idleTimeout", string(it))
+                }
+                responseTimeout?.let {
+                    putFields("responseTimeout", string(it))
+                }
+            })
+        })
+    }
+    if (serviceDependencies.isNotEmpty()) {
+        putFields("outgoing", struct {
+            putFields("dependencies", list {
+                serviceDependencies.forEach {
+                    addValues(outgoingDependencyProto(service = it))
+                }
+            })
+        })
+    }
+}
+
+fun outgoingDependencyProto(service: String? = null, domain: String?
= null) = struct { + service?.also { putFields("service", string(service)) } + domain?.also { putFields("domain", string(domain)) } +} + +fun incomingEndpointProto( + path: String? = null, + pathPrefix: String? = null, + includeNullFields: Boolean = false +): Value = struct { + when { + path != null -> string(path) + includeNullFields -> nullValue + else -> null + }?.also { + putFields("path", it) + } + + when { + pathPrefix != null -> string(pathPrefix) + includeNullFields -> nullValue + else -> null + }?.also { + putFields("pathPrefix", it) + } + + putFields("clients", list { addValues(string("client1")) }) +} + +private fun struct(fields: Struct.Builder.() -> Unit): Value { + val builder = Struct.newBuilder() + fields(builder) + return Value.newBuilder().setStructValue(builder).build() +} + +private fun list(elements: ListValue.Builder.() -> Unit): Value { + val builder = ListValue.newBuilder() + elements(builder) + return Value.newBuilder().setListValue(builder).build() +} + +private fun string(value: String): Value { + return Value.newBuilder().setStringValue(value).build() +} + +private val nullValue: Value = Value.newBuilder().setNullValue(NullValue.NULL_VALUE).build() diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyEgressRoutesFactoryTest.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyEgressRoutesFactoryTest.kt new file mode 100644 index 000000000..58d6fc6cf --- /dev/null +++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyEgressRoutesFactoryTest.kt @@ -0,0 +1,40 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.groups.hasHeaderToAdd +import pl.allegro.tech.servicemesh.envoycontrol.groups.hasNoHeaderToAdd + +internal class EnvoyEgressRoutesFactoryTest { + + val clusters = mapOf("srv1" to "srv1") + + @Test + fun `should add client 
identity header if incoming permissions are enabled`() { + // given + val routesFactory = EnvoyEgressRoutesFactory(SnapshotProperties().apply { + incomingPermissions.enabled = true + }) + + // when + val routeConfig = routesFactory.createEgressRouteConfig("client1", clusters) + + // then + routeConfig + .hasHeaderToAdd("x-service-name", "client1") + } + + @Test + fun `should not add client identity header if incoming permissions are disabled`() { + // given + val routesFactory = EnvoyEgressRoutesFactory(SnapshotProperties().apply { + incomingPermissions.enabled = false + }) + + // when + val routeConfig = routesFactory.createEgressRouteConfig("client1", clusters) + + // then + routeConfig + .hasNoHeaderToAdd("x-service-name") + } +} diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyIngressRoutesFactoryTest.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyIngressRoutesFactoryTest.kt new file mode 100644 index 000000000..6bdbbcf40 --- /dev/null +++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/EnvoyIngressRoutesFactoryTest.kt @@ -0,0 +1,276 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import com.google.protobuf.util.Durations +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.groups.Incoming +import pl.allegro.tech.servicemesh.envoycontrol.groups.IncomingEndpoint +import pl.allegro.tech.servicemesh.envoycontrol.groups.ProxySettings +import pl.allegro.tech.servicemesh.envoycontrol.groups.Role +import pl.allegro.tech.servicemesh.envoycontrol.groups.accessOnlyForClient +import pl.allegro.tech.servicemesh.envoycontrol.groups.allOpenIngressRoute +import pl.allegro.tech.servicemesh.envoycontrol.groups.fallbackIngressRoute +import pl.allegro.tech.servicemesh.envoycontrol.groups.hasNoRetryPolicy +import pl.allegro.tech.servicemesh.envoycontrol.groups.hasOneDomain +import 
pl.allegro.tech.servicemesh.envoycontrol.groups.hasOnlyRoutesInOrder +import pl.allegro.tech.servicemesh.envoycontrol.groups.hasSingleVirtualHostThat +import pl.allegro.tech.servicemesh.envoycontrol.groups.hasStatusVirtualClusters +import pl.allegro.tech.servicemesh.envoycontrol.groups.matchingOnAnyMethod +import pl.allegro.tech.servicemesh.envoycontrol.groups.matchingOnMethod +import pl.allegro.tech.servicemesh.envoycontrol.groups.matchingOnPath +import pl.allegro.tech.servicemesh.envoycontrol.groups.matchingRetryPolicy +import pl.allegro.tech.servicemesh.envoycontrol.groups.matchingOnResponseTimeout +import pl.allegro.tech.servicemesh.envoycontrol.groups.matchingOnIdleTimeout +import pl.allegro.tech.servicemesh.envoycontrol.groups.metricsRoute +import pl.allegro.tech.servicemesh.envoycontrol.groups.statusRoute +import pl.allegro.tech.servicemesh.envoycontrol.groups.toCluster +import pl.allegro.tech.servicemesh.envoycontrol.groups.TimeoutPolicy +import java.time.Duration + +internal class EnvoyIngressRoutesFactoryTest { + + private val retryPolicyProps = RetryPoliciesProperties().apply { + default = RetryPolicyProperties().apply { + enabled = true + retryOn = mutableSetOf("connection-failure") + numRetries = 3 + } + perHttpMethod = mutableMapOf( + "GET" to RetryPolicyProperties().apply { + enabled = true + retryOn = mutableSetOf("reset", "connection-failure") + numRetries = 1 + perTryTimeout = Duration.ofSeconds(1) + hostSelectionRetryMaxAttempts = 3 + }, + "HEAD" to RetryPolicyProperties().apply { + enabled = true + retryOn = mutableSetOf("connection-failure") + numRetries = 6 + }, + "POST" to RetryPolicyProperties().apply { + enabled = false + retryOn = mutableSetOf("connection-failure") + numRetries = 6 + } + ) + } + private val routesFactory = EnvoyIngressRoutesFactory(SnapshotProperties().apply { + routes.status.enabled = true + routes.status.createVirtualCluster = true + routes.metrics.enabled = true + localService.retryPolicy = retryPolicyProps + }) + + 
@Test + fun `should create legacy ingress route config`() { + // given + val emptyProxySettings = ProxySettings() + + // when + val routeConfig = routesFactory.createSecuredIngressRouteConfig(emptyProxySettings) + + // then + routeConfig + .hasSingleVirtualHostThat { + hasStatusVirtualClusters() + hasOneDomain("*") + hasOnlyRoutesInOrder( + metricsRoute(), + { + allOpenIngressRoute() + matchingOnMethod("GET") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["GET"]!!) + }, + { + allOpenIngressRoute() + matchingOnMethod("HEAD") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["HEAD"]!!) + }, + { + allOpenIngressRoute() + matchingOnAnyMethod() + hasNoRetryPolicy() + } + ) + matchingRetryPolicy(retryPolicyProps.default) + } + } + + @Test + fun `should create route config with no endpoints allowed`() { + // given + val proxySettingsNoEndpoints = ProxySettings( + incoming = Incoming(endpoints = listOf(), permissionsEnabled = true) + ) + + // when + val routeConfig = routesFactory.createSecuredIngressRouteConfig(proxySettingsNoEndpoints) + + // then + routeConfig + .hasSingleVirtualHostThat { + hasStatusVirtualClusters() + hasOneDomain("*") + hasOnlyRoutesInOrder( + metricsRoute(), + statusRoute(), + fallbackIngressRoute() + ) + } + } + + @Test + fun `should create route config with two simple endpoints and response timeout defined`() { + // given + val responseTimeout = Durations.fromSeconds(777) + val idleTimeout = Durations.fromSeconds(61) + val proxySettingsOneEndpoint = ProxySettings( + incoming = Incoming( + endpoints = listOf( + IncomingEndpoint( + path = "/endpoint", + clients = setOf("client1") + ), + IncomingEndpoint( + path = "/products", + clients = setOf("client2"), + methods = setOf("POST") + ) + ), + permissionsEnabled = true, + timeoutPolicy = TimeoutPolicy(idleTimeout, responseTimeout) + ) + ) + + // when + val routeConfig = routesFactory.createSecuredIngressRouteConfig(proxySettingsOneEndpoint) + + // then + routeConfig + 
.hasSingleVirtualHostThat { + hasStatusVirtualClusters() + hasOneDomain("*") + hasOnlyRoutesInOrder( + metricsRoute(), + statusRoute(idleTimeout, responseTimeout), + { + matchingOnPath("/endpoint") + matchingOnMethod("GET") + accessOnlyForClient("client1") + toCluster("local_service") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["GET"]!!) + matchingOnResponseTimeout(responseTimeout) + matchingOnIdleTimeout(idleTimeout) + }, + { + matchingOnPath("/endpoint") + matchingOnMethod("HEAD") + accessOnlyForClient("client1") + toCluster("local_service") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["HEAD"]!!) + matchingOnResponseTimeout(responseTimeout) + matchingOnIdleTimeout(idleTimeout) + }, + { + matchingOnPath("/endpoint") + matchingOnAnyMethod() + accessOnlyForClient("client1") + toCluster("local_service") + hasNoRetryPolicy() + matchingOnResponseTimeout(responseTimeout) + matchingOnIdleTimeout(idleTimeout) + }, + { + matchingOnPath("/products") + matchingOnMethod("POST") + accessOnlyForClient("client2") + toCluster("local_service") + hasNoRetryPolicy() + matchingOnResponseTimeout(responseTimeout) + matchingOnIdleTimeout(idleTimeout) + }, + fallbackIngressRoute() + ) + matchingRetryPolicy(retryPolicyProps.default) + } + } + + @Test + fun `should create multiple routes for multiple methods and clients`() { + // given + val proxySettings = ProxySettings( + incoming = Incoming( + endpoints = listOf( + IncomingEndpoint( + path = "/endpoint", + clients = setOf("client1", "group1"), + methods = setOf("GET", "POST") + ) + ), + permissionsEnabled = true, + roles = listOf( + Role(name = "group1", clients = setOf("clientB", "other-client")), + Role(name = "group2", clients = setOf("clientC")) + ) + ) + ) + + // when + val routeConfig = routesFactory.createSecuredIngressRouteConfig(proxySettings) + + // then + routeConfig + .hasSingleVirtualHostThat { + hasStatusVirtualClusters() + hasOneDomain("*") + hasOnlyRoutesInOrder( + metricsRoute(), + statusRoute(), + { + 
matchingOnPath("/endpoint") + matchingOnMethod("GET") + accessOnlyForClient("client1") + toCluster("local_service") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["GET"]!!) + }, + { + matchingOnPath("/endpoint") + matchingOnMethod("POST") + accessOnlyForClient("client1") + toCluster("local_service") + hasNoRetryPolicy() + }, + { + matchingOnPath("/endpoint") + matchingOnMethod("GET") + accessOnlyForClient("clientB") + toCluster("local_service") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["GET"]!!) + }, + { + matchingOnPath("/endpoint") + matchingOnMethod("POST") + accessOnlyForClient("clientB") + toCluster("local_service") + hasNoRetryPolicy() + }, + { + matchingOnPath("/endpoint") + matchingOnMethod("GET") + accessOnlyForClient("other-client") + toCluster("local_service") + matchingRetryPolicy(retryPolicyProps.perHttpMethod["GET"]!!) + }, + { + matchingOnPath("/endpoint") + matchingOnMethod("POST") + accessOnlyForClient("other-client") + toCluster("local_service") + hasNoRetryPolicy() + }, + fallbackIngressRoute() + ) + matchingRetryPolicy(retryPolicyProps.default) + } + } +} diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt new file mode 100644 index 000000000..e441df0ab --- /dev/null +++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotUpdaterTest.kt @@ -0,0 +1,224 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import io.envoyproxy.controlplane.cache.Response +import io.envoyproxy.controlplane.cache.Snapshot +import io.envoyproxy.controlplane.cache.SnapshotCache +import io.envoyproxy.controlplane.cache.StatusInfo +import io.envoyproxy.controlplane.cache.Watch +import io.envoyproxy.envoy.api.v2.DiscoveryRequest +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.Test +import 
pl.allegro.tech.servicemesh.envoycontrol.groups.AllServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.Group +import pl.allegro.tech.servicemesh.envoycontrol.groups.Incoming +import pl.allegro.tech.servicemesh.envoycontrol.groups.IncomingEndpoint +import pl.allegro.tech.servicemesh.envoycontrol.groups.ProxySettings +import pl.allegro.tech.servicemesh.envoycontrol.groups.ServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.with +import pl.allegro.tech.servicemesh.envoycontrol.services.Locality +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances +import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState +import reactor.core.publisher.Flux +import reactor.core.scheduler.Schedulers +import java.util.function.Consumer + +class SnapshotUpdaterTest { + + val proxySettings = ProxySettings( + incoming = Incoming( + endpoints = listOf(IncomingEndpoint(path = "/endpoint", clients = setOf("client"))), + permissionsEnabled = true + ) + ) + val groupWithProxy = AllServicesGroup(ads = true, serviceName = "service", proxySettings = proxySettings) + val groupWithServiceName = groupOf(services = listOf("existingService2")).copy(serviceName = "ipsum-service") + + @Test + fun `should generate group snapshots`() { + val cache = newCache() + val uninitializedSnapshot = null + + // groups are generated foreach element in SnapshotCache.groups(), so we need to initialize them + cache.setSnapshot(AllServicesGroup(ads = false), uninitializedSnapshot) + cache.setSnapshot(groupWithProxy, uninitializedSnapshot) + cache.setSnapshot(groupWithServiceName, uninitializedSnapshot) + cache.setSnapshot(groupOf(services = listOf("existingService1")), uninitializedSnapshot) + cache.setSnapshot(groupOf(services = listOf("existingService2")), uninitializedSnapshot) + + cache.setSnapshot(groupOf( + services = listOf("existingService1", 
"existingService2"), domains = listOf("http://domain") + ), uninitializedSnapshot) + cache.setSnapshot(groupOf(services = listOf("nonExistingService3")), uninitializedSnapshot) + + val updater = SnapshotUpdater( + cache, + properties = SnapshotProperties().apply { + incomingPermissions.enabled = true + }, + scheduler = Schedulers.newSingle("update-snapshot"), + onGroupAdded = Flux.just(true) + ) + + // when + updater.startWithServices("existingService1", "existingService2") + + // then + hasSnapshot(cache, AllServicesGroup(ads = false)) + .hasClusters("existingService1", "existingService2") + + hasSnapshot(cache, groupWithProxy) + .hasClusters("existingService1", "existingService2") + .hasSecuredIngressRoute("/endpoint", "client") + .hasServiceNameRequestHeader("service") + + hasSnapshot(cache, groupOf(services = listOf("existingService1"))) + .hasClusters("existingService1") + + hasSnapshot(cache, groupOf(services = listOf("existingService2"))) + .hasClusters("existingService2") + + hasSnapshot(cache, groupWithServiceName) + .hasClusters("existingService2") + .hasServiceNameRequestHeader("ipsum-service") + + hasSnapshot(cache, groupOf( + services = listOf("existingService1", "existingService2"), domains = listOf("http://domain")) + ).hasClusters("existingService1", "existingService2", "domain_80") + + hasSnapshot(cache, groupOf(listOf("nonExistingService3"))) + .withoutClusters() + } + + @Test + fun `should generate snapshot with empty version and one route`() { + // given + val emptyGroup = groupOf() + + val uninitializedSnapshot = null + val cache = newCache() + cache.setSnapshot(emptyGroup, uninitializedSnapshot) + + val updater = SnapshotUpdater( + cache, + properties = SnapshotProperties(), + scheduler = Schedulers.newSingle("update-snapshot"), + onGroupAdded = Flux.just(true) + ) + + // when + updater.start( + Flux.just(emptyList()) + ).blockFirst() + + // then version is set to empty + val snapshot = hasSnapshot(cache, emptyGroup) + 
assertThat(snapshot.endpoints().version()).isEqualTo(EndpointsVersion.EMPTY_VERSION.value)
+        assertThat(snapshot.clusters().version()).isEqualTo(ClustersVersion.EMPTY_VERSION.value)
+        assertThat(snapshot.listeners().version()).isEqualTo(ListenersVersion.EMPTY_VERSION.value)
+        assertThat(snapshot.routes().version()).isEqualTo(RoutesVersion.EMPTY_VERSION.value)
+
+        assertThat(snapshot.routes().resources().values).hasSize(2)
+        // two fallbacks: proxying direct IP requests and 503 for missing services
+        assertThat(snapshot.routes().resources().values
+            .first { it.name == "default_routes" }.virtualHostsCount)
+            .isEqualTo(2)
+    }
+
+    private fun SnapshotUpdater.startWithServices(vararg services: String) {
+        this.start(
+            Flux.just(
+                listOf(
+                    LocalityAwareServicesState(
+                        ServicesState(
+                            serviceNameToInstances = services.map { it to ServiceInstances(it, emptySet()) }.toMap()
+
+                        ),
+                        Locality.LOCAL, "zone"
+                    )
+                )
+            )
+        ).blockFirst()
+    }
+
+    private fun newCache(): SnapshotCache<Group> {
+        return object : SnapshotCache<Group> {
+
+            val groups: MutableMap<Group, Snapshot?> = mutableMapOf() // null value = registered, not yet generated
+
+            override fun groups(): MutableCollection<Group> {
+                return groups.keys.toMutableList()
+            }
+
+            override fun getSnapshot(group: Group): Snapshot? {
+                return groups[group]
+            }
+
+            override fun setSnapshot(group: Group, snapshot: Snapshot?) {
+                groups[group] = snapshot
+            }
+
+            override fun statusInfo(group: Group): StatusInfo {
+                throw UnsupportedOperationException("not used in testing")
+            }
+
+            override fun createWatch(
+                ads: Boolean,
+                request: DiscoveryRequest,
+                knownResourceNames: MutableSet<String>,
+                responseConsumer: Consumer<Response>
+            ): Watch {
+                throw UnsupportedOperationException("not used in testing")
+            }
+
+            override fun clearSnapshot(group: Group?): Boolean {
+                return false
+            }
+        }
+    }
+
+    private fun hasSnapshot(cache: SnapshotCache<Group>, group: Group): Snapshot {
+        val snapshot = cache.getSnapshot(group)
+        assertThat(snapshot).isNotNull
+        return snapshot
+    }
+
+    private fun Snapshot.hasClusters(vararg expected: String): Snapshot {
+        assertThat(this.clusters().resources().keys.toSet())
+            .isEqualTo(expected.toSet())
+        return this
+    }
+
+    private fun Snapshot.hasSecuredIngressRoute(endpoint: String, client: String): Snapshot {
+        assertThat(this.routes().resources().getValue("ingress_secured_routes").virtualHostsList.first().routesList
+            .map { it.match }
+            .filter { it.path == endpoint }
+            .filter { it.headersList
+                .any { it.name == "x-service-name" && it.exactMatch == client }
+            }
+        ).isNotEmpty
+        return this
+    }
+
+    private fun Snapshot.hasServiceNameRequestHeader(serviceName: String): Snapshot {
+        assertThat(this.routes().resources().getValue("default_routes").requestHeadersToAddList
+            .map { it.header }
+            .filter { it.key == "x-service-name" && it.value == serviceName }
+        ).hasSize(1)
+        return this
+    }
+
+    private fun Snapshot.withoutClusters() {
+        assertThat(this.clusters().resources().keys).isEmpty()
+    }
+
+    private fun groupOf(
+        services: List<String> = emptyList(),
+        domains: List<String> = emptyList()
+    ) = ServicesGroup(
+        ads = false,
+        proxySettings = ProxySettings().with(
+            serviceDependencies = services.toSet(), domainDependencies = domains.toSet()
+        )
+    )
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotsVersionsTest.kt
b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotsVersionsTest.kt new file mode 100644 index 000000000..50c047e28 --- /dev/null +++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/snapshot/SnapshotsVersionsTest.kt @@ -0,0 +1,162 @@ +package pl.allegro.tech.servicemesh.envoycontrol.snapshot + +import io.envoyproxy.envoy.api.v2.Cluster +import io.envoyproxy.envoy.api.v2.ClusterLoadAssignment +import io.envoyproxy.envoy.api.v2.core.Address +import io.envoyproxy.envoy.api.v2.core.SocketAddress +import io.envoyproxy.envoy.api.v2.endpoint.Endpoint +import io.envoyproxy.envoy.api.v2.endpoint.LbEndpoint +import io.envoyproxy.envoy.api.v2.endpoint.LocalityLbEndpoints +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.Test +import pl.allegro.tech.servicemesh.envoycontrol.groups.AllServicesGroup +import pl.allegro.tech.servicemesh.envoycontrol.groups.Incoming +import pl.allegro.tech.servicemesh.envoycontrol.groups.IncomingEndpoint +import pl.allegro.tech.servicemesh.envoycontrol.groups.Outgoing +import pl.allegro.tech.servicemesh.envoycontrol.groups.PathMatchingType +import pl.allegro.tech.servicemesh.envoycontrol.groups.ProxySettings +import pl.allegro.tech.servicemesh.envoycontrol.groups.Role + +internal class SnapshotsVersionsTest { + + private val snapshotsVersions = SnapshotsVersions() + + private val group = AllServicesGroup(ads = false) + private val clusters = listOf(cluster(name = "service1")) + private val endpoints = listOf(endpoints(clusterName = "service1", instances = 1)) + + @Test + fun `should generate a new version for a new group`() { + // when + val versions = snapshotsVersions.version(group, clusters, endpoints) + + // then + assertThat(versions.clusters).isNotNull() + assertThat(versions.endpoints).isNotNull() + } + + @Test + fun `should generate new version only for endpoints when they are different`() { + // given + val versions = 
snapshotsVersions.version(group, clusters, endpoints) + + // when + val newEndpoints = listOf(endpoints(clusterName = "service1", instances = 2)) + val newVersions = snapshotsVersions.version(group, clusters, newEndpoints) + + // then + assertThat(newVersions.clusters).isEqualTo(versions.clusters) + assertThat(newVersions.endpoints).isNotEqualTo(versions.endpoints) + } + + @Test + fun `should generate new version only for clusters when they are different`() { + // given + val versions = snapshotsVersions.version(group, clusters, endpoints) + + // when + val newClusters = listOf(cluster(name = "service1"), cluster(name = "service2")) + val newVersions = snapshotsVersions.version(group, newClusters, endpoints) + + // then + assertThat(newVersions.endpoints).isEqualTo(versions.endpoints) + assertThat(newVersions.clusters).isNotEqualTo(versions.clusters) + } + + @Test + fun `should retain versions only for given groups`() { + // given + val versions = snapshotsVersions.version(group, clusters, endpoints) + + // when nothing changed but the group is not retained + snapshotsVersions.retainGroups(emptyList()) + val newVersions = snapshotsVersions.version(group, clusters, endpoints) + + // then new version is generated even that clusters and endpoints are the same + assertThat(newVersions.endpoints).isNotEqualTo(versions.endpoints) + assertThat(newVersions.clusters).isNotEqualTo(versions.clusters) + } + + @Test + fun `should return same version for equal group when nothing changed`() { + // given + val version = snapshotsVersions.version(createGroup("/path"), clusters, endpoints) + + // when + val newVersion = snapshotsVersions.version(createGroup("/path"), clusters, endpoints) + + // then + assertThat(version).isEqualTo(newVersion) + } + + @Test + fun `should return different version for different group when nothing changed`() { + // given + val version = snapshotsVersions.version(createGroup("/path"), clusters, endpoints) + + // when + val newVersion = 
snapshotsVersions.version(createGroup("/other-path"), clusters, endpoints)
+
+        // then
+        assertThat(version).isNotEqualTo(newVersion)
+    }
+
+    private fun cluster(name: String): Cluster {
+        return Cluster.newBuilder()
+            .setName(name)
+            .build()
+    }
+
+    private fun endpoints(clusterName: String, instances: Int): ClusterLoadAssignment {
+        return ClusterLoadAssignment.newBuilder()
+            .addAllEndpoints(
+                (0 until instances).map { instance -> // exclusive bound: exactly `instances` entries
+                    LocalityLbEndpoints.newBuilder()
+                        .addLbEndpoints(
+                            LbEndpoint.newBuilder()
+                                .setEndpoint(
+                                    Endpoint.newBuilder()
+                                        .setAddress(
+                                            Address.newBuilder()
+                                                .setSocketAddress(
+                                                    SocketAddress.newBuilder()
+                                                        .setAddress("127.0.0.1")
+                                                        .setPortValue(instance)
+                                                )
+                                        )
+                                )
+                        )
+                        .build()
+                }
+            )
+            .setClusterName(clusterName)
+            .build()
+    }
+
+    private fun createGroup(endpointPath: String): AllServicesGroup {
+        return AllServicesGroup(
+            ads = false,
+            serviceName = "name",
+            proxySettings = ProxySettings(
+                incoming = Incoming(
+                    endpoints = listOf(
+                        IncomingEndpoint(
+                            path = endpointPath,
+                            pathMatchingType = PathMatchingType.PATH,
+                            methods = setOf("GET", "PUT"),
+                            clients = setOf("client1", "role1")
+                        )
+                    ),
+                    permissionsEnabled = true,
+                    roles = listOf(
+                        Role(
+                            name = "role1",
+                            clients = setOf("client2", "client3")
+                        )
+                    )
+                ),
+                outgoing = Outgoing(listOf())
+            )
+        )
+    }
+}
diff --git a/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServicesTest.kt b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServicesTest.kt
new file mode 100644
index 000000000..c6c00cea5
--- /dev/null
+++ b/envoy-control/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/synchronization/CrossDcServicesTest.kt
@@ -0,0 +1,175 @@
+package pl.allegro.tech.servicemesh.envoycontrol.synchronization
+
+import io.micrometer.core.instrument.simple.SimpleMeterRegistry
+import org.assertj.core.api.Assertions.assertThat
+import org.junit.jupiter.api.Test
+import
pl.allegro.tech.servicemesh.envoycontrol.services.Locality +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstance +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances +import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState +import reactor.core.publisher.Mono +import reactor.test.StepVerifier +import java.net.URI +import java.time.Duration + +class CrossDcServicesTest { + @Test + fun `should collect responses from all DCs`() { + val service = CrossDcServices(asyncClient(), SimpleMeterRegistry(), fetcher(), listOf("dc1", "dc2")) + + val result = service + .getChanges(1) + .blockFirst() + ?: emptySet() + + assertThat(result).hasSize(2) + assertThat(result.map { it.zone }.toSet()).isEqualTo(setOf("dc1", "dc2")) + } + + @Test + fun `should ignore DC without service instances`() { + val service = CrossDcServices(asyncClient(), SimpleMeterRegistry(), fetcher(), listOf("dc1", "dc2/noinstances")) + + val result = service + .getChanges(1) + .blockFirst() + ?: emptySet() + + assertThat(result).hasSize(1) + assertThat(result.map { it.zone }).contains("dc1") + } + + @Test + fun `should skip failing responses if not cached`() { + val service = CrossDcServices( + asyncClient(), + SimpleMeterRegistry(), + fetcher(), + listOf("dc1", "dc2/error", "dc3", "dc4/error") + ) + + val result = service + .getChanges(1) + .blockFirst() + ?: emptySet() + + assertThat(result).isNotEmpty + assertThat(result.flatMap { it.servicesState.serviceNames() }.toSet()).isEqualTo(setOf("dc1", "dc3")) + } + + @Test + fun `should serve cached responses when a cross dc request fails`() { + // given + val service = CrossDcServices(asyncClient(), SimpleMeterRegistry(), fetcher(), listOf("dc1/successful-states", "dc2/second-request-failing")) + + val successfulResult = service + .getChanges(1) + .blockFirst() + ?: emptySet() + + 
assertThat(successfulResult).containsExactlyInAnyOrder(*(expectedSuccessfulState.toTypedArray())) + + val oneInstanceFailing = service + .getChanges(1) + .blockFirst() + ?: emptySet() + + assertThat(oneInstanceFailing).containsExactlyInAnyOrder(*(expectedStateWithOneRequestFailing.toTypedArray())) + } + + @Test + fun `should not emit a value when all requests fail`() { + // given + val service = CrossDcServices(asyncClient(), SimpleMeterRegistry(), fetcher(), listOf("dc2/error")) + val duration = 1L + + StepVerifier.create( + service + // when + .getChanges(duration) + ) + + // then + .expectSubscription() + .expectNoEvent(Duration.ofSeconds(duration * 2)) + .thenCancel() + .verify() + } + + private fun fetcher(): ControlPlaneInstanceFetcher { + return object : ControlPlaneInstanceFetcher { + override fun instances(dc: String): List { + val uri = URI.create("http://$dc") + if (uri.path == "/noinstances") + return emptyList() + return listOf(uri) + } + } + } + + private val servicesState1 = ServicesState( + serviceNameToInstances = mapOf( + "service-a" to ServiceInstances( + "service-a", + setOf(ServiceInstance("1", setOf(), "localhost", 80)) + ) + ) + ) + private val servicesState2 = ServicesState( + serviceNameToInstances = mapOf( + "service-a" to ServiceInstances( + "service-a", + setOf(ServiceInstance("2", setOf(), "localhost", 8080)) + ) + ) + ) + + private val servicesState3 = ServicesState( + serviceNameToInstances = mapOf( + "service-b" to ServiceInstances( + "service-b", + setOf(ServiceInstance("3", setOf(), "localhost", 81)) + ) + ) + ) + + private val expectedSuccessfulState = setOf( + LocalityAwareServicesState(servicesState1, Locality.REMOTE, "dc1/successful-states"), + LocalityAwareServicesState(servicesState3, Locality.REMOTE, "dc2/second-request-failing") + ) + + private val expectedStateWithOneRequestFailing = setOf( + LocalityAwareServicesState(servicesState2, Locality.REMOTE, "dc1/successful-states"), + LocalityAwareServicesState(servicesState3, 
Locality.REMOTE, "dc2/second-request-failing") + ) + + private val successfulStatesSequence = listOf( + Mono.just(servicesState1), Mono.just(servicesState2) + ).iterator() + + private val secondStateFailingSequence = listOf( + Mono.just(servicesState3), Mono.error(RuntimeException("Error fetching from dc2")) + ).iterator() + + private fun asyncClient(): AsyncControlPlaneClient { + return object : AsyncControlPlaneClient { + override fun getState(uri: URI): Mono = when { + uri.path == "/error" -> Mono.error(RuntimeException(uri.toString())) + uri.path == "/empty" -> Mono.empty() + uri.path == "/successful-states" -> successfulStatesSequence.next() + uri.path == "/second-request-failing" -> secondStateFailingSequence.next() + else -> Mono.just( + ServicesState( + serviceNameToInstances = mapOf( + uri.authority to ServiceInstances( + uri.authority, + emptySet() + ) + ) + ) + ) + } + } + } +} diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 000000000..87b738cbd Binary files /dev/null and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 000000000..44e7c4d1d --- /dev/null +++ b/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-5.2.1-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew new file mode 100755 index 000000000..af6708ff2 --- /dev/null +++ b/gradlew @@ -0,0 +1,172 @@ +#!/usr/bin/env sh + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. 
+while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? 
-eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin, switch paths to Windows format before running java +if $cygwin ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=$((i+1)) + done + case $i in + (0) set -- ;; + (1) set -- "$args0" ;; + (2) set -- "$args0" "$args1" ;; + (3) set -- "$args0" "$args1" "$args2" ;; + (4) set -- "$args0" "$args1" "$args2" "$args3" ;; + (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + (8) set -- "$args0" "$args1" "$args2" "$args3" 
"$args4" "$args5" "$args6" "$args7" ;; + (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=$(save "$@") + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +# by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong +if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then + cd "$(dirname "$0")" +fi + +exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat new file mode 100644 index 000000000..0f8d5937c --- /dev/null +++ b/gradlew.bat @@ -0,0 +1,84 @@ +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. 
+ +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. +set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! 
+if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 000000000..fa792502c --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,47 @@ +site_name: Envoy Control documentation +site_description: Envoy Control - Service Mesh Control Plane for Envoy Proxy +site_author: Allegro.pl + +docs_dir: docs + +#repo_url: https://github.com/allegro/envoy-control +#repo_name: 'envoy-control' + +copyright: 'Allegro.pl' + +theme: + name: material + logo: + icon: code + palette: + primary: purple + accent: purple + +extra_javascript: + - 'assets/extra.js' + +markdown_extensions: + - codehilite + - admonition + - toc: + permalink: "#" + +nav: + - About: index.md + - Quickstart: quickstart.md + - Architecture: architecture.md + - Configuration: configuration.md + - Integrations: + - Envoy: integrations/envoy.md + - Consul: integrations/consul.md + - Features: + - Multi DC support: features/multi_dc_support.md + - Permissions: features/permissions.md + - Service Transformers: features/service_transformers.md + - Development: development.md + - Performance: performance.md + - Deployment: + - Observability: deployment/observability.md + - Deployment: deployment/deployment.md + - Envoy Control vs other software: ec_vs_other_software.md + diff --git a/readme.md b/readme.md new file mode 100644 index 000000000..5c58bb40e --- /dev/null +++ b/readme.md @@ -0,0 +1,13 @@ +# Envoy Control + +Envoy Control is a production-ready Control Plane for Service Mesh based on [Envoy Proxy](https://www.envoyproxy.io/) +Data Plane that is platform agnostic. + +# Docs + +Full docs are hosted at ... + +# Quick start + +Quick start guide is located in ... 
+ diff --git a/services/build.gradle b/services/build.gradle new file mode 100644 index 000000000..a9ea8e8a7 --- /dev/null +++ b/services/build.gradle @@ -0,0 +1,4 @@ +dependencies { + compile group: 'org.jetbrains.kotlin', name: 'kotlin-stdlib', version: versions.kotlin + compile group: 'io.projectreactor', name: 'reactor-core', version: versions.reactor +} \ No newline at end of file diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/LocalServiceChanges.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/LocalServiceChanges.kt new file mode 100644 index 000000000..1560dae4e --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/LocalServiceChanges.kt @@ -0,0 +1,8 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services + +import java.util.concurrent.atomic.AtomicReference + +interface LocalServiceChanges : ServiceChanges { + val latestServiceState: AtomicReference + fun isServiceStateLoaded(): Boolean +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/LocalityAwareServicesState.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/LocalityAwareServicesState.kt new file mode 100644 index 000000000..95389e7cd --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/LocalityAwareServicesState.kt @@ -0,0 +1,11 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services + +data class LocalityAwareServicesState( + val servicesState: ServicesState, + val locality: Locality, + val zone: String +) + +enum class Locality { + LOCAL, REMOTE +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceChanges.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceChanges.kt new file mode 100644 index 000000000..e96ab4b17 --- /dev/null +++ 
b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceChanges.kt @@ -0,0 +1,7 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services + +import reactor.core.publisher.Flux + +interface ServiceChanges { + fun stream(): Flux> +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceInstance.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceInstance.kt new file mode 100644 index 000000000..20b71c936 --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceInstance.kt @@ -0,0 +1,8 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services + +data class ServiceInstance( + val id: String, + val tags: Set, + val address: String, + val port: Int +) diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceInstances.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceInstances.kt new file mode 100644 index 000000000..a99dc9af5 --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServiceInstances.kt @@ -0,0 +1,13 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services + +data class ServiceInstances( + val serviceName: String, + val instances: Set +) { + fun withoutEmptyAddressInstances(): ServiceInstances = + if (instances.any { it.address.isBlank() }) { + copy(instances = instances.asSequence() + .filter { it.address.isNotBlank() } + .toSet()) + } else this +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServicesState.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServicesState.kt new file mode 100644 index 000000000..e8126148a --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/ServicesState.kt @@ -0,0 +1,26 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services + 
+typealias ServiceName = String + +data class ServicesState( + val serviceNameToInstances: Map = emptyMap() +) { + operator fun get(serviceName: ServiceName): ServiceInstances? = serviceNameToInstances[serviceName] + + fun hasService(serviceName: String): Boolean = serviceNameToInstances.containsKey(serviceName) + fun serviceNames(): Set = serviceNameToInstances.keys + fun allInstances(): Collection = serviceNameToInstances.values + + fun remove(serviceName: ServiceName): ServicesState { + // TODO: + return change(ServiceInstances(serviceName, instances = emptySet())) + } + + fun add(serviceName: ServiceName): ServicesState = + if (serviceNameToInstances.containsKey(serviceName)) this + else change(ServiceInstances(serviceName, instances = emptySet())) + + fun change(serviceInstances: ServiceInstances): ServicesState = + if (serviceNameToInstances[serviceInstances.serviceName] == serviceInstances) this + else copy(serviceNameToInstances = serviceNameToInstances + (serviceInstances.serviceName to serviceInstances)) +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/EmptyAddressFilter.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/EmptyAddressFilter.kt new file mode 100644 index 000000000..6f2c47ae4 --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/EmptyAddressFilter.kt @@ -0,0 +1,9 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services.transformers + +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances + +class EmptyAddressFilter : ServiceInstancesTransformer { + + override fun transform(services: Sequence): Sequence = + services.map { serviceInstances -> serviceInstances.withoutEmptyAddressInstances() } +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/IpAddressFilter.kt 
b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/IpAddressFilter.kt new file mode 100644 index 000000000..ac49e153b --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/IpAddressFilter.kt @@ -0,0 +1,16 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services.transformers + +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances + +/** + * TODO Envoy & Envoy Control supports only IP and not hostnames + */ +class IpAddressFilter : ServiceInstancesTransformer { + + override fun transform(services: Sequence): Sequence = + services.filter { (_, instances) -> + instances.all { isIpAddress(it.address) } + } + + private fun isIpAddress(address: String): Boolean = address.all { !it.isLetter() } +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/RegexServiceInstancesFilter.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/RegexServiceInstancesFilter.kt new file mode 100644 index 000000000..a82396b07 --- /dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/RegexServiceInstancesFilter.kt @@ -0,0 +1,11 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services.transformers + +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances + +class RegexServiceInstancesFilter(private val excludedRegexes: Collection) : ServiceInstancesTransformer { + + override fun transform(services: Sequence): Sequence = + services.filter { (serviceName, _) -> + !excludedRegexes.any { serviceName.matches(it) } + } +} diff --git a/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/ServiceInstancesTransformer.kt b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/ServiceInstancesTransformer.kt new file mode 100644 index 000000000..f080fdfd3 --- 
/dev/null +++ b/services/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/services/transformers/ServiceInstancesTransformer.kt @@ -0,0 +1,7 @@ +package pl.allegro.tech.servicemesh.envoycontrol.services.transformers + +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances + +interface ServiceInstancesTransformer { + fun transform(services: Sequence): Sequence +} diff --git a/settings.gradle b/settings.gradle new file mode 100644 index 000000000..77956d01b --- /dev/null +++ b/settings.gradle @@ -0,0 +1,8 @@ +rootProject.name = 'envoy-control-root' + +include 'services' +include 'envoy-control' +include 'envoy-control-runner' +include 'envoy-control-tests' +include 'source-consul' + diff --git a/source-consul/build.gradle b/source-consul/build.gradle new file mode 100644 index 000000000..8e40fd8a0 --- /dev/null +++ b/source-consul/build.gradle @@ -0,0 +1,17 @@ +dependencies { + implementation project(':envoy-control') + + compile group: 'org.jetbrains.kotlin', name: 'kotlin-stdlib', version: versions.kotlin + compile group: 'io.projectreactor', name: 'reactor-core', version: versions.reactor + compile group: 'pl.allegro.tech.discovery', name: 'consul-recipes', version: versions.consul_recipes + compile group: 'com.ecwid.consul', name: 'consul-api', version: versions.ecwid_consul + + testCompile group: 'org.mockito', name: 'mockito-core', version: versions.mockito + testCompile group: 'cglib', name: 'cglib-nodep', version: versions.cglib + + testCompile(group: 'com.pszymczyk.consul', name: 'embedded-consul', version: versions.embedded_consul) { + exclude group: 'org.apache.httpcomponents', module: 'httpclient' + } + + testCompile group: 'io.projectreactor', name: 'reactor-test', version: versions.reactor +} \ No newline at end of file diff --git a/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/ConsulProperties.kt b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/ConsulProperties.kt 
new file mode 100644 index 000000000..1304646a8 --- /dev/null +++ b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/ConsulProperties.kt @@ -0,0 +1,20 @@ +@file:Suppress("MagicNumber") + +package pl.allegro.tech.servicemesh.envoycontrol.consul + +import java.time.Duration + +class ConsulProperties { + var host: String = "localhost" + var port = 8500 + var subscriptionDelay: Duration = Duration.ofMillis(20) // max 50 subscription/s + var watcher = ConsulWatcherOkHttpProperties() +} + +class ConsulWatcherOkHttpProperties { + var readTimeout: Duration = Duration.ofMinutes(6) + var connectTimeout: Duration = Duration.ofSeconds(2) + var maxRequests = 1000 + var dispatcherMaxPoolSize = 2000 + var dispatcherPoolKeepAliveTime: Duration = Duration.ofSeconds(30) +} diff --git a/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulLocalServiceChanges.kt b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulLocalServiceChanges.kt new file mode 100644 index 000000000..8d6746fd6 --- /dev/null +++ b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulLocalServiceChanges.kt @@ -0,0 +1,35 @@ +package pl.allegro.tech.servicemesh.envoycontrol.consul.services + +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalServiceChanges +import pl.allegro.tech.servicemesh.envoycontrol.services.Locality +import pl.allegro.tech.servicemesh.envoycontrol.services.LocalityAwareServicesState +import pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState +import pl.allegro.tech.servicemesh.envoycontrol.services.transformers.ServiceInstancesTransformer +import reactor.core.publisher.Flux +import java.util.concurrent.atomic.AtomicReference + +class ConsulLocalServiceChanges( + private val consulChanges: ConsulServiceChanges, + private val locality: Locality, + private val localDc: String, + private val transformers: List = emptyList(), 
+ override val latestServiceState: AtomicReference = AtomicReference(ServicesState()) +) : LocalServiceChanges { + override fun stream(): Flux> = + consulChanges + .watchState() + .map { state -> + transformers + .fold(state.allInstances().asSequence()) { instancesSequence, transformer -> + transformer.transform(instancesSequence) + } + .associateBy { it.serviceName } + .let(::ServicesState) + } + .doOnNext { latestServiceState.set(it) } + .map { + setOf(LocalityAwareServicesState(it, locality, localDc)) + } + + override fun isServiceStateLoaded(): Boolean = latestServiceState.get() != ServicesState() +} diff --git a/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChanges.kt b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChanges.kt new file mode 100644 index 000000000..b8954c2f0 --- /dev/null +++ b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChanges.kt @@ -0,0 +1,182 @@ +package pl.allegro.tech.servicemesh.envoycontrol.consul.services + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.kotlin.KotlinModule +import pl.allegro.tech.discovery.consul.recipes.json.JacksonJsonDeserializer +import pl.allegro.tech.discovery.consul.recipes.watch.Canceller +import pl.allegro.tech.discovery.consul.recipes.watch.ConsulWatcher +import pl.allegro.tech.discovery.consul.recipes.watch.catalog.ServicesWatcher +import pl.allegro.tech.discovery.consul.recipes.watch.health.HealthServiceInstancesWatcher +import pl.allegro.tech.servicemesh.envoycontrol.DefaultEnvoyControlMetrics +import pl.allegro.tech.servicemesh.envoycontrol.EnvoyControlMetrics +import pl.allegro.tech.servicemesh.envoycontrol.logger +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstance +import pl.allegro.tech.servicemesh.envoycontrol.services.ServiceInstances +import 
pl.allegro.tech.servicemesh.envoycontrol.services.ServicesState +import reactor.core.publisher.Flux +import reactor.core.publisher.FluxSink +import java.time.Duration +import java.util.concurrent.ConcurrentHashMap +import pl.allegro.tech.discovery.consul.recipes.watch.catalog.ServiceInstances as RecipesServiceInstances +import pl.allegro.tech.discovery.consul.recipes.watch.catalog.Services as RecipesServices + +class ConsulServiceChanges( + private val watcher: ConsulWatcher, + private val metrics: EnvoyControlMetrics = DefaultEnvoyControlMetrics(), + private val objectMapper: ObjectMapper = ObjectMapper().registerModule(KotlinModule()), + private val subscriptionDelay: Duration = Duration.ZERO +) { + + fun watchState(): Flux { + val watcher = StateWatcher(watcher, objectMapper, metrics, subscriptionDelay) + return Flux.create( + { sink -> + watcher.stateReceiver = { sink.next(it) } + }, + FluxSink.OverflowStrategy.LATEST + ) + .distinctUntilChanged() + .doOnSubscribe { watcher.start() } + .doOnCancel { watcher.close() } + } + + private class StateWatcher( + private val watcher: ConsulWatcher, + private val objectMapper: ObjectMapper, + private val metrics: EnvoyControlMetrics, + private val subscriptionDelay: Duration + ) : AutoCloseable { + lateinit var stateReceiver: (ServicesState) -> (Unit) + + val logger by logger() + + @Volatile + private var canceller: Canceller? 
= null + + @Volatile + private var state = ServicesState() + private val stateLock = Any() + private val watchedServices = mutableMapOf() + + @Volatile + private var lastServices = setOf() + private val servicesLock = Any() + + private val initialLoader = InitialLoader() + + fun start() { + if (canceller == null) { + synchronized(StateWatcher::class.java) { + if (canceller == null) { + canceller = ServicesWatcher(watcher, JacksonJsonDeserializer(objectMapper)) + .watch( + { servicesResult -> handleServicesChange(servicesResult.body) }, + { error -> logger.warn("Error while watching services list", error) } + ) + } + } + } + } + + override fun close() { + synchronized(stateLock) { + watchedServices.values.forEach { canceller -> canceller.cancel() } + watchedServices.clear() + } + canceller?.cancel() + } + + private fun handleServicesChange(services: RecipesServices) = synchronized(servicesLock) { + initialLoader.update(services.serviceNames()) + + val newServices = services.serviceNames() - lastServices + newServices.forEach { service -> + handleNewService(service) + Thread.sleep(subscriptionDelay.toMillis()) + } + + val removedServices = lastServices - services.serviceNames() + removedServices.forEach { handleServiceRemoval(it) } + + lastServices = services.serviceNames() + } + + private fun handleNewService(service: String) = synchronized(stateLock) { + val instancesWatcher = HealthServiceInstancesWatcher( + service, watcher, JacksonJsonDeserializer(objectMapper) + ) + logger.info("Start watching $service on ${instancesWatcher.endpoint()}") + val canceller = instancesWatcher.watch( + { instances -> handleServiceInstancesChange(instances.body) }, + { error -> logger.warn("Error while watching service $service", error) } + ) + val oldCanceller = watchedServices.put(service, canceller) + oldCanceller?.cancel() + + val newState = state.add(service) + changeState(newState) + metrics.serviceAdded() + } + + private fun handleServiceInstancesChange(recipesInstances: 
RecipesServiceInstances) = synchronized(stateLock) { + initialLoader.observed(recipesInstances.serviceName) + + val instances = recipesInstances.toDomainInstances() + val newState = state.change(instances) + if (state !== newState) { + val addresses = instances.instances.joinToString { "[${it.id} - ${it.address}:${it.port}]" } + logger.info("Instances for ${instances.serviceName} changed: $addresses") + + changeState(newState) + metrics.instanceChanged() + } + } + + private fun RecipesServiceInstances.toDomainInstances(): ServiceInstances = + ServiceInstances( + serviceName, + instances.asSequence() + .map { ServiceInstance(it.serviceId, it.serviceTags.toSet(), it.serviceAddress, it.servicePort) } + .toSet() + ) + + private fun handleServiceRemoval(service: String) = synchronized(stateLock) { + logger.info("Stop watching $service") + val newState = state.remove(service) + changeState(newState) + watchedServices[service]?.cancel() + watchedServices.remove(service) + metrics.serviceRemoved() + } + + private fun changeState(newState: ServicesState) { + if (initialLoader.ready) { + stateReceiver(newState) + } + state = newState + } + + private class InitialLoader { + private val remaining = ConcurrentHashMap.newKeySet() + @Volatile + private var initialized = false + @Volatile + var ready = false + private set + + fun update(services: Collection) { + if (!initialized) { + remaining.addAll(services) + initialized = true + } + } + + fun observed(service: String) { + if (!ready) { + remaining.remove(service) + ready = remaining.isEmpty() + } + } + } + } +} diff --git a/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/synchronization/SimpleConsulInstanceFetcher.kt b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/synchronization/SimpleConsulInstanceFetcher.kt new file mode 100644 index 000000000..17d50d58c --- /dev/null +++ 
b/source-consul/src/main/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/synchronization/SimpleConsulInstanceFetcher.kt @@ -0,0 +1,23 @@ +package pl.allegro.tech.servicemesh.envoycontrol.consul.synchronization + +import com.ecwid.consul.v1.ConsulClient +import com.ecwid.consul.v1.QueryParams +import com.ecwid.consul.v1.health.model.HealthService +import pl.allegro.tech.servicemesh.envoycontrol.synchronization.ControlPlaneInstanceFetcher +import java.net.URI + +/** Queries ConsulClient.getHealthServices for envoyControlAppName in a given datacenter and returns one http URI per returned instance. */ class SimpleConsulInstanceFetcher( + private val consulClient: ConsulClient, + private val envoyControlAppName: String +) : ControlPlaneInstanceFetcher { + + /** Returns the URIs of the envoyControlAppName instances found in datacenter dc. */ override fun instances(dc: String): List = toServiceUri(findInstances(dc)) + + private fun toServiceUri(instances: MutableList) = + instances.map { instance -> createURI(instance.service.address, instance.service.port) } + + private fun findInstances(nonLocalDc: String) = + consulClient.getHealthServices(envoyControlAppName, true, QueryParams(nonLocalDc)).value /* NOTE(review): the literal true is presumably the client's "only passing" flag; confirm against the ecwid API */ + + private fun createURI(host: String, port: Int) = URI.create("http://$host:$port/") +} diff --git a/source-consul/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChangesDisposeTest.kt b/source-consul/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChangesDisposeTest.kt new file mode 100644 index 000000000..1b9947317 --- /dev/null +++ b/source-consul/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChangesDisposeTest.kt @@ -0,0 +1,25 @@ +package pl.allegro.tech.servicemesh.envoycontrol.consul.services + +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.Test +import org.mockito.Mockito +import org.mockito.Mockito.`when` +import org.mockito.Mockito.verify +import pl.allegro.tech.discovery.consul.recipes.watch.Canceller +import pl.allegro.tech.discovery.consul.recipes.watch.ConsulWatcher + +/** Checks with a mocked ConsulWatcher that disposing the watchState() subscription cancels the /v1/catalog/services watch. */ class ConsulServiceChangesDisposeTest { + + @Test + fun
`should start watching and stop watching after dispose`() { + val watcher = Mockito.mock(ConsulWatcher::class.java) + val callbackCanceller = Canceller() + `when`(watcher.watchEndpoint(Mockito.eq("/v1/catalog/services"), Mockito.any(), Mockito.any())).thenReturn(callbackCanceller) + + val recipes = ConsulServiceChanges(watcher) + recipes.watchState().subscribe().dispose() + + verify(watcher).watchEndpoint(Mockito.eq("/v1/catalog/services"), Mockito.any(), Mockito.any()) + assertThat(callbackCanceller.isCancelled).isTrue() + } +} diff --git a/source-consul/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChangesTest.kt b/source-consul/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChangesTest.kt new file mode 100644 index 000000000..815df636b --- /dev/null +++ b/source-consul/src/test/kotlin/pl/allegro/tech/servicemesh/envoycontrol/consul/services/ConsulServiceChangesTest.kt @@ -0,0 +1,99 @@ +package pl.allegro.tech.servicemesh.envoycontrol.consul.services + +import com.ecwid.consul.v1.agent.AgentConsulClient +import com.ecwid.consul.v1.agent.model.NewService +import com.pszymczyk.consul.ConsulStarterBuilder +import com.pszymczyk.consul.infrastructure.Ports +import com.pszymczyk.consul.junit.ConsulExtension +import org.assertj.core.api.Assertions.assertThat +import org.junit.jupiter.api.AfterEach +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.extension.RegisterExtension +import pl.allegro.tech.discovery.consul.recipes.ConsulRecipes +import reactor.test.StepVerifier +import java.net.URI +import java.util.UUID +import java.util.concurrent.Executors + +/** Exercises ConsulServiceChanges.watchState() against the Consul instance provided by ConsulExtension. */ class ConsulServiceChangesTest { + + companion object { + private val consulHttpPort = Ports.nextAvailable() + + @JvmField + @RegisterExtension + val consul = ConsulExtension(ConsulStarterBuilder.consulStarter().withHttpPort(consulHttpPort).build()) + } + + private val watcher = ConsulRecipes + .consulRecipes() + .build() +
.consulWatcher(Executors.newFixedThreadPool(10)) + .withAgentUri(URI("http://localhost:${consul.httpPort}")) + .build() + private val changes = ConsulServiceChanges(watcher) + private val client = AgentConsulClient("localhost", consul.httpPort) + + @AfterEach + fun reset() { + watcher.close() + consul.reset() + } + + @Test + fun `should watch changes of consul state`() { + StepVerifier.create(changes.watchState()) + .expectNextCount(1) // events: add(consul) + change(consul) happened during graceful startup + .then { registerService(id = "123", name = "abc") } + .expectNextMatches { it.hasService("abc") } + .assertNext { + assertThat(it.hasService("consul")).isTrue() + assertThat(it.hasService("abc")).isTrue() + assertThat(it["abc"]).isNotNull() + assertThat(it["abc"]!!.instances).hasSize(1) + it["abc"]!!.instances.first().run { + assertThat(id).isEqualTo("123") + assertThat(address).isEqualTo("localhost") + assertThat(port).isEqualTo(1234) + assertThat(tags).containsExactly("a") + } + } + .then { deregisterService(id = "123") } + .expectNextMatches { it["abc"]!!.instances.isEmpty() } + .thenCancel() + .verify() + } + + @Test + fun `should produce first event with all services`() { + registerService(id = "service1", name = "service1") + registerService(id = "service2", name = "service2") + + StepVerifier.create(changes.watchState()) + .expectNextMatches { it.serviceNames() == setOf("consul", "service1", "service2") } + .then { registerService(id = "service3", name = "service3") } + .thenRequest(1) // events: add(service3) + .expectNextMatches { it.serviceNames() == setOf("consul", "service1", "service2", "service3") } + .thenCancel() + .verify() + } + + /** Registers a service at localhost:1234 tagged "a" through the agent client and returns its id. */ private fun registerService( + id: String = UUID.randomUUID().toString(), + name: String = "sample" + ): String { + val service = NewService().also { + it.id = id + it.name = name + it.address = "localhost" + it.port = 1234 + it.tags = listOf("a") + } + client.agentServiceRegister(service) + return service.id + } +
+ private fun deregisterService(id: String) { + client.agentServiceDeregister(id) + } +} diff --git a/tools/docker-compose.yaml b/tools/docker-compose.yaml new file mode 100644 index 000000000..ea731d590 --- /dev/null +++ b/tools/docker-compose.yaml @@ -0,0 +1,45 @@ +version: '3' + +services: + consul: + container_name: consul + image: consul:latest + ports: + - "18500:8500" + - "18300:8300" + volumes: + - /tmp/config:/config + - /tmp/_data/consul:/data + command: agent -server -data-dir=/data -bind 0.0.0.0 -client 0.0.0.0 -bootstrap-expect=1 -ui + + http-echo: + depends_on: + - consul + build: + context: ./service + dockerfile: Dockerfile + + envoy: # TODO: - use image provided by envoy-wrapper to reduce need for config synchronization + build: + context: ./envoy + dockerfile: Dockerfile + ports: + - "9999:9999" + - "31000:80" + - "31001:8080" + + envoy-control: + container_name: envoy-control + build: + context: ../ + dockerfile: tools/envoy-control/Dockerfile + ports: + - "8080:8080" + - "50000:50000" + # here you can define path to your config by replacing /tmp/custom.yaml with your own config + volumes: + - "/tmp/custom.yaml:/var/tmp/config/application.yaml" + depends_on: + - consul + environment: + - ENVOY_CONTROL_PROPERTIES= diff --git a/tools/envoy-control/Dockerfile b/tools/envoy-control/Dockerfile new file mode 100644 index 000000000..ea482f14f --- /dev/null +++ b/tools/envoy-control/Dockerfile @@ -0,0 +1,17 @@ +FROM adoptopenjdk/openjdk11:alpine-jre + +COPY tools/envoy-control/run.sh /usr/local/bin/run.sh +ADD envoy-control-runner/build/distributions/envoy-control-runner-*.tar /var/tmp/ + +RUN mv /var/tmp/envoy-control-runner* /var/tmp/envoy-control-runner +RUN mkdir /var/tmp/config + +VOLUME /var/tmp/config + +WORKDIR /usr/local/bin/ + +# APP_PORT: 8080 +# XDS_PORT: 50000 +EXPOSE 8080 50000 + +CMD ["sh", "run.sh"] \ No newline at end of file diff --git a/tools/envoy-control/run.sh b/tools/envoy-control/run.sh new file mode 100755 index
000000000..8f12ce65e --- /dev/null +++ b/tools/envoy-control/run.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env sh + +START_ARGUMENTS="" +CONFIG_FILE=/var/tmp/config/application.yaml + +if [ -f "$CONFIG_FILE" ]; then + START_ARGUMENTS="--spring.config.location=file:$CONFIG_FILE " +fi + +if [ ! -z "${ENVOY_CONTROL_PROPERTIES}" ]; then + START_ARGUMENTS="$START_ARGUMENTS $ENVOY_CONTROL_PROPERTIES" +fi + +echo "Launching Envoy-control with $START_ARGUMENTS" + +/var/tmp/envoy-control-runner/bin/envoy-control-runner $START_ARGUMENTS # left unquoted so the shell splits it into separate arguments \ No newline at end of file diff --git a/tools/envoy/Dockerfile b/tools/envoy/Dockerfile new file mode 100644 index 000000000..8d9934afb --- /dev/null +++ b/tools/envoy/Dockerfile @@ -0,0 +1,25 @@ +FROM envoyproxy/envoy + +ENV PORT=9999:9999 +ENV PORT=80:80 +ENV ENVOY_NODE_ID=front-proxy-id +ENV ENVOY_NODE_CLUSTER=front-proxy +ENV ENVOY_EGRESS_LISTENER_PORT=80 +ENV ENVOY_INGRESS_LISTENER_PORT=8080 +ENV ENVOY_ADMIN_PORT=9999 +ENV ENVOY_XDS_PORT=50000 +ENV ENVOY_XDS_HOST=host.docker.internal + +ADD envoy-template.yaml /etc/envoy/envoy.yaml +RUN sed -i "s/{{.EgressListenerPort}}/${ENVOY_EGRESS_LISTENER_PORT}/g" /etc/envoy/envoy.yaml +RUN sed -i "s/{{.IngressListenerPort}}/${ENVOY_INGRESS_LISTENER_PORT}/g" /etc/envoy/envoy.yaml +RUN sed -i "s/{{.XdsHost}}/${ENVOY_XDS_HOST}/g" /etc/envoy/envoy.yaml +RUN sed -i "s/{{.XdsPort}}/${ENVOY_XDS_PORT}/g" /etc/envoy/envoy.yaml +RUN sed -i "s/{{.AdminPort}}/${ENVOY_ADMIN_PORT}/g" /etc/envoy/envoy.yaml + +EXPOSE 80 443 9999 + +RUN mkdir envoy + +# Shell form on purpose: an exec-form CMD is not processed by a shell, so the ENVOY_NODE_* references would reach envoy unexpanded; exec keeps envoy as the signal-receiving process. +CMD exec envoy -c /etc/envoy/envoy.yaml --service-cluster "${ENVOY_NODE_CLUSTER}" --service-node "${ENVOY_NODE_ID}" diff --git a/tools/envoy/envoy-template.yaml b/tools/envoy/envoy-template.yaml new file mode 100644 index 000000000..70ce9ad99 --- /dev/null +++ b/tools/envoy/envoy-template.yaml @@ -0,0 +1,120 @@ +--- +node: + metadata: + ads: true + service_name: docker + proxy_settings: + outgoing: + dependencies: + - service: "*" + locality: + zone: default-zone
+ +static_resources: + listeners: + - name: default_listener + address: + socket_address: + address: 0.0.0.0 + port_value: {{.EgressListenerPort}} + filter_chains: + - filters: + - name: envoy.http_connection_manager + config: + stat_prefix: ingress_proxy + rds: + route_config_name: default_routes + config_source: + ads: {} + http_filters: + - name: envoy.router + http_protocol_options: + allow_absolute_url: true + access_log: + name: envoy.file_access_log + config: + path: "./envoy/ingress-access.log" + format: > + [%START_TIME%] %PROTOCOL% %REQ(:METHOD)% %REQ(:authority)% %REQ(:PATH)% %RESPONSE_CODE% %RESPONSE_FLAGS% + %BYTES_RECEIVED%b %BYTES_SENT%b %DURATION%ms "%DOWNSTREAM_REMOTE_ADDRESS%" -> "%UPSTREAM_HOST%" + - name: ingress_listener + address: + socket_address: + address: 0.0.0.0 + port_value: {{.IngressListenerPort}} + filter_chains: + - filters: + - name: envoy.http_connection_manager + config: + stat_prefix: ingress_http + rds: + route_config_name: ingress_secured_routes + config_source: + ads: {} + http_filters: + - name: envoy.router + + clusters: + - name: envoy-control-xds + type: STRICT_DNS + hosts: + - socket_address: + address: {{.XdsHost}} + port_value: {{.XdsPort}} + lb_policy: ROUND_ROBIN + connect_timeout: + seconds: 1 + http2_protocol_options: + max_concurrent_streams: 10 + upstream_connection_options: + tcp_keepalive: + keepalive_probes: 3 + keepalive_time: 30 + keepalive_interval: 15 + - name: envoy-original-destination + type: ORIGINAL_DST + lb_policy: ORIGINAL_DST_LB + original_dst_lb_config: + use_http_header: true + connect_timeout: + seconds: 1 + http_protocol_options: + allow_absolute_url: true # required for supporting http proxy protocol + upstream_connection_options: + tcp_keepalive: + keepalive_probes: 3 + keepalive_time: 30 + keepalive_interval: 15 + - name: local_service + type: STRICT_DNS + hosts: + - socket_address: + address: http-echo # name of the http-echo service defined in tools/docker-compose.yaml + port_value: 80 + connect_timeout: 1s + - name: this_admin + type: STATIC + hosts: + -
socket_address: + address: 127.0.0.1 + port_value: {{.AdminPort}} + connect_timeout: + seconds: 1 + +dynamic_resources: + lds_config: + ads: {} + cds_config: + ads: {} + ads_config: + api_type: GRPC + grpc_services: + envoy_grpc: + cluster_name: envoy-control-xds + +admin: + access_log_path: "/dev/null" + address: + socket_address: + address: 0.0.0.0 + port_value: {{.AdminPort}} diff --git a/tools/run-with-local-ec.sh b/tools/run-with-local-ec.sh new file mode 100755 index 000000000..14bbf4678 --- /dev/null +++ b/tools/run-with-local-ec.sh @@ -0,0 +1,3 @@ +#!/usr/bin/env bash + +docker-compose up consul envoy http-echo # every service from docker-compose.yaml except envoy-control \ No newline at end of file diff --git a/tools/service/Dockerfile b/tools/service/Dockerfile new file mode 100644 index 000000000..2930ea82e --- /dev/null +++ b/tools/service/Dockerfile @@ -0,0 +1,8 @@ +FROM mendhak/http-https-echo +RUN apk add --update \ + curl \ + && rm -rf /var/cache/apk/* +WORKDIR / +COPY register_and_run.sh /register_and_run.sh +RUN chmod a+x /register_and_run.sh +ENTRYPOINT ["/register_and_run.sh"] diff --git a/tools/service/register_and_run.sh b/tools/service/register_and_run.sh new file mode 100644 index 000000000..0602041d9 --- /dev/null +++ b/tools/service/register_and_run.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env sh + +set -o pipefail +set -o errexit # abort on the first failing command, including a failed registration below + +port=80 # also the port_value of the local_service cluster in tools/envoy/envoy-template.yaml +service_name=http-echo +instance_id="${service_name}-1" + +echo "Registering instance of ${service_name} in consul" +echo "=============================" +echo +echo + +ip="$(hostname -i)" + +body=' +{ + "ID": "'${instance_id}'", + "Name": "'${service_name}'", + "Tags": [ + "primary" + ], + "Address": "'${ip}'", + "Port": '${port}', + "Check": { + "DeregisterCriticalServiceAfter": "90m", + "http": "http://'${ip}:${port}'", + "Interval": "10s" + } +} +' +curl -X PUT --fail --data "${body}" -s consul:8500/v1/agent/service/register + +cd /app +node ./index.js +