From be2d97f2a2bf1d6b99696c670d9935bcadca350e Mon Sep 17 00:00:00 2001 From: Daniel Noland Date: Fri, 6 Dec 2024 11:20:59 -0700 Subject: [PATCH] Design work Expand and re-organize docs to add in design work by the whole team. --- design-docs/src/mdbook/src/SUMMARY.md | 36 ++ .../src/mdbook/src/dataplane/design.md | 397 ++++++++++++++++++ .../mdbook/src/dataplane/development-plan.md | 149 +++++++ .../src/mdbook/src/dataplane/map-of-dpdk.md | 18 +- .../src/mdbook/src/dataplane/tasks/NAT44.md | 3 + .../dataplane/tasks/NAT64-investigation.md | 8 + .../src/mdbook/src/dataplane/tasks/NAT64.md | 10 + .../src/mdbook/src/dataplane/tasks/NAT66.md | 6 + .../src/dataplane/tasks/config-db-schema.md | 105 +++++ ...configuration-persistence-investigation.md | 61 +++ .../dataplane/tasks/control-plane-dev-env.md | 14 + .../src/dataplane/tasks/core-pinning.md | 4 + .../tasks/create-control-plane-image.md | 26 ++ .../tasks/dataplane-control-plane-protocol.md | 15 + .../dataplane-control-plane-reconcile.md | 24 ++ .../dataplane-control-plane-transport.md | 10 + .../tasks/dataplane-worker-lifecycle.md | 25 ++ .../tasks/fault-tolerance-implementation.md | 4 + .../tasks/fault-tolerance-validation.md | 10 + .../src/dataplane/tasks/gateway-test-env.md | 14 + .../dataplane/tasks/identify-local-traffic.md | 33 ++ ...agement-plane-control-plane-interaction.md | 7 + .../management-plane-dataplane-interaction.md | 9 + .../one-control-plane-daemon-per-container.md | 4 + .../tasks/performance-measurement.md | 5 + .../src/dataplane/tasks/pick-a-datastore.md | 32 ++ .../tasks/programmatic-control-of-frr.md | 8 + .../dataplane/tasks/public-internet-access.md | 4 + .../tasks/rate-limiting-investigation.md | 1 + .../src/dataplane/tasks/route-manager.md | 8 + .../src/dataplane/tasks/state-sync-design.md | 7 + .../tasks/state-sync-implementation.md | 1 + .../mdbook/src/dataplane/tasks/state-sync.md | 3 + .../src/dataplane/tasks/telemetry-basic.md | 17 + .../dataplane/tasks/telemetry-integration.md | 12 
+ .../tasks/telemetry-investigation.md | 16 + .../src/dataplane/tasks/underlay-routing.md | 12 + .../src/dataplane/tasks/vpc-rate-limiting.md | 8 + .../mdbook/src/dataplane/tasks/vpc-routing.md | 6 + .../src/dataplane/tasks/vxlan-tunnels.md | 8 + .../src/dataplane/tasks/zebra-plugin.md | 24 ++ design-docs/src/mdbook/src/links.md | 71 ++++ 42 files changed, 1226 insertions(+), 9 deletions(-) create mode 100644 design-docs/src/mdbook/src/dataplane/design.md create mode 100644 design-docs/src/mdbook/src/dataplane/development-plan.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/NAT44.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/NAT64-investigation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/NAT64.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/NAT66.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/config-db-schema.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/configuration-persistence-investigation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/core-pinning.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/create-control-plane-image.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-protocol.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-reconcile.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-transport.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/dataplane-worker-lifecycle.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-implementation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-validation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/gateway-test-env.md create mode 100644 
design-docs/src/mdbook/src/dataplane/tasks/identify-local-traffic.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/management-plane-control-plane-interaction.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/management-plane-dataplane-interaction.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/one-control-plane-daemon-per-container.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/performance-measurement.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/programmatic-control-of-frr.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/public-internet-access.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/rate-limiting-investigation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/route-manager.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/state-sync-design.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/state-sync-implementation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/state-sync.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/telemetry-basic.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/telemetry-integration.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/telemetry-investigation.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/underlay-routing.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/vpc-rate-limiting.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/vpc-routing.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/vxlan-tunnels.md create mode 100644 design-docs/src/mdbook/src/dataplane/tasks/zebra-plugin.md create mode 100644 design-docs/src/mdbook/src/links.md diff --git a/design-docs/src/mdbook/src/SUMMARY.md b/design-docs/src/mdbook/src/SUMMARY.md index c3fc1b8b..dbe233c5 100644 --- 
a/design-docs/src/mdbook/src/SUMMARY.md +++ b/design-docs/src/mdbook/src/SUMMARY.md @@ -11,3 +11,39 @@ - [fake-nix](./build/fake-nix.md) - [Build dataplane](./build/just-cargo-build.md) - [Sterile builds](./build/sterile-build.md) +- [Design](./dataplane/design.md) +- [Development Plan](./dataplane/development-plan.md) + - [Configuration Persistence Investigation](./dataplane/tasks/configuration-persistence-investigation.md) + - [Configuration database schema](./dataplane/tasks/config-db-schema.md) + - [Control plane dev-env](./dataplane/tasks/control-plane-dev-env.md) + - [Core pinning](./dataplane/tasks/core-pinning.md) + - [Create control plane image](./dataplane/tasks/create-control-plane-image.md) + - [Dataplane / Control plane protocol](./dataplane/tasks/dataplane-control-plane-protocol.md) + - [Dataplane / Control plane transport](./dataplane/tasks/dataplane-control-plane-transport.md) + - [Dataplane worker lifecycle](./dataplane/tasks/dataplane-worker-lifecycle.md) + - [Fault tolerance (implementation)](./dataplane/tasks/fault-tolerance-implementation.md) + - [Fault tolerance (validation)](./dataplane/tasks/fault-tolerance-validation.md) + - [Gateway test env](./dataplane/tasks/gateway-test-env.md) + - [Identify local traffic](./dataplane/tasks/identify-local-traffic.md) + - [Management plane - control plane interaction](./dataplane/tasks/management-plane-control-plane-interaction.md) + - [Management plane - dataplane interaction](./dataplane/tasks/management-plane-dataplane-interaction.md) + - [NAT44](./dataplane/tasks/NAT44.md) + - [NAT64 (investigation)](./dataplane/tasks/NAT64-investigation.md) + - [NAT64](./dataplane/tasks/NAT64.md) + - [NAT66](./dataplane/tasks/NAT66.md) + - [One control plane daemon per container](./dataplane/tasks/one-control-plane-daemon-per-container.md) + - [Performance measurement](./dataplane/tasks/performance-measurement.md) + - [Programmatic Control of FRR](./dataplane/tasks/programmatic-control-of-frr.md) + - [Public 
internet access](./dataplane/tasks/public-internet-access.md) + - [Rate limiting investigation](./dataplane/tasks/rate-limiting-investigation.md) + - [Route manager](./dataplane/tasks/route-manager.md) + - [State sync (design)](./dataplane/tasks/state-sync-design.md) + - [State sync (implementation)](./dataplane/tasks/state-sync.md) + - [Telemetry (basic)](./dataplane/tasks/telemetry-basic.md) + - [Telemetry (integration)](./dataplane/tasks/telemetry-integration.md) + - [Telemetry (investigation)](./dataplane/tasks/telemetry-investigation.md) + - [Underlay routing](./dataplane/tasks/underlay-routing.md) + - [VPC rate-limiting](./dataplane/tasks/vpc-rate-limiting.md) + - [VPC routing](./dataplane/tasks/vpc-routing.md) + - [VXLAN tunnels](./dataplane/tasks/vxlan-tunnels.md) + - [Zebra Plugin](./dataplane/tasks/zebra-plugin.md) diff --git a/design-docs/src/mdbook/src/dataplane/design.md b/design-docs/src/mdbook/src/dataplane/design.md new file mode 100644 index 00000000..1cc5a352 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/design.md @@ -0,0 +1,397 @@ +# Required features for MVP + +At a very high level, these are the _user facing_ features that we require to reach MVP with the gateway: + +1. BGP underlay +2. EVPN overlay +3. VPC routing (aka RIOT) +4. VPC nat 44/66 +5. VPC nat 64 +6. Telemetry +7. Rate limiting +8. AB fault tolerance +9. Management API + +## User-facing features + +
+ +```plantuml +@startdot +digraph features { +labelloc=t +graph [ranksep=0.6] + +node[shape="rect"] +BGP_underlay [ label="BGP underlay", style=filled, color="lightblue"] +EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue"] +VPC_routing [ label="VPC routing", style=filled, color="lightblue"] +VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue"] +VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue"] +Telemetry [ label="Telemetry/observability", style=filled, color="lightblue"] +rate_limiting [ label="Rate limiting", style=filled, color="lightblue"] +Fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue"] +Management_API [label="Management API", style=filled, color="lightblue"] +all [label="*"] +all -> Management_API +Management_API -> all + +BGP_underlay -> EVPN_overlay; +EVPN_overlay -> VPC_routing; +VPC_routing -> VPC_nat44_66; +VPC_routing -> VPC_nat64; +VPC_routing -> rate_limiting; +EVPN_overlay -> Fault_tolerance; +Fault_tolerance -> VPC_nat64; +Fault_tolerance -> VPC_nat44_66; +VPC_routing -> Telemetry; +VPC_nat44_66 -> Telemetry [xlabel="weak"]; +VPC_nat64 -> Telemetry [xlabel="weak"]; +rate_limiting -> Telemetry [xlabel="weak"]; +} +@enddot +``` + +> A graph of the functional dependencies between the required _user facing_ features. +> Each node on the graph represents a feature. +> No feature can be _completed_ without all of the other features which point to it. + +
+ +
+ +```plantuml +@startdot +digraph features { + labelloc=t + node [shape="box"] + graph [ranksep=0.8] + label=< Feature map
(major features)
> + + BGP_underlay [ label="BGP underlay", style=filled, color="lightblue" ] + EVPN_overlay [ label="EVPN overlay", style=filled, color="lightblue" ] + VPC_routing [ label="VPC routing\n(aka RIOT)", style=filled, color="lightblue" ] + VPC_nat44_66 [ label="VPC nat44/66", style=filled, color="lightblue" ] + VPC_nat64 [ label="VPC nat64", style=filled, color="lightblue" ] + telemetry [ label="Telemetry/observability", style=filled, color="lightblue" ] + rate_limiting [ label="Rate limiting", style=filled, color="lightblue" ] + fault_tolerance [ label="Fault tolerance", style=filled, color="lightblue" ] + Management_API [ label="Management API", style=filled, color="lightblue" ] + + control_plane_integration [ label="control plane integration"] + state_sync [ label="state sync" ] + hardware_offloaded_nat [ label="offload nat" ] + hardware_offloaded_routing [ label="Underlay route offload" ] + hardware_offloaded_vpc [ label="VPC route offload" ] + hardware_offloading_basic [ label="basic offloading" ] + datastore_integration [ label="datastore integration" ] + + all [label="*"] + Management_API -> all + all -> Management_API + + datastore_integration -> control_plane_integration + datastore_integration -> hardware_offloaded_routing + hardware_offloading_basic -> hardware_offloaded_routing + hardware_offloaded_routing -> BGP_underlay + fault_tolerance -> VPC_nat44_66 + fault_tolerance -> VPC_nat64 + BGP_underlay -> EVPN_overlay + EVPN_overlay -> VPC_routing + EVPN_overlay -> state_sync + EVPN_overlay -> hardware_offloaded_vpc + hardware_offloaded_nat -> VPC_nat44_66 + hardware_offloaded_nat -> VPC_nat64 + VPC_nat44_66 -> telemetry [xlabel="weak"] + VPC_nat64 -> telemetry [xlabel="weak"] + VPC_routing -> telemetry + VPC_routing -> VPC_nat44_66 + VPC_routing -> VPC_nat64 + VPC_routing -> rate_limiting + control_plane_integration -> BGP_underlay + state_sync -> fault_tolerance + hardware_offloaded_vpc -> hardware_offloaded_nat + hardware_offloaded_vpc -> rate_limiting + 
hardware_offloading_basic -> hardware_offloaded_vpc + rate_limiting -> telemetry [xlabel="weak"] +} +@enddot +``` + +> Here is a _very_ high-level graph of the functional dependencies between the required features. +> Each node on the graph represents a feature. +> No feature can be _completed_ without all the other features which point to it. +> Features shown in blue are user facing. +> All other features represent internal implementation concerns. + +
+ +## Component Map + +
+ +```puml +@startuml +skinparam hyperlinkUnderline false +skinparam linetype ortho +!unquoted function $link($name, $url) +!return "[[" + $url + " " + $name + "]]" +!endfunction + + + +!$q = { "uote": "\"" } + +!$doc_links = { + "config_store": { "text": "Configuration Store", "url": "#configuration-store" }, + "gateway_agent": { "text": "Gateway Agent", "url": "#gateway-agent" }, + "frr_agent": { "text": "FRR agent", "url": "#frr-agent" }, + "zebra": { "text": "zebra", "url": "https://docs.frrouting.org/en/latest/zebra.html" }, + "routing_daemons": { "text": "routing daemons", "url": "#routing-daemons" }, + "zebra_plugin": { "text": "Zebra\\nplugin", "url": "#zebra-plugin" }, + "kernel": { "text": "kernel", "url": "https://en.wikipedia.org/wiki/Linux_kernel" }, + "interface_manager": { "text": "interface manager", "url": "#interface-manager" }, + "routing_manager": { "text": "routing manager", "url": "#routing-manager" }, + "dataplane_workers": { "text": "dataplane workers", "url": "#dataplane-workers" }, + "nat_manager": { "text": "nat manager", "url": "#nat-manager" }, + "control_plane_interface": { "text": "control plane interface", "url": "#control-plane-interface" }, + "management_plane_interface": { "text": "management plane interface", "url": "#management-plane-interface" }, + "state_sync": { "text": "state sync", "url": "#state-sync" }, + "dataplane_model": { "text": "dataplane model", "url": "#dataplane-model" }, + "management_plane": { "text": "management plane", "url": "#management-plane" }, + "control_plane": { "text": "control plane", "url": "#control-plane" }, + "dataplane": { "text": "dataplane", "url": "#dataplane" } +} + +!unquoted function $linked($key) + !return $link($doc_links[$key].text, $doc_links[$key].url) +!endfunction + +!unquoted function $r($key) + !return "rectangle " + $key + " as " + $q.uote + $linked($key) + $q.uote +!endfunction + +!unquoted function $db($key) + !return "database " + $key + " as " + $q.uote + $linked($key) + 
$q.uote +!endfunction + +$r(management_plane) { + $r(gateway_agent) + $db(config_store) +} + +$r(kernel) + +$r(control_plane) { + $r(routing_daemons) + $r(zebra) { + $r(zebra_plugin) + } + $r(frr_agent) +} + +$r(dataplane) { + $r(control_plane_interface) + $r(management_plane_interface) + $db(dataplane_model) + $r(routing_manager) + $r(nat_manager) + $r(state_sync) + $r(interface_manager) + $r(dataplane_workers) +} + +rectangle sister_dataplane as "sister dataplane" { + rectangle rest as "..." + rectangle sister_state_sync as "state sync" +} + +rectangle nics + +control_plane_interface -- dataplane_model +dataplane_workers <--> nics : [[ https://www.dpdk.org/ dpdk ]] +frr_agent <--> routing_daemons +frr_agent <--> zebra +gateway_agent -- frr_agent +gateway_agent -- management_plane_interface +config_store -- gateway_agent +interface_manager -- dataplane_model +interface_manager <--> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] +dataplane_model - state_sync +dataplane_model <--> nat_manager +dataplane_model <--> routing_manager +management_plane_interface -- dataplane_model +nat_manager <--> dataplane_workers +zebra_plugin --- control_plane_interface : [[ https://en.wikipedia.org/wiki/Unix_domain_socket unix socket ]] +routing_daemons <-> zebra +routing_manager <--> dataplane_workers +state_sync <-> sister_state_sync : [[ https://en.wikipedia.org/wiki/Remote_direct_memory_access rdma]] +zebra <-> kernel : [[ https://man7.org/linux/man-pages/man7/netlink.7.html netlink socket ]] + +@enduml +``` + +> Map of the relationships between planned dataplane components + +
+ +
+ +### Configuration Store + +I could (and maybe should) write a book about the design considerations of [Configuration Store]. +For the moment I will limit myself to a list of hard and fast requirements: + +1. CP in the [CAP theorem](https://en.wikipedia.org/wiki/CAP_theorem) sense. + - immediate consistency in the sense that + + > Every read receives either the most recent data or an error. + + - partition tolerance + + > The system continues to operate despite an arbitrary number of messages being dropped (or delayed) by the network between nodes. + + The guiding theory is that + + 1. It is better to **not** function than to **mal**function. + 2. _**It doesn't matter how quickly you can do the wrong thing**_. + +
+
+ +### Gateway Agent + +This is another subject deserving of a small book. + +For now, I will point out some notable design decisions we need to make: + +1. Do we expect a subscription model? +2. If not, do we expect the [gateway agent] to explicitly push state to dependent components? + +Beyond that, we need to make some high-level design choices: + +1. programming language? Likely Go or Rust. +2. REST? GraphQL? I tend to think REST is more appropriate at this time. + +
+
+ +### FRR agent + +Be afraid. Make Fredi fill in this section. But also be afraid. + +
+
+ +### Zebra Plugin + +This is a planned [zebra] plugin in the same spirit as [`fpm`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#id1) or [`dataplane_fpm_nl`](https://docs.frrouting.org/projects/dev-guide/en/latest/fpm.html#dplane-fpm-nl). + +The core idea is to have a plugin that can be dynamically loaded into [zebra] and will listen to the [zebra event stream](https://github.com/FRRouting/frr/blob/ee5a3456d34a756c70ad8856ab7be7bed75ee31c/zebra/zebra_dplane.h#L114-L217) for updates. +The plugin will then take those updates and push them into the dataplane agent, allowing the dataplane to react to route updates. + +
+
+ +### Routing daemons + +For the moment these are [`bgpd`](https://docs.frrouting.org/en/latest/bgp.html) and [`bfdd`](https://docs.frrouting.org/en/latest/bfd.html). + +
+
+ +### Interface Manager + +This is a component that exchanges [netlink] messages with the [kernel] in response to changes in the [dataplane model]. +Its responsibilities include + +1. construction of virtual network interfaces needed by [zebra] +2. translation of ephemeral linux kernel parameters into ephemeral [dpdk] parameters (e.g. netlink interface index to dpdk interface id). +3. retrieval of information not available to [zebra]/[frr] such as neighbor tables / [ARP] / [IPv6 ND] resolution or [bridge] fdb. + +### Control Plane Interface + +This component is responsible for adjudicating communication between the [control plane] and the [dataplane]. +This component is expected to: + +1. Deserialize [bincode] (or perhaps [bitcode]) messages from the [hedgehog plugin] articulating the control plane's rules for the dataplane. +2. Express error messages back to the [control plane] articulating any error conditions. + For example, the [dataplane] may be unable to offload a route for whatever reason (e.g. route type not supported); reporting this ensures that said routes are not advertised by the [control plane]. +3. Express the offloading status (including counters) back to the [control plane] (if possible). + +### Management plane interface + +The [management plane interface] is the interface between the [management plane] and the [dataplane]. + +1. Receive [bincode] (or perhaps [bitcode]) messages from the [gateway agent] over a [unix domain socket] (or perhaps a TCP socket?), parse them, and then update the [dataplane model] to reflect the desired configuration. + +
+
+ +### Dataplane model + +This is an internal component of the [dataplane] which is responsible for managing the _desired_ state of the dataplane. It is updated by the [management plane interface] and is responsible for expressing the _desired state_ (not the observed state) of the [dataplane] to downstream components such as the [routing manager] or the [nat manager]. + +
+
+ +### State sync + +This component is responsible for synchronizing the state of sister dataplanes in the name of fault tolerance. + +
+
+ +### Routing manager + +This component is responsible for managing the routing tables for the dataplane. It is responsible for translating the _desired routing rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. + +
+
+ +### NAT manager + +This component is responsible for managing the [network address translation] tables for the dataplane. It is responsible for translating the _desired NAT rules_ expressed by the [management plane interface] into a set of rules that can be executed by the [dataplane workers][dataplane worker]. + + +
+
+ +### Dataplane workers + +This is a collection of [rte lcores] which are responsible for actually performing the packet processing. +The workers are responsible for performing the following tasks: + +- Receive packets from the NIC +- Identify local traffic +- Perform underlay routing +- Perform overlay routing +- Perform [NAT] +- Transmit packets to the NIC + +
+
+ +### Dataplane + +The main packet processing engine. + +### Management Plane + +The management plane is a high-level abstraction that is responsible for + +1. Accepting API calls from the end user. +2. Translating those API calls into dataplane and control plane configuration. +3. Storing that configuration in the [Configuration Store] + +
+
+ +### Control Plane + +The control plane is, for the moment, just [bgpd] and [bfdd]. + +
+ +{{#include ../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/development-plan.md b/design-docs/src/mdbook/src/dataplane/development-plan.md new file mode 100644 index 00000000..5367fec7 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/development-plan.md @@ -0,0 +1,149 @@ +## Development plan + +
+ +```plantuml +@startdot +!$ptr=./tasks +!$optional="color=lightyellow, style=filled" +!$started="color=lightblue, style=filled" +!$completed="color=lightgreen, style=filled" +!$urgent="color=orange, style=filled" +!$difficult="color=pink, style=filled" +digraph g { + node [shape="box"] + splines=ortho; + + graph [ranksep=0.9] + labelloc=t + overlap=false; + concentrate="true"; + remincross=true; + mclimit=800; + compound=true; + + underlay_routing [label="underlay routing", href="$ptr/underlay-routing.html", fontcolor=blue] + config_db_schema [label="config db schema", $difficult, href="$ptr/config-db-schema.html", fontcolor=blue] + core_pinning [label="core pinning", $optional, href="$ptr/core-pinning.html", fontcolor=blue] + cp_api_control_investigation [label=<programmatic control of frr
(investigation)
>, $urgent, href="$ptr/programmatic-control-of-frr.html", fontcolor=blue] + cp_dev_env [label="control plane\ndev env", href="$ptr/control-plane-dev-env.html", fontcolor=blue] + cp_image_creation [ label="Create control plane container image", href="$ptr/create-control-plane-image.html", fontcolor=blue] + dp_dev_env [label="dataplane dev env", $completed, href="../../build/index.html", fontcolor=blue] + dp_dp_state_sync [label="state sync\n(implementation)", $difficult, href="$ptr/state-sync.html", fontcolor=blue] + dp_dp_state_sync_design [label="state sync\n(design)", $urgent, href="$ptr/state-sync-design.html", fontcolor=blue] + dp_image_creation [label="dataplane image build", $completed] + fault_tolerance [label="fault tolerance (implementation)", href="$ptr/fault-tolerance-implementation.html", fontcolor=blue] + fault_tolerance_proof [label="fault tolerance (validation)", $difficult, href="$ptr/fault-tolerance-validation.html", fontcolor=blue] + zebra_plugin_basic [ label="zebra plugin\n(basic)", href="$ptr/zebra-plugin.html", fontcolor=blue ] + frr_programmatic_control [label=<programmatic
control of frr
>, $difficult, href="$ptr/programmatic-control-of-frr.html", fontcolor=blue] + gw_test_env [label="gateway test env", href="$ptr/gateway-test-env.html", fontcolor=blue] + investigate_config_persist [ label=<configuration
persistence
(investigation)
>, $urgent, href="$ptr/configuration-persistence-investigation.html", fontcolor=blue ] + local_traffic_ident [ label="identify local traffic", href="$ptr/identify-local-traffic.html", fontcolor=blue] + mp_cp_interaction [ label="management plane \ncontrol plane interaction", href="$ptr/management-plane-control-plane-interaction.html", fontcolor=blue] + mp_dp_interaction [ label="management plane \ndataplane interaction", href="$ptr/management-plane-dataplane-interaction.html", fontcolor=blue] + nat64_investigation [label=<NAT64 investigation>, $urgent, href="$ptr/NAT64-investigation.html", fontcolor=blue] + performance_measurement [ label="measure performance", href="$ptr/performance-measurement.html", fontcolor=blue] + plugin_dp_proto [ label="plugin/dataplane protocol", $started, href="$ptr/dataplane-control-plane-protocol.html", fontcolor=blue] + plugin_dp_transport [ label="plugin/dataplane transport", $completed, href="$ptr/dataplane-control-plane-transport.html", fontcolor=blue] + public_internet_access [label="public internet access", href="$ptr/public-internet-access.html", fontcolor=blue] + rate_limiting_investigation [label="rate limiting investigation", $completed] + routing_manager [label="routing manager", href="$ptr/route-manager.html", fontcolor=blue] + separate_cp_containers [ label="one cp daemon per container", $optional, href="$ptr/one-control-plane-daemon-per-container.html", fontcolor=blue] + telemetry_basic [label="telemetry (basic)", href="$ptr/telemetry-basic.html", fontcolor=blue] + telemetry_investigation [label="telemetry\n(investigation)", $completed, href="$ptr/telemetry-investigation.html", fontcolor=blue] + telemetry_integrated [label="telemetry (integration)", href="$ptr/telemetry-integration.html", fontcolor=blue] + vpc_nat44 [label="nat44", href="$ptr/NAT44.html", fontcolor=blue] + vpc_nat64 [label="nat64", $difficult, href="$ptr/NAT64.html", fontcolor=blue] + vpc_nat66 [label="nat66", href="$ptr/NAT66.html", fontcolor=blue] + 
vpc_rate_limiting [label="vpc rate limiting", href="$ptr/vpc-rate-limiting.html", fontcolor=blue] + vpc_routing [label="vpc routing", href="$ptr/vpc-routing.html", fontcolor=blue] + vxlan_tunnels [label="vxlan tunnels", href="$ptr/vxlan-tunnels.html", fontcolor=blue] + vxlan_tunnel_investigation [label="vxlan tunnels\n(investigation)", $completed] + worker_lifecycle [label="dp worker lifecycle", href="$ptr/dataplane-worker-lifecycle.html", fontcolor=blue] + + nat64_investigation -> dp_dp_state_sync_design + investigate_config_persist -> config_db_schema + dp_dp_state_sync_design -> dp_dp_state_sync + cp_api_control_investigation -> frr_programmatic_control + frr_programmatic_control -> mp_cp_interaction + vxlan_tunnel_investigation -> vxlan_tunnels + vxlan_tunnels -> vpc_routing + + nat64_investigation -> vpc_nat64 + vpc_nat64 -> public_internet_access + vpc_nat44 -> public_internet_access + vpc_nat66 -> public_internet_access + dp_dp_state_sync -> fault_tolerance + + rate_limiting_investigation -> vpc_rate_limiting + telemetry_investigation -> telemetry_basic + telemetry_basic -> telemetry_integrated + + mp_dp_interaction -> telemetry_integrated + + telemetry_integrated -> performance_measurement + core_pinning -> performance_measurement + dp_dp_state_sync -> performance_measurement + + vpc_routing -> vpc_rate_limiting + mp_cp_interaction -> vpc_routing + underlay_routing -> vpc_routing + cp_dev_env -> gw_test_env + cp_image_creation -> cp_dev_env + cp_image_creation -> separate_cp_containers + dp_dev_env -> gw_test_env + dp_image_creation -> dp_dev_env + gw_test_env -> zebra_plugin_basic + zebra_plugin_basic -> routing_manager + config_db_schema -> mp_cp_interaction + config_db_schema -> mp_dp_interaction + local_traffic_ident -> zebra_plugin_basic + mp_dp_interaction -> vpc_routing + plugin_dp_proto -> zebra_plugin_basic + plugin_dp_transport -> zebra_plugin_basic + routing_manager -> underlay_routing + config_db_schema -> underlay_routing + vpc_routing -> 
vpc_nat44 + vpc_routing -> vpc_nat64 + vpc_routing -> vpc_nat66 + worker_lifecycle -> core_pinning + worker_lifecycle -> vpc_routing + + vpc_nat44 -> dp_dp_state_sync + vpc_nat66 -> dp_dp_state_sync + vpc_nat64 -> dp_dp_state_sync + fault_tolerance -> fault_tolerance_proof + + subgraph cluster_legend { + label="legend"; + started [label="started", $started] + optional [label="optional", $optional] + completed [label="\"completed\"", $completed] + urgent [label="urgent", $urgent] + difficult [label="difficult", $difficult] + } + +} +@enddot +``` + +
+ +> Graph of the engineering development plan. +> Each node on the graph represents a task or required function. +> No task can be _completed_ without all the other tasks which point to it. +> +> * Tasks shown in orange are points of higher uncertainty and risk. +> * Tasks shown in pink are points of expected higher difficulty. +> * Tasks shown in green are already completed. +
+
+ +> [!NOTE] +> I am recommending that tasks with higher uncertainty (shown in orange) be addressed with all possible speed. +> Especially if they directly connect to tasks of high expected difficulty. + +> [!WARNING] +> Tasks of high expected difficulty are different from tasks which we expect will be very time-consuming. + +{{#include ../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/map-of-dpdk.md b/design-docs/src/mdbook/src/dataplane/map-of-dpdk.md index 598cb8fb..48182b36 100644 --- a/design-docs/src/mdbook/src/dataplane/map-of-dpdk.md +++ b/design-docs/src/mdbook/src/dataplane/map-of-dpdk.md @@ -26,15 +26,15 @@ hide circle !endfunction !$doc_links = { - "flow_item": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#pattern-item", - "flow_item_template": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#pattern-templates", - "flow_action": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#actions", - "flow_action_template": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#actions-templates", - "flow_action_indirect": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#action-indirect", - "flow_action_indirect_list": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#action-indirect-list", - "switch_domain": "https://doc.dpdk.org/guides-24.07/prog_guide/switch_representation.html", - "flow_table": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html#attribute-group", - "flow": "https://doc.dpdk.org/guides-24.07/prog_guide/rte_flow.html", + "flow_item": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#pattern-item", + "flow_item_template": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#pattern-templates", + "flow_action": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#actions", + "flow_action_template": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#actions-templates", + 
"flow_action_indirect": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#action-indirect", + "flow_action_indirect_list": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#action-indirect-list", + "switch_domain": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/switch_representation.html", + "flow_table": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html#attribute-group", + "flow": "https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html", "hairpin_queue": "https://inbox.dpdk.org/dev/1565703468-55617-1-git-send-email-orika@mellanox.com/", "membuf": "https://doc.dpdk.org/guides/prog_guide/mbuf_lib.html", "mempool": "https://doc.dpdk.org/guides/prog_guide/mempool_lib.html", diff --git a/design-docs/src/mdbook/src/dataplane/tasks/NAT44.md b/design-docs/src/mdbook/src/dataplane/tasks/NAT44.md new file mode 100644 index 00000000..85c8dbec --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/NAT44.md @@ -0,0 +1,3 @@ +# NAT44 + +Basic IPv4 NAT. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/NAT64-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks/NAT64-investigation.md new file mode 100644 index 00000000..88defad6 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/NAT64-investigation.md @@ -0,0 +1,8 @@ +# NAT64 (investigation) + +Linux provides no implementation of [NAT64] so we don't have much in the way of reference implementation to fall back on without going full layer 7. + +Getting the hardware offloads to work on this may be really challenging. +My understanding is that the ConnectX-7 cards are the only ones that support [NAT64] offload, and even then under limited conditions. 
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/NAT64.md b/design-docs/src/mdbook/src/dataplane/tasks/NAT64.md new file mode 100644 index 00000000..45df287f --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/NAT64.md @@ -0,0 +1,10 @@ +# NAT64 + +This one is going to be tricky. +I hesitate to posit any specific design for this at this time. +See the [investigation](./NAT64-investigation.md) for more details. + +> [!WARNING] +> Here be dragons! + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/NAT66.md b/design-docs/src/mdbook/src/dataplane/tasks/NAT66.md new file mode 100644 index 00000000..fdb200f6 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/NAT66.md @@ -0,0 +1,6 @@ +# NAT66 + +Basic(?) IPv6 NAT. + +> [!NOTE] +> Are we really-really sure we need this? diff --git a/design-docs/src/mdbook/src/dataplane/tasks/config-db-schema.md b/design-docs/src/mdbook/src/dataplane/tasks/config-db-schema.md new file mode 100644 index 00000000..bd046953 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/config-db-schema.md @@ -0,0 +1,105 @@ +# Configuration database schema + +One of our biggest TODO items is to create an ER diagram for our configuration database. + +To be clear, I am not asserting that we need to use an RDBMS in practice. +We just need an exacting spec for the relationships between our configuration data. + +
+ +```plantuml +@startuml +skinparam linetype ortho +skinparam hyperlinkUnderline false + +hide empty description +hide empty members +hide circle + +entity Group { + **id: PK**, + name: String, +} + +entity User { + **id: PK**, + name: String, +} + +entity GroupMembership { + **id: PK**, + user: FK, + group: FK, +} + +entity Vpc { + **id: PK**, + name: String, + vrf: u32, + group: FK, +} + +entity Discriminant { + **id: PK**, + vni: Option<Vni>, + vid: Option<Vid>, + aci: Option<(Vid, Vni)>, + --- + Note: + \t Exclusive: vni, vid, aci + \t (only one non-null) + +} + +entity Interface { + **id: PK**, + meta: Unique, + vpc: FK, + name: String, +} + +entity IpAddressAssignment { + **id: PK**, + vpc: FK, + interface: FK, + cidr: (Ip, Subnet), + --- + -- prevent overlapping Ip assignments + exclude using gist ( + \t vpc with =, cidr inet_ops with && + ) +} + +entity Peering { + **id: PK**, + group: FK, +} + +entity PeeringRelation { + **id: PK**, + type: enum (provider, consumer, peer, direct), + peering: FK, + interface: FK, + --- + Note: + \t restrict to one provider + \t type per peering (needs gin index?) +} + +Group ||--o{ Peering +Group ||--o{ Vpc +Group ||--o| GroupMembership +Interface ||--o{ IpAddressAssignment +Interface ||--|| Discriminant +Peering ||--o{ PeeringRelation +PeeringRelation }o--|| Interface +User ||--o| GroupMembership +Vpc ||--o{ Interface +Vpc ||--o{ IpAddressAssignment + +@enduml +``` + +> We need to think about access controls and cardinality more. + +
diff --git a/design-docs/src/mdbook/src/dataplane/tasks/configuration-persistence-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks/configuration-persistence-investigation.md new file mode 100644 index 00000000..46e4ceb5 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/configuration-persistence-investigation.md @@ -0,0 +1,61 @@ +# Configuration persistence + +We need to officially pick a data store for configuration information. + +This data store _is not_ intended for storing "fast" state. +Rather, this store needs to hold configuration data which is + +1. durable +2. atomic +3. strongly typed +4. immediately consistent + +## etcd + +[etcd] is a reasonable choice because + +1. It is already in use in kubernetes and is therefore likely to be well-maintained and tested. +2. we are already using / integrating with kubernetes so any flaws in `etcd` are likely to impact us anyway. + +I have used [`zookeeper`](https://zookeeper.apache.org/) in the past and *strongly recommend against it*. + +I would also consider [`consul`](https://github.com/hashicorp/consul) but [the license](https://github.com/hashicorp/consul/blob/main/LICENSE) is *_not_* acceptable. + +A newer entry in the space is [`nacos`](https://github.com/alibaba/nacos) but I think it is less well suited since it only seems to support eventual consistency. + +## rqlite + +_I have not used [`rqlite`],_ but it seems to be a reasonable (if young) option. +My biggest concern is that [transactions](https://rqlite.io/docs/api/api/#transactions) support seems _very_ weak. 
+ +- has a supported [rust client](https://github.com/tomvoet/rqlite-rs) (and even a [sqlx](https://github.com/launchbadge/sqlx) client in the form of [sqlx-rqlite](https://crates.io/crates/sqlx-rqlite)) +- [weak](https://rqlite.io/docs/api/read-consistency/#weak), [linearizable](https://rqlite.io/docs/api/read-consistency/#linearizable), and [strong](https://rqlite.io/docs/api/read-consistency/#strong) consistency models supported +- [transactions](https://rqlite.io/docs/api/api/#transactions) (this seems less than ideal though) + +## TiKV + +[TiKV] seems like the **strongest near-term option** on the list. + +I think that the biggest advantage is in the case that we want to _eventually_ switch to [TiDB]. +That strategy allows us the most flexibility to use a "real" database in the future while using a "simple" KV database in the near term. + +## TiDB + +[TiDB] is a [MySQL] compatible [distributed SQL] database built on top of [TiKV]. + +The thing which I find most striking about this database is the excellent documentation and robust feature set (robust all things considered). + +- [Generated columns](https://docs.pingcap.com/tidb/dev/generated-columns) +- [JSON](https://docs.pingcap.com/tidb/dev/data-type-json) +- [Referential integrity](https://docs.pingcap.com/tidb/dev/foreign-key) +- [Transactions](https://docs.pingcap.com/tidb/dev/transaction-overview) +- [Views](https://docs.pingcap.com/tidb/dev/views) +- [Change data capture](https://docs.pingcap.com/tidb/stable/ticdc-overview) + +## Summary + +Thus, I think the real choice is between [`etcd`], [TiDB], and [TiKV]. + +That choice comes down to how much we value the functionality of SQL (multiple indexes, referential integrity, strong schema) vs. the upsides of KV databases (watches, more easily evolved schema).
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md b/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md new file mode 100644 index 00000000..2b8c569b --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/control-plane-dev-env.md @@ -0,0 +1,14 @@ +# Control plane dev-env + +Create and document a development environment for the [zebra] [hedgehog plugin]. + +Requirements: + +- **REQUIRE**: can build plugin within a container +- **REQUIRE**: CI builds dev-env container +- **REQUIRE**: CI runs tests in dev-env container or, +- **IDEALLY**: tests run in a more minimal test-env container. + +## Likely dispatch + +- [@Fredi-raspall] diff --git a/design-docs/src/mdbook/src/dataplane/tasks/core-pinning.md b/design-docs/src/mdbook/src/dataplane/tasks/core-pinning.md new file mode 100644 index 00000000..a8eaa4c8 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/core-pinning.md @@ -0,0 +1,4 @@ +# Core pinning + +> [!NOTE] +> I think we can punt on this until the last minute! diff --git a/design-docs/src/mdbook/src/dataplane/tasks/create-control-plane-image.md b/design-docs/src/mdbook/src/dataplane/tasks/create-control-plane-image.md new file mode 100644 index 00000000..2951d775 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/create-control-plane-image.md @@ -0,0 +1,26 @@ +# Create a control-plane container image + +We need to generate a docker image to run our control plane. + +## Goals: + +1. **REQUIRE**: [`zebra`] plugin support +2. **REQUIRE**: [`bgpd`] support +3. **REQUIRE**: [`bfdd`] support +4. **REQUIRE**: CI builds the container +5. **REQUIRE**: [Lua scripting] should be disabled in build +6. **IDEALLY**: disable as much functionality as we can get away with +7. **IDEALLY**: supply a debug build and release build + +## Note: + +Both [@Fredi-raspall] and [@daniel-noland] have made some progress on this task and should sync up to get it over the line.
+ +## Likely dispatch + +- [@Fredi-raspall] + +[Lua scripting]: https://docs.frrouting.org/en/latest/scripting.html + +{{#include ../../links.md}} + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-protocol.md b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-protocol.md new file mode 100644 index 00000000..1a3420d2 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-protocol.md @@ -0,0 +1,15 @@ +# Dataplane / Control Plane communication protocol + +We need some method of sending and receiving data between the [dataplane] and [control plane]. + +This may take the form of [serde] driven message serialization and deserialization. +Use of [serde] almost certainly requires the use of [bindgen] or [cbindgen]. + +Alternatives include schema-first methods such as [protobuf] or [capnproto], or a bespoke binary protocol. + +## Likely assignment + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-reconcile.md b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-reconcile.md new file mode 100644 index 00000000..eaa36057 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-reconcile.md @@ -0,0 +1,24 @@ +# Dataplane / Control Plane reconcile + +The dataplane and control plane need to communicate with each other regarding + +1. Full routing tables (for [state sync]) +2. route updates (i.e. differential updates) +3. route offloading status (including failures) +4. Address assignments, to ensure the dataplane can configure [local delivery](./identify-local-traffic.md) + +Keep in mind that route tables are, in general, notably more complex than a naive LPM trie, and may include features like: + +1. [ECMP]/WCMP +2. [encapsulation rules](https://www.man7.org/linux/man-pages/man8/ip-route.8.html), +3.
[nexthop groups](https://man7.org/linux/man-pages/man8/ip-nexthop.8.html), +4. multicast routes (this is unlikely to be important in the near term). + +We only expect to support basic IPv4 and IPv6 LPM routes in the near term, but feature evolution should be accounted for in the design. + +## Likely dispatch + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-transport.md b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-transport.md new file mode 100644 index 00000000..bcf14741 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-control-plane-transport.md @@ -0,0 +1,10 @@ +# Dataplane / Control Plane communication transport + +It seems like we have all agreed on [unix domain sockets][unix domain socket]. + +## Likely dispatch + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/dataplane-worker-lifecycle.md b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-worker-lifecycle.md new file mode 100644 index 00000000..75474d35 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/dataplane-worker-lifecycle.md @@ -0,0 +1,25 @@ +# Dataplane worker lifecycle + +This is mostly a design task at this point. + +Things which need to be worked out and documented: + +1. communication pattern between workers +2. communication pattern between workers and the control plane +3. communication pattern between workers and the management plane +4. communication pattern between workers and the telemetry / monitoring subsystems + +In each case, we need to consider + +1. performance impact, +2. thread safety, +3. design simplicity, +4. transactionality, +5. extensibility.
+ +## Likely dispatch + +- primary: [@daniel-noland] +- sync with: [@sergeymatov] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-implementation.md b/design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-implementation.md new file mode 100644 index 00000000..f99fcb3a --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-implementation.md @@ -0,0 +1,4 @@ +# Fault tolerance (implementation) + +This is principally challenging from a testing perspective. +Beyond that, it mostly comes down to endless retry. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-validation.md b/design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-validation.md new file mode 100644 index 00000000..a560210c --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/fault-tolerance-validation.md @@ -0,0 +1,10 @@ +# Fault tolerance (validation) + +> [!WARNING] +> Fault tolerance is an extremely challenging thing to test and to prove! + +Some tools can help us here: + +1. [bolero](https://github.com/camshaft/bolero) +2. [smoltcp](https://github.com/smoltcp-rs/smoltcp) +3. [Stateright](https://github.com/stateright/stateright) diff --git a/design-docs/src/mdbook/src/dataplane/tasks/gateway-test-env.md b/design-docs/src/mdbook/src/dataplane/tasks/gateway-test-env.md new file mode 100644 index 00000000..52d0782c --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/gateway-test-env.md @@ -0,0 +1,14 @@ +# Gateway test-env + +We need an environment in which we can run integration tests between the [control plane], [dataplane], and [management plane]. + +This task will require coordination between the [dataplane]'s extant test environment and the [control plane]'s test environment. +Note that there is _**no requirement**_ that the integration tests exist in a single environment. +In fact, it is likely best that they do not.
+ +## Likely assignment + +* Primary: [@Fredi-raspall] +* Coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/identify-local-traffic.md b/design-docs/src/mdbook/src/dataplane/tasks/identify-local-traffic.md new file mode 100644 index 00000000..b35f8dee --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/identify-local-traffic.md @@ -0,0 +1,33 @@ +# Identify local traffic + +At its most basic level, the Hedgehog dataplane is a router. +While most traffic processed by the router will be directed _through_ the router, some traffic will be directed _to_ the router itself. + +The primary classes of this traffic are: + +1. [Control plane] traffic + - e.g. BGP session traffic + - (future) [IPsec] [IKE] traffic +2. [Management plane] traffic + - traffic directed to the data plane from a management plane running on another machine. + - traffic directed to the management plane from the end user (e.g., API calls). +3. Low-level network management protocol traffic + - [ARP] requests and responses + - [IPv6 ND] requests and responses + - (possibly) [LACP] pdu frames (depending on client configuration) + - [BFD] pdu frames +4. [state sync] traffic + - traffic to maintain state synchronization between dataplane nodes + +These types of traffic will need to be accounted for in the offload rules of the data plane to avoid: + +1. forwarding such traffic +2. dropping such traffic + +## Likely dispatch + +- develop: [@daniel-noland] +- coordinate with [@Fredi-raspall] to ensure that needed control plane traffic makes it through. +- coordinate with [@sergeymatov] to ensure that needed dataplane control traffic makes it through. 
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/management-plane-control-plane-interaction.md b/design-docs/src/mdbook/src/dataplane/tasks/management-plane-control-plane-interaction.md new file mode 100644 index 00000000..237f7948 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/management-plane-control-plane-interaction.md @@ -0,0 +1,7 @@ +# Management plane - control plane interaction + +At first I thought this was mostly dependent on the [config db schema](./config-db-schema.md) but now I think that maybe this should all be routed through the dataplane. + +> [!CAUTION] +> This is a potential source of misalignment in the project overall. +> We need to sync on this one. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/management-plane-dataplane-interaction.md b/design-docs/src/mdbook/src/dataplane/tasks/management-plane-dataplane-interaction.md new file mode 100644 index 00000000..a8e5c199 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/management-plane-dataplane-interaction.md @@ -0,0 +1,9 @@ +# Management Plane - Dataplane Interaction + +We need to settle on how the management plane and dataplane interact. + +1. We need a transport protocol (e.g., TCP session, HTTP session with [SSE](https://en.wikipedia.org/wiki/Server-sent_events), [WebSocket](https://en.wikipedia.org/wiki/WebSocket)). +2. We need a protocol (schema for the data we transport) +3. We need an overall strategy (kill-and-fill or differential updates) + + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/one-control-plane-daemon-per-container.md b/design-docs/src/mdbook/src/dataplane/tasks/one-control-plane-daemon-per-container.md new file mode 100644 index 00000000..aed69cc3 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/one-control-plane-daemon-per-container.md @@ -0,0 +1,4 @@ +# One control plane daemon per container + +> [!NOTE] +> I think we can punt on this one!
diff --git a/design-docs/src/mdbook/src/dataplane/tasks/performance-measurement.md b/design-docs/src/mdbook/src/dataplane/tasks/performance-measurement.md new file mode 100644 index 00000000..4417a429 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/performance-measurement.md @@ -0,0 +1,5 @@ +# Performance measurement + +I hate to say it, but this is going to be one of the last things we manage to get to. + +It will be challenging from a marketing perspective, but there is very little we can do about that. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md b/design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md new file mode 100644 index 00000000..6727c5e9 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/pick-a-datastore.md @@ -0,0 +1,32 @@ +# Pick a data store + +We need to officially pick a data store for configuration information. + +This data store _is not_ intended for storing "fast" state. +Rather, this store needs to hold configuration data which is + +1. durable +2. atomic +3. strongly typed +4. immediately consistent + +[`etcd`] is a reasonable choice because + +1. It is already in use in kubernetes and is therefore likely to be well-maintained and tested. +2. we are already using / integrating with kubernetes so any flaws in `etcd` are likely to impact us anyway. + +I have used [`zookeeper`](https://zookeeper.apache.org/) in the past and *strongly recommend against it*. + +I would also consider [`consul`](https://github.com/hashicorp/consul) but [the license](https://github.com/hashicorp/consul/blob/main/LICENSE) is *_not_* acceptable. + +A newer entry in the space is [`nacos`](https://github.com/alibaba/nacos) but I think it is less well suited since it only seems to support eventual consistency. + +The remaining option I know of is [`rqlite`]. _I have not used it,_ but it seems to be a reasonable option. 
+ +- has a supported [rust client](https://github.com/tomvoet/rqlite-rs) (and even a [sqlx](https://github.com/launchbadge/sqlx) client in the form of [sqlx-rqlite](https://crates.io/crates/sqlx-rqlite)) +- [weak](https://rqlite.io/docs/api/read-consistency/#weak), [linearizable](https://rqlite.io/docs/api/read-consistency/#linearizable), and [strong](https://rqlite.io/docs/api/read-consistency/#strong) consistency models supported +- [transactions](https://rqlite.io/docs/api/api/#transactions) (this seems less than ideal though) + +Thus, I think the real choice is between [`etcd`] and [`rqlite`]. + +That choice comes down to how much we value the functionality of SQLite (multiple indexes, referential integrity, strong schema) vs. the upsides of [etcd] (watches, battle tested, and more widely used). diff --git a/design-docs/src/mdbook/src/dataplane/tasks/programmatic-control-of-frr.md b/design-docs/src/mdbook/src/dataplane/tasks/programmatic-control-of-frr.md new file mode 100644 index 00000000..5d3e3317 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/programmatic-control-of-frr.md @@ -0,0 +1,8 @@ +# Programmatic Control of FRR + +> [!NOTE] +> I am going to recommend we cheat and just use the reload method in the near term. + +> [!WARNING] +> **I DON'T WANT TO DO THAT LONG TERM!!!** + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/public-internet-access.md b/design-docs/src/mdbook/src/dataplane/tasks/public-internet-access.md new file mode 100644 index 00000000..2f396e08 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/public-internet-access.md @@ -0,0 +1,4 @@ +# Public internet access + +This is mostly distinct from the NAT tickets and [vpc routing](./vpc-routing.md) in the sense that we need to make extra sure our policy engine prohibits incorrect communication patterns. +Otherwise, the internet is just another VPC to us.
diff --git a/design-docs/src/mdbook/src/dataplane/tasks/rate-limiting-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks/rate-limiting-investigation.md new file mode 100644 index 00000000..adb4d38e --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/rate-limiting-investigation.md @@ -0,0 +1 @@ +# Rate limiting investigation diff --git a/design-docs/src/mdbook/src/dataplane/tasks/route-manager.md b/design-docs/src/mdbook/src/dataplane/tasks/route-manager.md new file mode 100644 index 00000000..8071c935 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/route-manager.md @@ -0,0 +1,8 @@ +# Route manager + +This is basically a big TODO. + +For the moment, I would like to get some more precise feature definition from [@sergeymatov]. + +It is also important to align this task with the [dataplane worker lifecycle](./dataplane-worker-lifecycle.md). + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/state-sync-design.md b/design-docs/src/mdbook/src/dataplane/tasks/state-sync-design.md new file mode 100644 index 00000000..9b6591f0 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/state-sync-design.md @@ -0,0 +1,7 @@ +# State sync (design) + +My major objection to this, as an issue, is that we are inherently eventually consistent (if consistent at all) in the two actor model. +It seems like we are setting ourselves up for the famous [Byzantine Generals Problem](https://en.wikipedia.org/wiki/Byzantine_fault). + +> [!WARNING] +> Here be dragons!
diff --git a/design-docs/src/mdbook/src/dataplane/tasks/state-sync-implementation.md b/design-docs/src/mdbook/src/dataplane/tasks/state-sync-implementation.md new file mode 100644 index 00000000..a8a4f142 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/state-sync-implementation.md @@ -0,0 +1 @@ +# State sync (implementation) diff --git a/design-docs/src/mdbook/src/dataplane/tasks/state-sync.md b/design-docs/src/mdbook/src/dataplane/tasks/state-sync.md new file mode 100644 index 00000000..fa943d02 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/state-sync.md @@ -0,0 +1,3 @@ +# State sync + +I hesitate to make any comments on design until [NAT64 investigation](./NAT64-investigation.md) and [the design ticket](./state-sync-design.md) are further along. diff --git a/design-docs/src/mdbook/src/dataplane/tasks/telemetry-basic.md b/design-docs/src/mdbook/src/dataplane/tasks/telemetry-basic.md new file mode 100644 index 00000000..e83741ea --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/telemetry-basic.md @@ -0,0 +1,17 @@ +# Telemetry (basic) + +We need this implemented and hooked up. + +Skills required: + +1. [tracing] +2. [Kubernetes] +3. [Grafana][graphana] +4. [loki] +5. [prometheus]? + +## Likely dispatch + +Anybody can take this one. + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/telemetry-integration.md b/design-docs/src/mdbook/src/dataplane/tasks/telemetry-integration.md new file mode 100644 index 00000000..0029f440 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/telemetry-integration.md @@ -0,0 +1,12 @@ +# Telemetry (integration) + +We need this hooked up. + +The main tasks here will be: + +1. determine customer telemetry / observability requirements +2. integrate [tracing] with the customer's desired observability stack. +3. integrate [tracing] with our [management plane].
+ + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/telemetry-investigation.md b/design-docs/src/mdbook/src/dataplane/tasks/telemetry-investigation.md new file mode 100644 index 00000000..c54fcf62 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/telemetry-investigation.md @@ -0,0 +1,16 @@ +# Telemetry (investigation) + +Trace all the things! + +I could write a whole thing about the [tracing] crate, but I don't need to. +Go read these (excellent) docs: + +1. [tracing crate][tracing] +2. [subscribers](https://docs.rs/tracing/latest/tracing/#related-crates) + + +## Dispatch + +[@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/underlay-routing.md b/design-docs/src/mdbook/src/dataplane/tasks/underlay-routing.md new file mode 100644 index 00000000..8f50c7ce --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/underlay-routing.md @@ -0,0 +1,12 @@ +# Underlay routing + +Basic IPv4 / IPv6 routing + +Requirements: + +1. **REQUIRE**: span both NIC ports +2. **REQUIRE**: full hardware offloading +3. **REQUIRE**: basic fault tolerance +4. **REQUIRE**: [ARP] / [IPv6 ND] managed by [kernel] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/vpc-rate-limiting.md b/design-docs/src/mdbook/src/dataplane/tasks/vpc-rate-limiting.md new file mode 100644 index 00000000..ebdf647b --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/vpc-rate-limiting.md @@ -0,0 +1,8 @@ +# VPC rate-limiting + +Just rate limiting! + +Explicitly not full [QoS] for the moment. +If we involve [QoS] in the MVP then we will have zero chance on this timeline. 
+ +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/vpc-routing.md b/design-docs/src/mdbook/src/dataplane/tasks/vpc-routing.md new file mode 100644 index 00000000..c701a0cd --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/vpc-routing.md @@ -0,0 +1,6 @@ +# VPC routing + +VPC routing is the process of routing within and between VPCs. + +Accomplishing this is a major milestone for the project. + diff --git a/design-docs/src/mdbook/src/dataplane/tasks/vxlan-tunnels.md b/design-docs/src/mdbook/src/dataplane/tasks/vxlan-tunnels.md new file mode 100644 index 00000000..fab89538 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/vxlan-tunnels.md @@ -0,0 +1,8 @@ +# VXLAN Decap + +We need systems in place to + +1. determine the [VXLAN] tunnels we should terminate / originate. +2. install and remove the [rte flow](https://doc.dpdk.org/guides-24.11/prog_guide/ethdev/flow_offload.html) rules needed to terminate / originate those tunnels. + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/dataplane/tasks/zebra-plugin.md b/design-docs/src/mdbook/src/dataplane/tasks/zebra-plugin.md new file mode 100644 index 00000000..92866b77 --- /dev/null +++ b/design-docs/src/mdbook/src/dataplane/tasks/zebra-plugin.md @@ -0,0 +1,24 @@ +# Zebra Plugin (basic) + +The dataplane and control plane need to communicate with each other regarding + +1. Full routing tables (for [state sync]) +2. route updates (i.e. differential updates) +3. route offloading status (including failures) +4. Address assignments, to ensure the dataplane can configure [local delivery](./identify-local-traffic.md) + +Keep in mind that route tables are, in general, notably more complex than a naive LPM trie, and may include features like: + +1. [ECMP]/WCMP +2. [encapsulation rules](https://www.man7.org/linux/man-pages/man8/ip-route.8.html), +3. [nexthop groups](https://man7.org/linux/man-pages/man8/ip-nexthop.8.html), +4. multicast routes (this is unlikely to be important in the near term).
+ +We only expect to support basic IPv4 and IPv6 LPM routes in the near term, but feature evolution should be accounted for in the design. + +## Likely dispatch + +* [@Fredi-raspall] +* coordinate with: [@daniel-noland] + +{{#include ../../links.md}} diff --git a/design-docs/src/mdbook/src/links.md b/design-docs/src/mdbook/src/links.md new file mode 100644 index 00000000..0ba73213 --- /dev/null +++ b/design-docs/src/mdbook/src/links.md @@ -0,0 +1,71 @@ + + +[configuration store]: /dataplane/design.md#configuration-store +[control plane]: /dataplane/design.md#control-plane +[dataplane model]: /dataplane/design.md#dataplane-model +[dataplane worker]: /dataplane/design.md#dataplane-workers +[dataplane]: /dataplane/design.md#dataplane +[gateway agent]: /dataplane/design.md#gateway-agent +[hedgehog plugin]: /dataplane/design.md#hedgehog-plugin +[management plane interface]: /dataplane/design.md#management-plane-interface +[management plane]: /dataplane/design.md#management-plane +[nat manager]: /dataplane/design.md#nat-manager +[routing manager]: /dataplane/design.md#routing-manager +[state sync]: /dataplane/design.md#state-sync + + + +[ARP]: https://en.wikipedia.org/wiki/Address_Resolution_Protocol +[BFD]: https://en.wikipedia.org/wiki/Bidirectional_Forwarding_Detection +[ECMP]: https://en.wikipedia.org/wiki/Equal-cost_multi-path_routing +[IKE]: https://en.wikipedia.org/wiki/Internet_Key_Exchange +[IPsec]: https://en.wikipedia.org/wiki/IPsec +[IPv6 ND]: https://en.wikipedia.org/wiki/Neighbor_Discovery_Protocol +[LACP]: https://en.wikipedia.org/wiki/Link_aggregation#Link_Aggregation_Control_Protocol +[MySQL]: https://www.mysql.com/ +[NAT]: https://en.wikipedia.org/wiki/Network_address_translation +[QoS]: https://en.wikipedia.org/wiki/Quality_of_service +[TiDB]: https://www.pingcap.com/ +[TiKV]: https://tikv.org/ +[VXLAN]: https://en.wikipedia.org/wiki/Virtual_Extensible_LAN +[`bfdd`]: https://docs.frrouting.org/en/latest/bfd.html +[`bgpd`]: 
https://docs.frrouting.org/en/latest/bgp.html +[`etcd`]: https://github.com/coreos/etcd +[`rqlite`]: https://rqlite.io/ +[`zebra`]: https://docs.frrouting.org/en/latest/zebra.html +[bfdd]: https://docs.frrouting.org/en/latest/bfd.html +[bgpd]: https://docs.frrouting.org/en/latest/bgp.html +[bincode]: https://github.com/bincode-org/bincode?tab=readme-ov-file +[bindgen]: https://github.com/rust-lang/rust-bindgen +[bitcode]: https://crates.io/crates/bitcode/ +[bridge]: https://man7.org/linux/man-pages/man8/bridge.8.html +[capnproto]: https://capnproto.org/ +[cbindgen]: https://github.com/mozilla/cbindgen +[distributed SQL]: https://en.wikipedia.org/wiki/Distributed_SQL +[dpdk]: https://www.dpdk.org/ +[etcd]: https://github.com/coreos/etcd +[etherparse]: https://github.com/JulianSchmid/etherparse +[frr]: https://frrouting.org/ +[graphana]: https://grafana.com/ +[kernel]: https://en.wikipedia.org/wiki/Linux_kernel +[kubernetes]: https://kubernetes.io/ +[loki]: https://grafana.com/docs/loki/latest/get-started/overview/ +[netlink]: https://en.wikipedia.org/wiki/Netlink +[network address translation]: https://en.wikipedia.org/wiki/Network_address_translation +[prometheus]: https://prometheus.io/ +[protobuf]: https://protobuf.dev/ +[rqlite]: https://rqlite.io/ +[rte lcores]: https://doc.dpdk.org/api/rte__lcore_8h.html +[serde]: https://serde.rs/ +[tracing]: https://docs.rs/tracing/latest/tracing/ +[unix domain socket]: https://en.wikipedia.org/wiki/Unix_domain_socket +[zebra]: https://docs.frrouting.org/en/latest/zebra.html + + + +[@Fredi-raspall]: https://github.com/Fredi-raspall +[@cesargithedgehog]: https://github.com/cesargithedgehog +[@daniel-noland]: https://github.com/daniel-noland +[@qmonnet]: https://github.com/qmonnet +[@sergeymatov]: https://github.com/sergeymatov +[@thedvorkin]: https://github.com/thedvorkin