From 094da2aef3540a9848279dc31bb30e2c7db796d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Thu, 10 Oct 2024 12:00:52 +0200 Subject: [PATCH 01/88] Uncomment countly server --- ansible/inventory-legacy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/inventory-legacy b/ansible/inventory-legacy index 3988b30d..b14d1f76 100644 --- a/ansible/inventory-legacy +++ b/ansible/inventory-legacy @@ -54,7 +54,7 @@ ams-slack-1.ooni.org # Digital Ocean Amsterdam Hosts [doams] -#doams1-countly.ooni.nu +doams1-countly.ooni.nu # FIXME Disabled due to location tags not working as expected #ams-pg.ooni.org #ams-pg-test.ooni.org From f72960540824acb6fc68bd5d3559352ebe2b1ffc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Thu, 10 Oct 2024 15:48:48 +0200 Subject: [PATCH 02/88] Add findings list to monitoring --- ansible/roles/prometheus/vars/main.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index 6550cd51..abf6d469 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -95,6 +95,12 @@ blackbox_jobs: - "https://api.ooni.io/api/_/global_overview" - "https://api.ooni.org/api/_/global_overview" + - name: "ooni API findings listing" + module: "https_2xx_json" + targets: + - "https://api.ooni.io/api/v1/incidents/search" + - "https://api.ooni.org/api/v1/incidents/search" + # Note: this always returns true by design - name: "OONI API check_report_id" module: "https_2xx_json" From e43b060622d6fa810042d6ff01fbe39a137fb4ee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Thu, 10 Oct 2024 15:57:54 +0200 Subject: [PATCH 03/88] Refactor tags for monitoring roles --- ansible/roles/prometheus/tasks/main.yml | 10 ++++++++-- .../prometheus_alertmanager/tasks/main.yml | 20 +++++++++++++++---- .../tasks/main.yml | 10 ++++++++-- .../prometheus_node_exporter/tasks/main.yml | 12 +++++++++-- 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/ansible/roles/prometheus/tasks/main.yml b/ansible/roles/prometheus/tasks/main.yml index 4980743f..d933611e 100644 --- a/ansible/roles/prometheus/tasks/main.yml +++ b/ansible/roles/prometheus/tasks/main.yml @@ -11,7 +11,10 @@ - "alert_*.yml" notify: - reload prometheus - tags: prometheus + tags: + - monitoring + - prometheus + - config - name: Configure Prometheus template: @@ -23,7 +26,10 @@ validate: "/usr/bin/promtool check config %s" notify: - reload prometheus - tags: prometheus + tags: + - monitoring + - prometheus + - config vars: prometheus_metrics_password_dev: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_dev') }}" prometheus_metrics_password_prod: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}" diff --git a/ansible/roles/prometheus_alertmanager/tasks/main.yml b/ansible/roles/prometheus_alertmanager/tasks/main.yml index 271ef7c3..ec8e6803 100644 --- a/ansible/roles/prometheus_alertmanager/tasks/main.yml +++ b/ansible/roles/prometheus_alertmanager/tasks/main.yml @@ -1,5 +1,8 @@ - name: Installs packages - tags: monitoring, alertmanager + tags: + - monitoring + - alertmanager + - config apt: install_recommends: no cache_valid_time: 86400 @@ -7,7 +10,10 @@ - prometheus-alertmanager - name: Configure Alertmanager templates - tags: monitoring, alertmanager + tags: + - monitoring + - alertmanager + - 
config
   notify:
     - reload alertmanager
   copy:
@@ -20,14 +26,20 @@
     - templates/*.tmpl

 - name: Configure Alertmanager
-  tags: alertmanager
+  tags:
+    - monitoring
+    - alertmanager
+    - config
   lineinfile:
     path: /etc/default/prometheus-alertmanager
     regexp: "^ARGS="
     line: ARGS='--cluster.listen-address= --web.listen-address="127.0.0.1:9093" --web.external-url="https://grafana.ooni.org"'

 - name: Reload Alertmanager
-  tags: alertmanager
+  tags:
+    - monitoring
+    - alertmanager
+    - config
   notify:
     - reload alertmanager
   template:
diff --git a/ansible/roles/prometheus_blackbox_exporter/tasks/main.yml b/ansible/roles/prometheus_blackbox_exporter/tasks/main.yml
index 58385d91..36d660fa 100644
--- a/ansible/roles/prometheus_blackbox_exporter/tasks/main.yml
+++ b/ansible/roles/prometheus_blackbox_exporter/tasks/main.yml
@@ -7,10 +7,16 @@
     mode: 0644
   notify:
     - restart blackbox_exporter
-  tags: blackbox_exporter
+  tags:
+    - monitoring
+    - blackbox_exporter
+    - config

 - name: Setcap
   command: setcap cap_net_raw=ep /usr/bin/prometheus-blackbox-exporter
-  tags: blackbox_exporter
+  tags:
+    - monitoring
+    - blackbox_exporter
+    - config
   notify:
     - restart blackbox_exporter
diff --git a/ansible/roles/prometheus_node_exporter/tasks/main.yml b/ansible/roles/prometheus_node_exporter/tasks/main.yml
index d33fe013..113863eb 100644
--- a/ansible/roles/prometheus_node_exporter/tasks/main.yml
+++ b/ansible/roles/prometheus_node_exporter/tasks/main.yml
@@ -10,7 +10,9 @@
     node_exporter_host: "localhost"
     node_exporter_port: 8100
   tags:
+    - monitoring
     - node_exporter
+    - config

 - name: create ooni configuration directory
   ansible.builtin.file:
     path: /etc/ooni
     state: directory
     owner: root
   tags:
+    - monitoring
     - node_exporter
+    - config

 - name: Add a user to a password file and ensure permissions are set
   community.general.htpasswd:
@@ -29,9 +33,11 @@
     group: www-data
     mode: 0640
   tags:
+    - monitoring
     - node_exporter
+    - config

-- name: Setup oonidata nginx config
+- name: Setup prometheus nginx config
   ansible.builtin.template:
     src: nginx-prometheus.j2
     dest: /etc/nginx/sites-enabled/01-prometheus
@@ -39,6 +45,7 @@
   notify:
     - Restart nginx
   tags:
+    - monitoring
     - node_exporter
     - config

@@ -51,6 +58,7 @@
   notify:
     - Reload nftables
   tags:
-    - nftables
+    - monitoring
     - node_exporter
     - config
+    - nftables

From 6c6e4405add75e2e33cff2e3ca8732cb557ad461 Mon Sep 17 00:00:00 2001
From: DecFox <33030671+DecFox@users.noreply.github.com>
Date: Thu, 10 Oct 2024 19:37:23 +0530
Subject: [PATCH 04/88] fix(prod): increase asg min capacity (#106)

`api.ooni.org` gives us a `503` when calling the incidents service. On
further inspection, it seems the redeployment of the findings service has
been halted due to insufficient memory in the ecs infra setup.

This diff increases the asg capacity to a minimum of 4 ec2 instances,
which should suffice for now.
---
 tf/environments/prod/main.tf | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf
index 70e0ae5d..3c90337e 100644
--- a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -314,9 +314,9 @@ module "ooniapi_cluster" {
   subnet_ids = module.network.vpc_subnet_public[*].id

   # You need to be careful how these are tweaked.
-  asg_min     = 3
+  asg_min     = 4
   asg_max     = 8
-  asg_desired = 3
+  asg_desired = 4

   instance_type = "t3.micro"

From 6aa32d938eb3c257d398d1620a0a6c28e3433344 Mon Sep 17 00:00:00 2001
From: DecFox <33030671+DecFox@users.noreply.github.com>
Date: Fri, 11 Oct 2024 00:07:15 +0530
Subject: [PATCH 05/88] refactor(prod): switch to using t3a.medium instances
 (#107)

It seems we are low on memory on our `t3.micro` instances, leading to
stalled task deployments. This should scale our instances enough to allow
running all service tasks for now.
---
 tf/environments/prod/main.tf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf
index 3c90337e..fa986c65 100644
--- a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -314,11 +314,11 @@ module "ooniapi_cluster" {
   subnet_ids = module.network.vpc_subnet_public[*].id

   # You need to be careful how these are tweaked.
-  asg_min     = 4
+  asg_min     = 2
   asg_max     = 8
-  asg_desired = 4
+  asg_desired = 2

-  instance_type = "t3.micro"
+  instance_type = "t3a.medium"

   tags = merge(
     local.tags,

From 9d1e88a0df7abbaf018051fcf8607232be9d69a3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Fri, 11 Oct 2024 17:48:16 +0200
Subject: [PATCH 06/88] Add record for oonifindings
---
 tf/environments/prod/main.tf | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf
index fa986c65..cf228693 100644
--- a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -598,6 +598,7 @@ locals {
     "ooniauth.${local.environment}.ooni.io" : local.dns_zone_ooni_io,
     "ooniprobe.${local.environment}.ooni.io" : local.dns_zone_ooni_io,
     "oonirun.${local.environment}.ooni.io" : local.dns_zone_ooni_io,
+    "oonifindings.${local.environment}.ooni.io" : local.dns_zone_ooni_io,
   }
   ooniapi_frontend_main_domain_name = "api.${local.environment}.ooni.io"
   ooniapi_frontend_main_domain_name_zone_id = local.dns_zone_ooni_io

From c866c469b9917d18275342a0edf032fe6a41bc5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Sat, 12 Oct 2024 12:13:51 +0200
Subject: [PATCH 07/88] Use same instance size on dev for ecs cluster
---
 tf/environments/dev/main.tf | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf
index d877a5da..09e4636c 100644
--- a/tf/environments/dev/main.tf
+++ b/tf/environments/dev/main.tf
@@ -314,9 +314,9 @@ module "ooniapi_cluster" {

   asg_min     = 2
   asg_max     = 6
-  asg_desired = 3
+  asg_desired = 2

-  instance_type = "t3.micro"
+  instance_type = "t3a.medium"

   tags = merge(
     local.tags,
@@ -651,4 +651,4 @@ resource "aws_route53_record" "ooniapi_frontend_cert_validation" {
 resource "aws_acm_certificate_validation" "ooniapi_frontend" {
   certificate_arn         = aws_acm_certificate.ooniapi_frontend.arn
   validation_record_fqdns = [for record in aws_route53_record.ooniapi_frontend_cert_validation : record.fqdn]
-}
\ No newline at end of file
+}

From 5ae6fc0ec10d1b03c9e2a9e60bc8e96685927fb8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Mon, 14 Oct 2024 16:38:21 +0200
Subject: [PATCH 08/88] Rotate docker log files after 100MB (#108)

Test helpers were running out of disk space. This configuration was
applied manually, but we ought to redeploy it on all hosts at some point.

Co-authored-by: decfox --- tf/modules/ooni_th_droplet/templates/cloud-init-docker.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tf/modules/ooni_th_droplet/templates/cloud-init-docker.yml b/tf/modules/ooni_th_droplet/templates/cloud-init-docker.yml index 4f82bcc6..93135daf 100644 --- a/tf/modules/ooni_th_droplet/templates/cloud-init-docker.yml +++ b/tf/modules/ooni_th_droplet/templates/cloud-init-docker.yml @@ -103,7 +103,9 @@ write_files: content: | { "ipv6": true, - "fixed-cidr-v6": "2001:db8:1::/64" + "fixed-cidr-v6": "2001:db8:1::/64", + "log-driver": "json-file", + "log-opts": {"max-size": "100m", "max-file": "3"} } - path: /etc/nginx/sites-available/default From 91b90a773c86502d2b9caf7e71b93310fec8f296 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 16 Oct 2024 14:04:36 +0200 Subject: [PATCH 09/88] Move researcher access machine to notebook.ooni.org --- ansible/host_vars/data.ooni.org | 7 ++- ansible/host_vars/notebook.ooni.org | 65 +++++++++++++++++++++ ansible/playbook.yml | 8 +++ ansible/roles/nginx/handlers/main.yml | 6 ++ ansible/roles/nginx/tasks/main.yml | 32 ++++++++++ ansible/roles/oonidata/handlers/main.yml | 1 + ansible/roles/oonidata/tasks/jupyterhub.yml | 11 +++- ansible/roles/oonidata/tasks/main.yml | 9 +++ 8 files changed, 137 insertions(+), 2 deletions(-) create mode 100644 ansible/host_vars/notebook.ooni.org diff --git a/ansible/host_vars/data.ooni.org b/ansible/host_vars/data.ooni.org index 7763cdf7..38c40068 100644 --- a/ansible/host_vars/data.ooni.org +++ b/ansible/host_vars/data.ooni.org @@ -54,7 +54,12 @@ ssh_users: [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMJYsbeTjdma5cKyZISOFQfHbwwlZbWugPx9haeOx1UR" ] + michael: + login: michael + comment: "Micheal Collyer" + keys: + - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPN4Ae+KfZEbhJuvHI3PXjgeu4V0ZFIpUy9bFuBKx76W michael.collyer@oii.ox.ac.uk" admin_usernames: [ art, majakomel, mehul, norbel ] non_admin_usernames: [ ain, siti, ingrid, joss, vasilis ] jupyterhub_allowed_users: "{{ ssh_users }}" -admin_group_name: adm \ No newline at end of file +admin_group_name: adm diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org new file mode 100644 index 00000000..096247b1 --- /dev/null +++ b/ansible/host_vars/notebook.ooni.org @@ -0,0 +1,65 @@ +ssh_users: + agrabeli: + login: agrabeli + comment: Maria Xynou + keys: ["ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD0JSwM+t3Uz9lS3Mjoz9oo4vOToWyzboZhYQbP8JY5HvFtAvWanWHnUBO91t6hkgKIMiUqhdCJn26fqkhSGe/bRBaFUocOmuyfcmZoRdi0qzAskmycJsj/w6vWR4x6MYkmJvSeI/MGxjEFt4s2MfOG1tP8CBLUYft9qUleeJa7Jln8c+xbnqB7YngaI190icQHE9NuIB2CXvzbmo3tLtHNMagEwI7VoBDj6mxzTxBd9JhuhF4w5uGxxm0Gp1hzk+15obNnaBS+Anr7jXz8FPwwxCH+XhBZxB1PPpcIayKrf9iLyGtwmhkdDoWCqYAr1mue3LxFso+TZF4bwE4Cjt1 agrabelh@agrabelh"] + art: + login: art + comment: Arturo Filasto + keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJsibU0nsQFFIdolD1POzXOws4VetV0ZNByINRzY8Hx0 arturo@ooni.org"] + majakomel: + login: majakomel + comment: Maja Komel + keys: + - "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQC7gWQL4h/IyMbwDuMIXbTVmNEm8Yx19Ftt0P2e3OyWctSMH7WGaHc6b0dGoGh6Y4x0Kpw5h0iHWshP8Rg0pckNG9LeDjLY9nLR3Jv66ogFQtFi1DAlg4CXe369N70rBN9iurndgXjShW9OV+bY+MOlW8Fmmm67Vg0xFiYuYzjgUOpl4ofkbLGAQ7sJRBzpDV6TqHhGfOdYMDJyfFvurVz0oSyEZPFFRv4Css9iVk7BGsBukCCpUuax8akEeEjxWWCvjYXva7OA0jHKayfPAroZx/OJh01rhFe7wxlu5JwUKOcevvAZqeHh6200C82ijZOCN+Qq9yvxOH+OgzhnQwnoetIbGFgnb4CkDxo7dVLc/DFyObznC4f26f5D1OyPMUX8AEarEVdEPwsEfD2ePQr6qek0XWCWtYvGklb+GRLk9Yn0VL1qwvgrtstHdeXsKONTPKRxaCjWHu18dQaG2qOUnZ+St6SHeL49CN9aav2azNI/YKoQ9SGR4D23XeBRsW8=" + mehul: + login: mehul + comment: Mehul Gulati + keys: + - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDEZSA9TKUaYWG8gfnMoyDZO2S6vsy87xma4R/EzNpveZiOZTYSNn+UDL8NpQRuH5YgdWuQV2E7sKw/PIYA0lC/QTiq8Btqf6sEK5YWXtQy+yn9q5kB/rmi8zjaz0FUNigRrjL+26ao+c7NKpgmR+TRqbRd5VeJ46PuFD5M3c+MBeUoF1PT0zfioQFJ1mQoXwVix0n260clEXQDp4t0GZuNpWGTS+YTuJZ2vl6TDZtt8jrnENd99QArr2KU+NMTq8T2KYcPeQOoYsm7v/1TBkbv9UStllhjdE7HZSivPT8oRkF2YZYgytDxtCZG8i5iCK+vbNn6QmZMjuXPoBUeW+Njm70tlsirrKpUX+QiogA2qljxPD9st2eUkA7cATyOBkK7WLh1HYv2xyKpPtkkaELG+EHjmaVjVdyVAgUYwqg+MbIw1OyDpNmMZcW3iOpGpflXPMmLjKNMhee0//G7NxcGfwmIMbIiBkeofOnWDrMo+0PRULFtn6C7aA7ddirck+k=" + norbel: + login: norbel + comment: Norbel Ambanumben + keys: + - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDBXprrutdT6AhrV9hWBKjyzq6RqGmCBWpWxi3qwJyRcBJfkiEYKV9QWl3H0g/Sg9JzLd9lWG2yfAai7cyBAT4Ih0+OhwQ0V7wkhBn4YkNjs7d4BGPHjuLIywS9VtmiyH7VafikMjmqPLL/uPBIbRrx9RuSfLkAuN9XFZpVmqzWY8ePpcRCvnG6ucPxEY8o+4j5nfTrgxSaIT31kH16/PFJe07tn1SZjxZE4sZTz/p9xKt6s8HXmlP3RdnXSpXWmH8ZwYDrNhkcH8m6mC3giiqSKThFdwvQVflRRvn9pAlUOhy6KIBtAt1KobVJtOCPrrkcLhQ1C+2P9wKhfYspCGrScFGnrUqumLxPpwlqILxJvmgqGAtkm8Ela9f2D9sEv8CUv5x9XptZKlyRhtOLixvLYoJlwfXXnmXa8T1pg8+4063BhHUOu/bg0InpSp3hdscOfk0R8FtDlXnn6COwbPXynIt4PxzIxD/WQhP0ymgH3ky6ClB5wRBVhOqYvxQw32n2QFS9A5ocga+nATiOE7BTOufgmDCA/OIXfJ/GukXRaMCBsvlx7tObHS1LOMt0I+WdoOEjI0ARUrFzwoiTrs9QYmd922e7S35EnheT3JjnCTjebJrCNtwritUy8vjsN/M27wJs7MAXleT7drwXXnm+3xYrH+4KQ+ru0dxMe1zfBw== aanorbel@gmail.com" + ain: + login: ain + comment: Ain + keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIH6Js4xtJq7AoYA8mFraQg8vYgKz/glil9AaPq4lDwtg ain@intertubes"] + joss: + login: joss + comment: Joss Wright + keys: + [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC40MVrMUuP93UvmyTc6cGGKbdurK+CbuoQW0b4i20EPf8pjXjrTS3b/plh7y4egUfx7/2e5l321Ui8E4tuzDWjtJsSFY9l31msEnw6PTHMzOj8kVBWqHWidVZHYPpd9eVa+3ALL9HmLDQuwyhIXXaU2ExS3XZH0GJPUxgf8tubH7qteyANWTIh1XhV0fnoeBo3fvdGVkNiMLc1PSDp/iprMurdsvwCJC42+Z5R35ORpK7FJhr38Js2Ag1NwUpo3Li+PhErfoQ5A+x64p1NRm1Bnw1Z8eFHuDI6WXuzUHhuy+11M92CtaAVEoblfx75SaCftoiO0Khn6kZ9XDed+JM1 joss@pseudonymity.net" + ] + ingrid: + login: ingrid + comment: Ingrid Epure + keys: + [ + "ssh-rsa AAAB3NzaC1yc2EAAAADAQABAAACAQCTJQDb/Ucq5CRGqSJbNz33pB6fYtk7Pi+6LlIaV9QLhByp/G2/g6ae6Eb/TimZtxpdeIwpAmACmUn2p+mCLMHjpollUK2f3dUjmXiUSNGMPRPRxQoIvzf56patUCQRS+S7zDUKTDW/5e18CrIj0sFCC27y/pS6mmmeedHA6gmpW7L6kM57BlsxFu79rr/o/nrNH+qceJBEd8fM93yoIdEwxPHZyKJ5kj9+lh+4TtDLxxkwFfc6Kce1d0qxfpX1NzIbK5Vp8JlXrGEWbOFFT8S7Ru+j1/g/ptUjsXJ7DpH1wwlF6wYsU0DJuhkLv6XFZQuoHYwpZ4jmnJRWrXSgdylPk67M5Dr9aB2j0WGJNZysiXVZQZmoMUhfrNxaGVv6gB48krE6ysUoLrenR68aLOYqF8Yqvu1lCIyds1ORtjnpxWxFB7NS89us4KFofAMW+qeg/g3nEYvln9/S0b58goToNIw/p7wP9WOeh7JuM/FBT5ahJbeYpXapJh1WW6Rt48RGVwxFLXbcnH8wpCfhUw7fIVpXMhbfhtWTlWVJEAyk3eLWdNEJ7AH6jaqTdfTa4qBgrof0MgoZrb64qFDAsG9Z80Uj9oC2Zdy+gwDu76WJQfSKaD7hmq0w8khoFSVju7fvcfd5HWgLZbptCIw51mJSMQIQWs8Y/iGijTSckXXCXQ==", + ] + siti: + login: siti + comment: "Siti Nurliza" + keys: + [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKqG1VepfzDdSp3zG27jZq3S9/62CKPLh93F///ht9rf", + ] + vasilis: + login: 
vasilis
    comment: "Vasilis Ververis"
    keys:
      [
        "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMJYsbeTjdma5cKyZISOFQfHbwwlZbWugPx9haeOx1UR"
      ]
  michael:
    login: michael
    comment: "Micheal Collyer"
    keys:
      - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPN4Ae+KfZEbhJuvHI3PXjgeu4V0ZFIpUy9bFuBKx76W michael.collyer@oii.ox.ac.uk"
admin_usernames: [ art, agrabeli, majakomel, mehul, norbel ]
non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael ]
jupyterhub_allowed_users: "{{ ssh_users }}"
admin_group_name: admin
diff --git a/ansible/playbook.yml b/ansible/playbook.yml
index dece3fbe..b71d2212 100644
--- a/ansible/playbook.yml
+++ b/ansible/playbook.yml
@@ -48,6 +48,14 @@
   roles:
     - oonidata

+- name: Deploy notebook hosts
+  hosts: notebook.ooni.org
+  become: true
+  vars:
+    enable_oonipipeline_worker: false
+  roles:
+    - oonidata
+
 # commented out due to the fact it requires manual config of ~/.ssh/config
 #- name: Setup codesign box
 #  hosts: codesign-box
diff --git a/ansible/roles/nginx/handlers/main.yml b/ansible/roles/nginx/handlers/main.yml
index eb1d1671..dff67402 100644
--- a/ansible/roles/nginx/handlers/main.yml
+++ b/ansible/roles/nginx/handlers/main.yml
@@ -13,3 +13,9 @@
   service:
     name: nginx
     state: reloaded
+
+- name: reload nftables
+  tags: nftables
+  ansible.builtin.systemd_service:
+    name: nftables
+    state: reloaded
diff --git a/ansible/roles/nginx/tasks/main.yml b/ansible/roles/nginx/tasks/main.yml
index b93304c1..23bb4ecd 100644
--- a/ansible/roles/nginx/tasks/main.yml
+++ b/ansible/roles/nginx/tasks/main.yml
@@ -37,3 +37,35 @@
   notify: reload nginx
   tags:
     - nginx
+
+- name: create config dir
+  ansible.builtin.file:
+    path: /etc/ooni/nftables/tcp
+    state: directory
+    owner: root
+    group: root
+    mode: 0755
+  tags:
+    - nftables
+
+- name: allow incoming TCP connections to Nginx port 80
+  blockinfile:
+    path: /etc/ooni/nftables/tcp/80.nft
+    create: yes
+    block: |
+      add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP"
+  notify:
+    - reload nftables
+  tags:
+    - nginx
+
+- name: allow incoming TCP connections to Nginx port 443
+  blockinfile:
+    path: /etc/ooni/nftables/tcp/443.nft
+    create: yes
+    block: |
+      add rule inet filter input tcp dport 443 counter accept comment "incoming HTTPS"
+  notify:
+    - reload nftables
+  tags:
+    - nginx
diff --git a/ansible/roles/oonidata/handlers/main.yml b/ansible/roles/oonidata/handlers/main.yml
index f12d0aa6..61dac259 100644
--- a/ansible/roles/oonidata/handlers/main.yml
+++ b/ansible/roles/oonidata/handlers/main.yml
@@ -14,3 +14,4 @@
   ansible.builtin.systemd_service:
     name: nginx
     state: reloaded
+
diff --git a/ansible/roles/oonidata/tasks/jupyterhub.yml b/ansible/roles/oonidata/tasks/jupyterhub.yml
index b6fa2f07..040cdeab 100644
--- a/ansible/roles/oonidata/tasks/jupyterhub.yml
+++ b/ansible/roles/oonidata/tasks/jupyterhub.yml
@@ -3,12 +3,14 @@
   ansible.builtin.shell:
     cmd: "{{ miniconda_install_dir }}/bin/conda install -c conda-forge -y jupyterhub"
   tags:
+    - oonidata
     - jupyterhub

 - name: Install jupyterlab and notebook
   ansible.builtin.shell:
     cmd: "{{ miniconda_install_dir }}/bin/conda install -y jupyterlab notebook"
   tags:
+    - oonidata
     - jupyterhub

 - name: Install jupyterhub packages
@@ -16,12 +18,14 @@
     name:
       - npm
   tags:
+    - oonidata
     - jupyterhub

 - name: Install configurable-http-proxy
   ansible.builtin.shell:
     cmd: "npm install -g configurable-http-proxy"
   tags:
+    - oonidata
     - jupyterhub

 - name: Create jupyterhub directories
@@ -33,6 +37,7 @@
     - "{{ jupyterhub_runtime_dir }}"
    - "{{ jupyterhub_runtime_dir }}/state"
   tags:
+    - oonidata
    - 
jupyterhub - name: Write jupyterhub config @@ -44,6 +49,7 @@ notify: - Restart jupyterhub tags: + - oonidata - jupyterhub - config @@ -57,6 +63,7 @@ notify: - Restart jupyterhub tags: + - oonidata - jupyterhub - name: Ensure the JupyterHub service is started with daemon-reload @@ -66,6 +73,7 @@ enabled: true daemon_reload: true tags: + - oonidata - jupyterhub - config @@ -78,5 +86,6 @@ notify: - Reload nginx tags: + - oonidata - jupyterhub - - config \ No newline at end of file + - config diff --git a/ansible/roles/oonidata/tasks/main.yml b/ansible/roles/oonidata/tasks/main.yml index fa19b49b..a13328e2 100644 --- a/ansible/roles/oonidata/tasks/main.yml +++ b/ansible/roles/oonidata/tasks/main.yml @@ -6,27 +6,33 @@ shell: /bin/false createhome: no tags: + - oonidata - oonipipeline - jupyterhub - ansible.builtin.include_role: name: miniconda tags: + - oonidata - conda - ansible.builtin.import_tasks: jupyterhub.yml when: enable_jupyterhub tags: + - oonidata - jupyterhub - ansible.builtin.include_role: name: nginx tags: + - oonidata - nginx + - ansible.builtin.include_role: name: geerlingguy.certbot tags: + - oonidata - certbot vars: certbot_admin_email: admin@ooni.org @@ -45,15 +51,18 @@ - curl - git tags: + - oonidata - oonipipeline - name: Install OONI pipeline from pip ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/pip install -e 'git+https://github.com/ooni/data#egg=oonipipeline&subdirectory=oonipipeline'" tags: + - oonidata - oonipipeline - ansible.builtin.import_tasks: oonipipeline-worker.yml when: enable_oonipipeline_worker tags: + - oonidata - oonipipeline From 1f087d7d900bde238f97281032db6444f04eb217 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 16 Oct 2024 16:40:32 +0200 Subject: [PATCH 10/88] Add socat proxy to oonidata host --- ansible/roles/oonidata/defaults/main.yml | 1 + ansible/roles/oonidata/handlers/main.yml | 7 ++++++- ansible/roles/oonidata/tasks/main.yml | 14 +++++++++++++- .../oonidata/templates/oonidata-proxy.service.j2 | 15 +++++++++++++++ 4 files changed, 35 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/oonidata/templates/oonidata-proxy.service.j2 diff --git a/ansible/roles/oonidata/defaults/main.yml b/ansible/roles/oonidata/defaults/main.yml index c2b0d9d8..144c8261 100644 --- a/ansible/roles/oonidata/defaults/main.yml +++ b/ansible/roles/oonidata/defaults/main.yml @@ -5,4 +5,5 @@ oonipipeline_runtime_dir: /srv/oonipipeline tls_cert_dir: /etc/letsencrypt/live admin_group_name: admin enable_oonipipeline_worker: true +enable_oonidata_proxy: false enable_jupyterhub: true diff --git a/ansible/roles/oonidata/handlers/main.yml b/ansible/roles/oonidata/handlers/main.yml index 61dac259..df50cce8 100644 --- a/ansible/roles/oonidata/handlers/main.yml +++ b/ansible/roles/oonidata/handlers/main.yml @@ -10,8 +10,13 @@ state: restarted daemon_reload: true +- name: Restart oonidata-proxy + ansible.builtin.systemd_service: + name: oonidata-proxy + state: restarted + daemon_reload: true + - name: Reload nginx ansible.builtin.systemd_service: name: nginx state: reloaded - diff --git a/ansible/roles/oonidata/tasks/main.yml b/ansible/roles/oonidata/tasks/main.yml index a13328e2..c7cc4cdd 100644 --- a/ansible/roles/oonidata/tasks/main.yml +++ b/ansible/roles/oonidata/tasks/main.yml @@ -28,7 +28,6 @@ - oonidata - nginx - - ansible.builtin.include_role: name: geerlingguy.certbot tags: @@ -50,6 +49,7 @@ - net-tools - curl - git + - socat tags: - oonidata - oonipipeline @@ -66,3 +66,15 @@ tags: - oonidata - oonipipeline + +- name: 
Write oonidataproxy service
+  ansible.builtin.template:
+    src: oonidata-proxy.service.j2
+    dest: "/etc/systemd/system/oonidata-proxy.service"
+    owner: root
+    group: root
+    mode: "0644"
+  notify:
+    - Restart oonidata-proxy
+  tags:
+    - oonipipeline
diff --git a/ansible/roles/oonidata/templates/oonidata-proxy.service.j2 b/ansible/roles/oonidata/templates/oonidata-proxy.service.j2
new file mode 100644
index 00000000..ca4f6f0a
--- /dev/null
+++ b/ansible/roles/oonidata/templates/oonidata-proxy.service.j2
@@ -0,0 +1,15 @@
+# OONI Data proxy systemd service
+[Unit]
+
+[Service]
+User=oonipipeline
+Restart=always
+PrivateTmp=yes
+PrivateDevices=yes
+ProtectKernelTunables=yes
+ProtectKernelModules=yes
+ExecStart="socat tcp-listen:9000,reuseaddr,fork tcp:data.ooni.org:9000"
+
+[Install]
+# Start service when system boots
+WantedBy=multi-user.target

From 102fba63e459a55024b53124585446e39904a492 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Wed, 16 Oct 2024 16:55:45 +0200
Subject: [PATCH 11/88] Move pip installs into oonidata role
---
 ansible/roles/miniconda/tasks/main.yml |  8 +-------
 ansible/roles/oonidata/tasks/main.yml  | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/ansible/roles/miniconda/tasks/main.yml b/ansible/roles/miniconda/tasks/main.yml
index 0ea358b3..0bddf448 100644
--- a/ansible/roles/miniconda/tasks/main.yml
+++ b/ansible/roles/miniconda/tasks/main.yml
@@ -12,10 +12,4 @@
 - include_tasks: install.yml
   when: not miniconda_bin.stat.exists

-- name: "install conda packages"
-  ansible.builtin.shell:
-    cmd: "{{ miniconda_install_dir }}/bin/conda install -y {{ item }}"
-  loop:
-    - pandas
-    - numpy
-    - altair
+
diff --git a/ansible/roles/oonidata/tasks/main.yml b/ansible/roles/oonidata/tasks/main.yml
index c7cc4cdd..5fa59808 100644
--- a/ansible/roles/oonidata/tasks/main.yml
+++ b/ansible/roles/oonidata/tasks/main.yml
@@ -61,6 +61,20 @@
     - oonidata
     - oonipipeline

+- name: "install conda packages"
+  ansible.builtin.shell:
+    cmd: "{{ miniconda_install_dir }}/bin/conda install -y {{ item }}"
+  loop:
+    - pandas
+    - numpy
+    - altair
+
+- name: "install pip packages"
+  ansible.builtin.shell:
+    cmd: "{{ miniconda_install_dir }}/bin/pip install {{ item }}"
+  loop:
+    - clickhouse-driver
+
 - ansible.builtin.import_tasks: oonipipeline-worker.yml
   when: enable_oonipipeline_worker
   tags:

From c440627250144de62f3c897b316d7c39939a5163 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Wed, 16 Oct 2024 17:02:35 +0200
Subject: [PATCH 12/88] Fix systemd unit
---
 ansible/roles/oonidata/templates/oonidata-proxy.service.j2 | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ansible/roles/oonidata/templates/oonidata-proxy.service.j2 b/ansible/roles/oonidata/templates/oonidata-proxy.service.j2
index ca4f6f0a..ea539876 100644
--- a/ansible/roles/oonidata/templates/oonidata-proxy.service.j2
+++ b/ansible/roles/oonidata/templates/oonidata-proxy.service.j2
@@ -8,7 +8,7 @@ PrivateTmp=yes
 PrivateDevices=yes
 ProtectKernelTunables=yes
 ProtectKernelModules=yes
-ExecStart="socat tcp-listen:9000,reuseaddr,fork tcp:data.ooni.org:9000"
+ExecStart=/usr/bin/socat tcp-listen:9000,reuseaddr,fork tcp:data.ooni.org:9000

 [Install]
 # Start service when system boots
 WantedBy=multi-user.target

From 542d87b9e1f88e79911917771794b35ec33ad44d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Thu, 17 Oct 2024 14:05:25 +0200
Subject: [PATCH 13/88] Move package installs into variable
---
 ansible/roles/oonidata/defaults/main.yml | 11 ++++++++++
ansible/roles/oonidata/tasks/main.yml | 28 ++++++++++++++---------- 2 files changed, 28 insertions(+), 11 deletions(-) diff --git a/ansible/roles/oonidata/defaults/main.yml b/ansible/roles/oonidata/defaults/main.yml index 144c8261..4c3b844b 100644 --- a/ansible/roles/oonidata/defaults/main.yml +++ b/ansible/roles/oonidata/defaults/main.yml @@ -7,3 +7,14 @@ admin_group_name: admin enable_oonipipeline_worker: true enable_oonidata_proxy: false enable_jupyterhub: true +conda_forge_packages: + - seaborn + - dask +conda_packages: + - pandas + - numpy + - altair + - bokeh +pip_packages: + - "-e 'git+https://github.com/ooni/data#egg=oonipipeline&subdirectory=oonipipeline'" + - "clickhouse-driver" diff --git a/ansible/roles/oonidata/tasks/main.yml b/ansible/roles/oonidata/tasks/main.yml index 5fa59808..f2040bd2 100644 --- a/ansible/roles/oonidata/tasks/main.yml +++ b/ansible/roles/oonidata/tasks/main.yml @@ -53,27 +53,33 @@ tags: - oonidata - oonipipeline + - packages -- name: Install OONI pipeline from pip +- name: "install conda packages" ansible.builtin.shell: - cmd: "{{ miniconda_install_dir }}/bin/pip install -e 'git+https://github.com/ooni/data#egg=oonipipeline&subdirectory=oonipipeline'" + cmd: "{{ miniconda_install_dir }}/bin/conda install -y {{ item }}" + loop: "{{ conda_packages }}" tags: - oonidata - oonipipeline + - packages -- name: "install conda packages" +- name: Install conda-forge packages ansible.builtin.shell: - cmd: "{{ miniconda_install_dir }}/bin/conda install -y {{ item }}" - loop: - - pandas - - numpy - - altair + cmd: "{{ miniconda_install_dir }}/bin/conda install -c conda-forge -y {{ item }}" + loop: "{{ conda_forge_packages }}" + tags: + - oonidata + - packages -- name: "install pip packages" +- name: "Install pip packages" ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/pip install {{ item }}" - loop: - - clickhouse-driver + loop: "{{ pip_packages }}" + tags: + - oonidata + - oonipipeline + - packages - ansible.builtin.import_tasks: oonipipeline-worker.yml when: enable_oonipipeline_worker From afccf346da67ac7a214d6655f1ee1dd3037d9b03 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Thu, 17 Oct 2024 15:11:44 +0200 Subject: [PATCH 14/88] Add pomegranate --- ansible/roles/oonidata/defaults/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/oonidata/defaults/main.yml b/ansible/roles/oonidata/defaults/main.yml index 4c3b844b..83787efa 100644 --- a/ansible/roles/oonidata/defaults/main.yml +++ b/ansible/roles/oonidata/defaults/main.yml @@ -18,3 +18,4 @@ conda_packages: pip_packages: - "-e 'git+https://github.com/ooni/data#egg=oonipipeline&subdirectory=oonipipeline'" - "clickhouse-driver" + - pomegranate From 00ff542da5c5c8f1af5dabbeeb2caa2677701857 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Thu, 17 Oct 2024 18:11:52 +0200 Subject: [PATCH 15/88] Add pgmpy package --- ansible/roles/oonidata/defaults/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/oonidata/defaults/main.yml b/ansible/roles/oonidata/defaults/main.yml index 83787efa..e497864d 100644 --- a/ansible/roles/oonidata/defaults/main.yml +++ b/ansible/roles/oonidata/defaults/main.yml @@ -19,3 +19,4 @@ pip_packages: - "-e 'git+https://github.com/ooni/data#egg=oonipipeline&subdirectory=oonipipeline'" - "clickhouse-driver" - pomegranate + - pgmpy From cd561e3e5118a829d759539e65cb54419666fc48 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 21 Oct 2024 12:07:22 +0200 Subject: [PATCH 16/88] Add 
statsmodels --- ansible/roles/oonidata/defaults/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/ansible/roles/oonidata/defaults/main.yml b/ansible/roles/oonidata/defaults/main.yml index e497864d..a3ae96df 100644 --- a/ansible/roles/oonidata/defaults/main.yml +++ b/ansible/roles/oonidata/defaults/main.yml @@ -10,6 +10,7 @@ enable_jupyterhub: true conda_forge_packages: - seaborn - dask + - statsmodels conda_packages: - pandas - numpy From e8a68124bd4aa4a1fb2d8f2757a783f444490748 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 6 Nov 2024 17:58:40 +0100 Subject: [PATCH 17/88] Add ben --- ansible/host_vars/notebook.ooni.org | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org index 096247b1..029edcc1 100644 --- a/ansible/host_vars/notebook.ooni.org +++ b/ansible/host_vars/notebook.ooni.org @@ -59,7 +59,13 @@ ssh_users: comment: "Micheal Collyer" keys: - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPN4Ae+KfZEbhJuvHI3PXjgeu4V0ZFIpUy9bFuBKx76W michael.collyer@oii.ox.ac.uk" + benginoe: + login: benginoe + comment: "Ben Ginoe" + keys: + - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOWdWCATiHUAzoS3mn3pFMIYDmi3n4Ekuzv5cEtvV0W1 root@parrot" + admin_usernames: [ art, agrabeli, majakomel, mehul, norbel ] -non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael ] +non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael, benginoe ] jupyterhub_allowed_users: "{{ ssh_users }}" admin_group_name: admin From fbfcb83118ac9b214bd125f952632e274cccdae6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 11 Nov 2024 12:24:18 +0100 Subject: [PATCH 18/88] Add notes about increasing disk size --- docs/disk-increase.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 docs/disk-increase.md diff --git a/docs/disk-increase.md b/docs/disk-increase.md new file mode 100644 index 00000000..b977c991 --- /dev/null +++ b/docs/disk-increase.md @@ -0,0 +1,17 @@ +Below are some notes on how to resize the disks when a new drive is added to +our dedicated hosts: + +``` +fdisk /dev/nvme3n1 +# create gpt partition table and new RAID 5 (label 42) partition using the CLI +mdadm --manage /dev/md3 --add /dev/nvme3n1p1 +cat /proc/mdstat +# Take note of the volume count (4) and validate that nvme3n1p1 is marked as spare ("S") +mdadm --grow --raid-devices=4 /dev/md3 +``` + +``` +# resize2fs /dev/md3 +# df -h | grep md3 +/dev/md3 2.6T 1.2T 1.3T 48% / +``` From 34da54d7eaf773ddeee30afd14ee61421cfdcd86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 11 Nov 2024 14:04:43 +0100 Subject: [PATCH 19/88] Add Hostname naming policy to docs --- README.md | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5d2398e8..a5069ca4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,8 @@ # OONI Devops +This document outlines some of the best practices we follow when developing and +deploying OONI services. 

## Infrastructure Tiers

We divide our infrastructure components into 3 tiers:

- [ ] OONI bridges
- [ ] OONI.org website
- [x] Web Connectivity test helpers
-- [ ] Code signing
+- [x] Code signing

### Tier 1 (Essential) components

- [x] OONI Explorer
- [x] OONI Run
- [ ] OONI Data analysis pipeline
-- [ ] OONI Findings API
+- [x] OONI Findings API
- [x] Website analytics

### Tier 2 (Non-Essential) components

- [ ] Test list editor
- [ ] Jupyter notebooks
- [ ] Countly

## DNS and Domains

The primary domains used by the backend are:

- `ooni.org`
- `ooni.io`
- `ooni.nu`

### DNS naming policy

The public-facing name of a service follows this format:

- `<name>.ooni.org`

Examples:

- `explorer.ooni.org`
- `run.ooni.org`

Public-facing means the FQDNs are used directly by external users, services, or
embedded in the probes. They cannot be changed or retired without causing
outages.

Use public-facing names sparingly and, when possible, start off by creating a
private name first.
Not every host needs a public-facing name. For example, staging and testing
environments might not have one.

Each service also has a public name which points to the specific host running
that service; these are hosted in the `.io` zone.
This is helpful because sometimes you might have the same host running multiple
services, or multiple services behind the same public service endpoint (e.g. in
the case of an API gateway setup).

Names in the `.io` zone should always include the environment they relate to:

- `<name>.prod.ooni.io` for production services
- `<name>.test.ooni.io` for test services

When multiple instances of a service may be running, you can append a number to
the service name. Otherwise the service name should contain only alphabetic
characters.

Examples:

- `clickhouse.prod.ooni.io`
- `postgres0.prod.ooni.io`
- `postgres1.prod.ooni.io`
- `prometheus.prod.ooni.io`
- `grafana.prod.ooni.io`

Finally, the actual host which runs the service should have an FQDN defined
inside the `.nu` zone.

This might not apply to every host, especially in a cloud environment. The
FQDNs in the `.nu` zone are the ones stored in the ansible inventory file and
used as targets for configuration management.

The structure of these domains is:

- `<name>.<location>.[prod|test].ooni.nu`

The location tag can be either just the provider name, or the provider name
followed by `-` and the location, as in the sketch and list below.

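As a minimal, purely hypothetical sketch of how the zones tie together (this is
not the actual production Terraform; it assumes a zone id local such as
`local.dns_zone_ooni_io` from `tf/environments/prod`), a `.io` service name can
be published as a CNAME onto the `.nu` host that currently runs it:

```hcl
# Hypothetical example: the service name clickhouse1.prod.ooni.io points at
# the physical host data1.htz-fsn.prod.ooni.nu which runs that service.
resource "aws_route53_record" "clickhouse1_prod_ooni_io" {
  zone_id = local.dns_zone_ooni_io
  name    = "clickhouse1.prod.ooni.io"
  type    = "CNAME"
  ttl     = 300
  records = ["data1.htz-fsn.prod.ooni.nu"]
}
```
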
+ +Here is a list of location tags: + +- `htz-fsn`: Hetzner on Falkenstein +- `htz-hel`: Hetzner in Helsinki +- `grh-ams`: Greenhost in Amsterdam +- `grh-mia`: Greenhost in Miami +- `aws-fra`: AWS in Europe (Frankfurt) + +Examples: + +- `monitoring.htz-fsn.prod.ooni.nu` From db8334abe90276b5437f0db21bceca6a45c1cb39 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 22 Nov 2024 16:40:17 +0100 Subject: [PATCH 20/88] Prod pipeline v5 (#112) Devops related work to get OONI Pipeline v5 production ready --- ansible/group_vars/all/vars.yml | 6 +- ansible/group_vars/clickhouse/vars.yml | 177 ++++++++++++++++ ansible/group_vars/dev/vars.yml | 4 +- ansible/group_vars/prod/vars.yml | 8 +- ansible/host_vars/data3.htz-fsn.prod.ooni.nu | 2 + ansible/host_vars/notebook.ooni.org | 26 ++- ansible/inventory | 9 + ansible/playbook-bootstrap.yml | 1 - ansible/playbook.yml | 54 ++--- ansible/requirements.yml | 7 +- ansible/roles/bootstrap/handlers/main.yml | 18 ++ ansible/roles/bootstrap/tasks/main.yml | 44 ++++ ansible/roles/bootstrap/templates/bashrc | 113 ++++++++++ .../roles/bootstrap/templates/journald.conf | 11 + .../roles/bootstrap/templates/resolved.conf | 9 + ansible/roles/clickhouse/tasks/main.yml | 73 ------- .../roles/clickhouse/templates/ooni_users.xml | 30 --- ansible/roles/miniconda/tasks/main.yml | 3 +- ansible/roles/nftables/defaults/main.yml | 10 + ansible/roles/nftables/tasks/main.yml | 15 +- .../roles/nftables/templates/nftables.conf | 1 - ansible/roles/nftables/templates/rule.nft.j2 | 4 + ansible/roles/nginx/handlers/main.yml | 6 - ansible/roles/nginx/tasks/main.yml | 36 ++-- ansible/roles/oonidata/defaults/main.yml | 9 +- ansible/roles/oonidata/meta/requirements.yml | 3 + ansible/roles/oonidata/tasks/jupyterhub.yml | 22 ++ ansible/roles/oonidata/tasks/main.yml | 47 +---- .../templates/oonidata-proxy.service.j2 | 15 -- .../templates/oonipipeline-config.toml.j2 | 3 +- .../oonidata_clickhouse/defaults/main.yml | 0 .../oonidata_clickhouse/handlers/main.yml | 1 + .../roles/oonidata_clickhouse/tasks/main.yml | 38 ++++ .../handlers/main.yml | 6 - .../prometheus_node_exporter/tasks/main.yml | 15 +- ansible/roles/ssh_users/tasks/main.yml | 23 +- ansible/roles/tailnet/tasks/main.yml | 4 + docs/merge-tree-replication.md | 127 +++++++++++ tf/environments/prod/.terraform.lock.hcl | 198 +++++++++--------- tf/environments/prod/dns_records.tf | 48 +++++ tf/environments/prod/main.tf | 12 ++ tf/environments/prod/versions.tf | 4 + 42 files changed, 892 insertions(+), 350 deletions(-) create mode 100644 ansible/group_vars/clickhouse/vars.yml create mode 100644 ansible/host_vars/data3.htz-fsn.prod.ooni.nu create mode 100644 ansible/roles/bootstrap/handlers/main.yml create mode 100644 ansible/roles/bootstrap/templates/bashrc create mode 100644 ansible/roles/bootstrap/templates/journald.conf create mode 100644 ansible/roles/bootstrap/templates/resolved.conf delete mode 100644 ansible/roles/clickhouse/tasks/main.yml delete mode 100644 ansible/roles/clickhouse/templates/ooni_users.xml create mode 100644 ansible/roles/nftables/defaults/main.yml create mode 100644 ansible/roles/nftables/templates/rule.nft.j2 create mode 100644 ansible/roles/oonidata/meta/requirements.yml delete mode 100644 ansible/roles/oonidata/templates/oonidata-proxy.service.j2 create mode 100644 ansible/roles/oonidata_clickhouse/defaults/main.yml create mode 100644 ansible/roles/oonidata_clickhouse/handlers/main.yml create mode 100644 ansible/roles/oonidata_clickhouse/tasks/main.yml create mode 100644 
ansible/roles/tailnet/tasks/main.yml create mode 100644 docs/merge-tree-replication.md diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index 936fd374..17712861 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -23,7 +23,7 @@ ssh_users: keys: - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDBXprrutdT6AhrV9hWBKjyzq6RqGmCBWpWxi3qwJyRcBJfkiEYKV9QWl3H0g/Sg9JzLd9lWG2yfAai7cyBAT4Ih0+OhwQ0V7wkhBn4YkNjs7d4BGPHjuLIywS9VtmiyH7VafikMjmqPLL/uPBIbRrx9RuSfLkAuN9XFZpVmqzWY8ePpcRCvnG6ucPxEY8o+4j5nfTrgxSaIT31kH16/PFJe07tn1SZjxZE4sZTz/p9xKt6s8HXmlP3RdnXSpXWmH8ZwYDrNhkcH8m6mC3giiqSKThFdwvQVflRRvn9pAlUOhy6KIBtAt1KobVJtOCPrrkcLhQ1C+2P9wKhfYspCGrScFGnrUqumLxPpwlqILxJvmgqGAtkm8Ela9f2D9sEv8CUv5x9XptZKlyRhtOLixvLYoJlwfXXnmXa8T1pg8+4063BhHUOu/bg0InpSp3hdscOfk0R8FtDlXnn6COwbPXynIt4PxzIxD/WQhP0ymgH3ky6ClB5wRBVhOqYvxQw32n2QFS9A5ocga+nATiOE7BTOufgmDCA/OIXfJ/GukXRaMCBsvlx7tObHS1LOMt0I+WdoOEjI0ARUrFzwoiTrs9QYmd922e7S35EnheT3JjnCTjebJrCNtwritUy8vjsN/M27wJs7MAXleT7drwXXnm+3xYrH+4KQ+ru0dxMe1zfBw== aanorbel@gmail.com" -admin_usernames: [ art, majakomel, mehul, norbel ] +admin_usernames: [ art, mehul ] root_usernames: [ art, mehul ] -non_admin_usernames: [ agrabeli ] -deactivated_usernames: [ sbs, federico, sarath ] \ No newline at end of file +non_admin_usernames: [ ] +deactivated_usernames: [ sbs, federico, sarath ] diff --git a/ansible/group_vars/clickhouse/vars.yml b/ansible/group_vars/clickhouse/vars.yml new file mode 100644 index 00000000..129a81cb --- /dev/null +++ b/ansible/group_vars/clickhouse/vars.yml @@ -0,0 +1,177 @@ +nftables_clickhouse_allow: + - fqdn: data1.htz-fsn.prod.ooni.nu + ip: 142.132.254.225 + - fqdn: data2.htz-fsn.prod.ooni.nu + ip: 88.198.54.12 + - fqdn: data3.htz-fsn.prod.ooni.nu + ip: 168.119.7.188 + - fqdn: notebook.ooni.org + ip: 138.201.19.39 + +nftables_zookeeper_allow: + - fqdn: data1.htz-fsn.prod.ooni.nu + ip: 142.132.254.225 + - fqdn: data2.htz-fsn.prod.ooni.nu + ip: 88.198.54.12 + - fqdn: data3.htz-fsn.prod.ooni.nu + ip: 168.119.7.188 + - fqdn: notebook.ooni.org + ip: 138.201.19.39 + +clickhouse_version: 24.8.6.70 + +clickhouse_config: + max_connections: 4096 + keep_alive_timeout: 3 + max_concurrent_queries: 100 + max_server_memory_usage: 0 + max_thread_pool_size: 10000 + max_server_memory_usage_to_ram_ratio: 0.9 + total_memory_profiler_step: 4194304 + total_memory_tracker_sample_probability: 0 + uncompressed_cache_size: 8589934592 + mark_cache_size: 5368709120 + # max_open_files: 262144 + mmap_cache_size: 1000 + compiled_expression_cache_size: 134217728 + compiled_expression_cache_elements_size: 10000 + # tmp_policy: tmp + default_profile: default + custom_settings_prefixes: "" + system_profile: write + # buffer_profile: default + default_database: default + # timezone: + # umask: 027 + mlock_executable: true + remap_executable: true + builtin_dictionaries_reload_interval: 3600 + max_session_timeout: 3600 + default_session_timeout: 60 + # regions_hierarchy_file: /opt/geo/regions_hierarchy.txt + # regions_names_files_path: /opt/geo/ + # top_level_domains_path: /var/lib/clickhouse/top_level_domains/ + # top_level_domains: # Path to the list is under top_level_domains_path + # - domain: + # name: example_name + # path: /path/to/example_name.dat + dictionaries_config: "*_dictionary.xml" + user_defined_executable_functions_config: "*_function.xml" + # max_table_size_to_drop: 0 + # max_partition_size_to_drop: 0 + format_schema_path: /var/lib/clickhouse/format_schemas/ + # disable_internal_dns_cache: 1 + +clickhouse_keeper: + tcp_port: 
9181
  log_storage_path: /var/lib/clickhouse/coordination/log
  snapshot_storage_path: /var/lib/clickhouse/coordination/snapshots
  coordination_settings:
    operation_timeout_ms: 10000
    session_timeout_ms: 30000
    raft_logs_level: trace
  keeper_servers:
    - keeper_server:
        server: data1.htz-fsn.prod.ooni.nu
        id: 1
        hostname: clickhouse1.prod.ooni.io
        port: 9234

    #- keeper_server:
    #    server: data2.htz-fsn.prod.ooni.nu
    #    id: 2
    #    hostname: clickhouse2.prod.ooni.io
    #    port: 9234

    - keeper_server:
        server: data3.htz-fsn.prod.ooni.nu
        id: 3
        hostname: clickhouse3.prod.ooni.io
        port: 9234

    - keeper_server:
        server: notebook.ooni.org
        id: 4
        hostname: notebook.ooni.org
        port: 9234

clickhouse_zookeeper:
  - node:
      host: clickhouse1.prod.ooni.io
      port: 9181
  - node:
      host: clickhouse3.prod.ooni.io
      port: 9181
  - node:
      host: notebook.ooni.org
      port: 9181

clickhouse_remote_servers:
  - server:
      servername: oonidata_cluster
      secret: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_oonidata_cluster_secret', profile='oonidevops_user_prod') }}"
      shards:
        - shard:
            internal_replication: true
            replicas:
              - replica:
                  host: clickhouse1.prod.ooni.io
                  port: 9000
              #- replica:
              #    host: clickhouse2.prod.ooni.io
              #    port: 9000
              - replica:
                  host: clickhouse3.prod.ooni.io
                  port: 9000

clickhouse_macros:
  - macro: |
      <shard>01</shard>
      <replica>01</replica>
    server:
      - data1.htz-fsn.prod.ooni.nu
  - macro: |
      <shard>01</shard>
      <replica>02</replica>
    server:
      - data2.htz-fsn.prod.ooni.nu
  - macro: |
      <shard>01</shard>
      <replica>03</replica>
    server:
      - data3.htz-fsn.prod.ooni.nu
  - macro: |
      <cluster>oonidata_cluster</cluster>

clickhouse_distributed_ddl:
  path: "/clickhouse/task_queue/ddl"
  profile: "write"
  pool_size: 1
  task_max_lifetime: 604800
  cleanup_delay_period: 60
  max_tasks_in_queue: 1000

clickhouse_default_profiles:
  default:
    readonly: 2
  write:
    readonly: 0

clickhouse_listen_hosts:
  - "::"

clickhouse_default_users:
  - user:
      name: default
      password:
      networks:
        - "127.0.0.1"
      profile: default
      quota: default
  - user:
      name: write
      password_sha256_hex: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}"
      networks:
        - "0.0.0.0/0"
      profile: write
      quota: default
diff --git a/ansible/group_vars/dev/vars.yml b/ansible/group_vars/dev/vars.yml
index a952a5d4..05d78af8 100644
--- a/ansible/group_vars/dev/vars.yml
+++ b/ansible/group_vars/dev/vars.yml
@@ -1 +1,3 @@
-prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_dev') }}"
\ No newline at end of file
+prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_dev') }}"
+admin_usernames: [ art, mehul, norbel, majakomel ]
+non_admin_usernames: [ agrabeli ]
diff --git a/ansible/group_vars/prod/vars.yml b/ansible/group_vars/prod/vars.yml
index 0248a20a..b80680bb 100644
--- a/ansible/group_vars/prod/vars.yml
+++ b/ansible/group_vars/prod/vars.yml
@@ -1 +1,7 @@
-prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}"
\ No newline at end of file
+prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}"
+tailscale_authkey: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/tailscale_authkey_devops', 
profile='oonidevops_user_prod') }}" +tailscale_tags: + - "devops-prod" +tailscale_oauth_ephemeral: false +admin_usernames: [ art, mehul ] +non_admin_usernames: [ ] diff --git a/ansible/host_vars/data3.htz-fsn.prod.ooni.nu b/ansible/host_vars/data3.htz-fsn.prod.ooni.nu new file mode 100644 index 00000000..4af35281 --- /dev/null +++ b/ansible/host_vars/data3.htz-fsn.prod.ooni.nu @@ -0,0 +1,2 @@ +non_admin_usernames: [ ] +clickhouse_base_path: /data/clickhouse diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org index 029edcc1..9d75d85e 100644 --- a/ansible/host_vars/notebook.ooni.org +++ b/ansible/host_vars/notebook.ooni.org @@ -64,8 +64,32 @@ ssh_users: comment: "Ben Ginoe" keys: - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOWdWCATiHUAzoS3mn3pFMIYDmi3n4Ekuzv5cEtvV0W1 root@parrot" - admin_usernames: [ art, agrabeli, majakomel, mehul, norbel ] non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael, benginoe ] jupyterhub_allowed_users: "{{ ssh_users }}" admin_group_name: admin + +clickhouse_default_profiles: + default: + readonly: 2 + write: + readonly: 0 + +clickhouse_listen_hosts: + - "127.0.0.1" + +clickhouse_default_users: + - user: + name: default + password: + networks: + - "127.0.0.1" + profile: default + quota: default + - user: + name: write + password_sha256_hex: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}" + networks: + - "127.0.0.1" + profile: write + quota: default diff --git a/ansible/inventory b/ansible/inventory index 1e13c160..25f1f5df 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -8,6 +8,15 @@ oonidata.ooni.org monitoring.ooni.org openvpn-server1.ooni.io notebook.ooni.org +data1.htz-fsn.prod.ooni.nu +data2.htz-fsn.prod.ooni.nu +data3.htz-fsn.prod.ooni.nu [dev] oonidatatest.ooni.nu + +[clickhouse] +notebook.ooni.org +data1.htz-fsn.prod.ooni.nu +data2.htz-fsn.prod.ooni.nu +data3.htz-fsn.prod.ooni.nu diff --git a/ansible/playbook-bootstrap.yml b/ansible/playbook-bootstrap.yml index ab0d34d3..56be3b81 100644 --- a/ansible/playbook-bootstrap.yml +++ b/ansible/playbook-bootstrap.yml @@ -4,5 +4,4 @@ hosts: all remote_user: root roles: - - ssh_users - bootstrap diff --git a/ansible/playbook.yml b/ansible/playbook.yml index b71d2212..ac94df4f 100644 --- a/ansible/playbook.yml +++ b/ansible/playbook.yml @@ -4,37 +4,19 @@ become: yes roles: - bootstrap - -- name: ClickHouse servers - hosts: clickhouse_servers - user: admin - become: true - vars: - clickhouse_reader_password: "{{ lookup('env', 'CLICKHOUSE_READER_PASSWORD') }}" - roles: - - clickhouse - handlers: - - name: Restart clickhouse-server - ansible.builtin.service: - name: clickhouse-server - state: restarted + tags: + - bootstrap - name: Update monitoring config hosts: monitoring.ooni.org become: true + tags: + - monitoring roles: - prometheus - prometheus_blackbox_exporter - prometheus_alertmanager -- name: Deploy data.ooni.org host - hosts: data.ooni.org - become: true - roles: - #- clickhouse - - ssh_users - #- jupyterhub - - name: Setup OpenVPN server hosts: openvpn-server1.ooni.io become: true @@ -42,15 +24,37 @@ roles: - ssh_users -- name: Deploy oonidata hosts - hosts: oonidata.ooni.org +- name: Deploy oonidata clickhouse hosts + hosts: + - data1.htz-fsn.prod.ooni.nu + #- data2.htz-fsn.prod.ooni.nu + - data3.htz-fsn.prod.ooni.nu + - notebook.ooni.org become: true + tags: + - clickhouse + roles: + - tailnet + - oonidata_clickhouse + +- name: Deploy oonidata worker nodes + hosts: + - 
data1.htz-fsn.prod.ooni.nu + become: true + tags: + - oonidata_worker roles: - oonidata + vars: + enable_jupyterhub: false + enable_oonipipeline_worker: true + clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" -- name: Deploy notebook hosts +- name: Deploy notebook host hosts: notebook.ooni.org become: true + tags: + - notebook vars: enable_oonipipeline_worker: false roles: diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 3b4d5ae0..0a2eae7d 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -1,4 +1,9 @@ - src: willshersystems.sshd - src: nginxinc.nginx - src: geerlingguy.certbot -- src: geerlingguy.node_exporter \ No newline at end of file +- src: geerlingguy.node_exporter +- src: artis3n.tailscale +- src: https://github.com/idealista/clickhouse_role + scm: git + version: 3.5.1 + name: idealista.clickhouse_role diff --git a/ansible/roles/bootstrap/handlers/main.yml b/ansible/roles/bootstrap/handlers/main.yml new file mode 100644 index 00000000..a9c712a4 --- /dev/null +++ b/ansible/roles/bootstrap/handlers/main.yml @@ -0,0 +1,18 @@ +- name: Restart chrony + ansible.builtin.systemd_service: + name: chrony.service + state: restarted + +- name: Restart systemd-resolved + ansible.builtin.systemd_service: + name: systemd-resolved.service + state: restarted + +- name: Test systemd-resolved + ansible.builtin.shell: resolvectl query go.dnscheck.tools --cache=no + +- name: Restart systemd-journald + ansible.builtin.systemd_service: + name: systemd-journald.service + state: restarted + enabled: yes diff --git a/ansible/roles/bootstrap/tasks/main.yml b/ansible/roles/bootstrap/tasks/main.yml index 88cd3a78..ecf1d46f 100644 --- a/ansible/roles/bootstrap/tasks/main.yml +++ b/ansible/roles/bootstrap/tasks/main.yml @@ -1,3 +1,11 @@ +- name: write bashrc template + ansible.builtin.template: + src: bashrc + dest: /etc/skel/.bashrc + owner: root + group: root + mode: u=rw,g=r,o=r + - ansible.builtin.include_role: name: ssh_users tags: @@ -13,6 +21,7 @@ - bash-completion - ca-certificates - curl + - chrony - file - git - htop @@ -22,6 +31,7 @@ - man-db - mtr - net-tools + - nvme-cli - openssl - python3-passlib - rsync @@ -34,6 +44,12 @@ update_cache: yes install_recommends: no +- name: Set timezone + community.general.timezone: + name: Etc/UTC + notify: + - Restart chrony + - ansible.builtin.include_role: name: nftables tags: @@ -43,3 +59,31 @@ name: prometheus_node_exporter tags: - node_exporter + +- name: Configure journald + tags: + - journald + template: + src: templates/journald.conf + dest: /etc/systemd/journald.conf + mode: 0644 + owner: root + notify: + - Restart systemd-journald + +- name: install systemd-resolved + tags: resolved + ansible.builtin.apt: + install_recommends: no + cache_valid_time: 86400 + name: + - systemd-resolved + +- name: configure systemd-resolved + tags: resolved + ansible.builtin.template: + src: resolved.conf + dest: /etc/systemd/resolved.conf + notify: + - Restart systemd-resolved + - Test systemd-resolved diff --git a/ansible/roles/bootstrap/templates/bashrc b/ansible/roles/bootstrap/templates/bashrc new file mode 100644 index 00000000..4d34923b --- /dev/null +++ b/ansible/roles/bootstrap/templates/bashrc @@ -0,0 +1,113 @@ +# ~/.bashrc: executed by bash(1) for non-login shells. 
+# see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) +# for examples + +# If not running interactively, don't do anything +case $- in + *i*) ;; + *) return;; +esac + +# don't put duplicate lines or lines starting with space in the history. +# See bash(1) for more options +HISTCONTROL=ignoreboth + +# append to the history file, don't overwrite it +shopt -s histappend + +# for setting history length see HISTSIZE and HISTFILESIZE in bash(1) +HISTSIZE=1000 +HISTFILESIZE=2000 + +# check the window size after each command and, if necessary, +# update the values of LINES and COLUMNS. +shopt -s checkwinsize + +# If set, the pattern "**" used in a pathname expansion context will +# match all files and zero or more directories and subdirectories. +#shopt -s globstar + +# make less more friendly for non-text input files, see lesspipe(1) +#[ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" + +# set variable identifying the chroot you work in (used in the prompt below) +if [ -z "${debian_chroot:-}" ] && [ -r /etc/debian_chroot ]; then + debian_chroot=$(cat /etc/debian_chroot) +fi + +# set a fancy prompt (non-color, unless we know we "want" color) +case "$TERM" in + xterm-color|*-256color) color_prompt=yes;; +esac + +# uncomment for a colored prompt, if the terminal has the capability; turned +# off by default to not distract the user: the focus in a terminal window +# should be on the output of commands, not on the prompt +#force_color_prompt=yes + +if [ -n "$force_color_prompt" ]; then + if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then + # We have color support; assume it's compliant with Ecma-48 + # (ISO/IEC-6429). (Lack of such support is extremely rare, and such + # a case would tend to support setf rather than setaf.) + color_prompt=yes + else + color_prompt= + fi +fi + +if [ "$color_prompt" = yes ]; then + PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\H\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' +else + PS1='${debian_chroot:+($debian_chroot)}\u@\H:\w\$ ' +fi +unset color_prompt force_color_prompt + +# If this is an xterm set the title to user@host:dir +case "$TERM" in +xterm*|rxvt*) + PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" + ;; +*) + ;; +esac + +# enable color support of ls and also add handy aliases +if [ -x /usr/bin/dircolors ]; then + test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" + alias ls='ls --color=auto' + #alias dir='dir --color=auto' + #alias vdir='vdir --color=auto' + + #alias grep='grep --color=auto' + #alias fgrep='fgrep --color=auto' + #alias egrep='egrep --color=auto' +fi + +# colored GCC warnings and errors +#export GCC_COLORS='error=01;31:warning=01;35:note=01;36:caret=01;32:locus=01:quote=01' + +# some more ls aliases +#alias ll='ls -l' +#alias la='ls -A' +#alias l='ls -CF' + +# Alias definitions. +# You may want to put all your additions into a separate file like +# ~/.bash_aliases, instead of adding them here directly. +# See /usr/share/doc/bash-doc/examples in the bash-doc package. + +if [ -f ~/.bash_aliases ]; then + . ~/.bash_aliases +fi + +# enable programmable completion features (you don't need to enable +# this, if it's already enabled in /etc/bash.bashrc and /etc/profile +# sources /etc/bash.bashrc). +if ! shopt -oq posix; then + if [ -f /usr/share/bash-completion/bash_completion ]; then + . /usr/share/bash-completion/bash_completion + elif [ -f /etc/bash_completion ]; then + . 
/etc/bash_completion + fi +fi diff --git a/ansible/roles/bootstrap/templates/journald.conf b/ansible/roles/bootstrap/templates/journald.conf new file mode 100644 index 00000000..e06ebc4b --- /dev/null +++ b/ansible/roles/bootstrap/templates/journald.conf @@ -0,0 +1,11 @@ +# ansible managed +# see: roles/bootstrap/templates + +[Journal] +Storage=persistent +Compress=yes +#RateLimitIntervalSec=30s +#RateLimitBurst=10000 +SystemMaxFileSize=200M +RuntimeMaxFileSize=1G +ForwardToSyslog=no diff --git a/ansible/roles/bootstrap/templates/resolved.conf b/ansible/roles/bootstrap/templates/resolved.conf new file mode 100644 index 00000000..834d505d --- /dev/null +++ b/ansible/roles/bootstrap/templates/resolved.conf @@ -0,0 +1,9 @@ +# Deployed by ansible +# See roles/bootstrap/templates/resolved.conf + +[Resolve] +DNS=9.9.9.9 +FallbackDNS=1.1.1.1 8.8.8.8 +DNSOverTLS=opportunistic +DNSSEC=allow-downgrade +Cache=yes diff --git a/ansible/roles/clickhouse/tasks/main.yml b/ansible/roles/clickhouse/tasks/main.yml deleted file mode 100644 index ee7d90bb..00000000 --- a/ansible/roles/clickhouse/tasks/main.yml +++ /dev/null @@ -1,73 +0,0 @@ -- name: install clickhouse requirements - tags: clickhouse - apt: - cache_valid_time: 86400 - state: present - name: - - apt-transport-https - - ca-certificates - - dirmngr - -- name: Check if ClickHouse GPG keyring exists - ansible.builtin.stat: - path: /usr/share/keyrings/clickhouse-keyring.gpg - register: keyring_check - -- name: Create a temporary directory for GPG - ansible.builtin.tempfile: - state: directory - register: gnupg_temp_dir - when: not keyring_check.stat.exists - -- name: Import ClickHouse GPG key - ansible.builtin.command: - cmd: "gpg --no-default-keyring --keyring /usr/share/keyrings/clickhouse-keyring.gpg --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 8919F6BD2B48D754" - chdir: "{{ gnupg_temp_dir.path }}" - creates: "/usr/share/keyrings/clickhouse-keyring.gpg" - environment: - GNUPGHOME: "{{ gnupg_temp_dir.path }}" - when: not keyring_check.stat.exists - -- name: Remove temporary directory - ansible.builtin.file: - path: "{{ gnupg_temp_dir.path }}" - state: absent - when: not keyring_check.stat.exists - -- name: Ensure the keyring is readable - ansible.builtin.file: - path: /usr/share/keyrings/clickhouse-keyring.gpg - mode: a+r - -- name: Add ClickHouse repository - ansible.builtin.apt_repository: - repo: "deb [signed-by=/usr/share/keyrings/clickhouse-keyring.gpg] https://packages.clickhouse.com/deb stable main" - state: present - filename: clickhouse - -- name: Install ClickHouse server and client - ansible.builtin.apt: - name: - - clickhouse-server={{ clickhouse_pkg_ver }} - - clickhouse-client={{ clickhouse_pkg_ver }} - - clickhouse-common-static={{ clickhouse_pkg_ver }} - state: present - update_cache: yes - vars: - clickhouse_pkg_ver: 24.1.* - -- name: Ensure ClickHouse service is started and enabled - ansible.builtin.systemd: - name: clickhouse-server - state: started - enabled: yes - -- name: Configure ClickHouse users from template - template: - src: templates/ooni_users.xml - dest: /etc/clickhouse-server/users.d/ooni_users.xml - owner: clickhouse - group: clickhouse - mode: '0640' - notify: - - restart clickhouse-server diff --git a/ansible/roles/clickhouse/templates/ooni_users.xml b/ansible/roles/clickhouse/templates/ooni_users.xml deleted file mode 100644 index 26081944..00000000 --- a/ansible/roles/clickhouse/templates/ooni_users.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - - 1 - - - - - - - 1 - admin - - 127.0.0.1 - - - - - - readonly 
- - 0.0.0.0 - - {{ clickhouse_reader_password }} - - - - - diff --git a/ansible/roles/miniconda/tasks/main.yml b/ansible/roles/miniconda/tasks/main.yml index 0bddf448..fa195d36 100644 --- a/ansible/roles/miniconda/tasks/main.yml +++ b/ansible/roles/miniconda/tasks/main.yml @@ -3,6 +3,7 @@ ansible.builtin.user: name: miniconda shell: /bin/false + home: "{{ miniconda_install_dir }}" - name: Check if Miniconda is installed ansible.builtin.stat: @@ -11,5 +12,3 @@ - include_tasks: install.yml when: not miniconda_bin.stat.exists - - diff --git a/ansible/roles/nftables/defaults/main.yml b/ansible/roles/nftables/defaults/main.yml new file mode 100644 index 00000000..1f9e51f1 --- /dev/null +++ b/ansible/roles/nftables/defaults/main.yml @@ -0,0 +1,10 @@ +nft_rules_tcp: + - name: 22 + rules: + - add rule inet filter input tcp dport 22 counter accept comment "Incoming SSH" + #- name: 80 + # rules: + # - add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP" + #- name: 443 + # rules: + # - add rule inet filter input tcp dport 443 counter accept comment "incoming HTTPS" diff --git a/ansible/roles/nftables/tasks/main.yml b/ansible/roles/nftables/tasks/main.yml index 2789b150..5946772b 100644 --- a/ansible/roles/nftables/tasks/main.yml +++ b/ansible/roles/nftables/tasks/main.yml @@ -16,12 +16,15 @@ tags: - nftables -- name: allow SSH - ansible.builtin.blockinfile: - path: /etc/ooni/nftables/tcp/22.nft - create: yes - block: | - add rule inet filter input tcp dport 22 counter accept comment "Incoming SSH" +- name: "write nft config for item" + ansible.builtin.template: + src: "rule.nft.j2" + dest: "/etc/ooni/nftables/tcp/{{ item.name }}.nft" + vars: + rules: "{{ item.rules }}" + loop: "{{ nft_rules_tcp }}" + notify: + - Reload nftables tags: - nftables diff --git a/ansible/roles/nftables/templates/nftables.conf b/ansible/roles/nftables/templates/nftables.conf index 5f7b50cc..0d94f9ca 100755 --- a/ansible/roles/nftables/templates/nftables.conf +++ b/ansible/roles/nftables/templates/nftables.conf @@ -38,4 +38,3 @@ include "/etc/ooni/nftables/tcp/*.nft" # Configure any other rule include "/etc/ooni/nftables/*.nft" - diff --git a/ansible/roles/nftables/templates/rule.nft.j2 b/ansible/roles/nftables/templates/rule.nft.j2 new file mode 100644 index 00000000..093c8baa --- /dev/null +++ b/ansible/roles/nftables/templates/rule.nft.j2 @@ -0,0 +1,4 @@ +{{ ansible_managed | comment }} +{% for entry in rules %} +{{ entry }} +{% endfor %} diff --git a/ansible/roles/nginx/handlers/main.yml b/ansible/roles/nginx/handlers/main.yml index dff67402..eb1d1671 100644 --- a/ansible/roles/nginx/handlers/main.yml +++ b/ansible/roles/nginx/handlers/main.yml @@ -13,9 +13,3 @@ service: name: nginx state: reloaded - -- name: reload nftables - tags: nftables - ansible.builtin.systemd_service: - name: nftables - state: reloaded diff --git a/ansible/roles/nginx/tasks/main.yml b/ansible/roles/nginx/tasks/main.yml index 23bb4ecd..757b42e0 100644 --- a/ansible/roles/nginx/tasks/main.yml +++ b/ansible/roles/nginx/tasks/main.yml @@ -1,4 +1,18 @@ --- +- ansible.builtin.include_role: + name: nftables + vars: + nft_rules_tcp: + - name: 80 + rules: + - add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP" + - name: 443 + rules: + - add rule inet filter input tcp dport 443 counter accept comment "incoming HTTPS" + tags: + - nginx + - nftables + - name: install nginx include_role: name: nginxinc.nginx @@ -47,25 +61,3 @@ mode: 0755 tags: - nftables - -- name: allow incoming TCP connections to Nginx 
port 80 - blockinfile: - path: /etc/ooni/nftables/tcp/80.nft - create: yes - block: | - add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP" - notify: - - reload nftables - tags: - - nginx - -- name: allow incoming TCP connections to Nginx port 443 - blockinfile: - path: /etc/ooni/nftables/tcp/443.nft - create: yes - block: | - add rule inet filter input tcp dport 443 counter accept comment "incoming HTTP" - notify: - - reload nftables - tags: - - nginx diff --git a/ansible/roles/oonidata/defaults/main.yml b/ansible/roles/oonidata/defaults/main.yml index a3ae96df..84694a5c 100644 --- a/ansible/roles/oonidata/defaults/main.yml +++ b/ansible/roles/oonidata/defaults/main.yml @@ -5,8 +5,10 @@ oonipipeline_runtime_dir: /srv/oonipipeline tls_cert_dir: /etc/letsencrypt/live admin_group_name: admin enable_oonipipeline_worker: true -enable_oonidata_proxy: false enable_jupyterhub: true +clickhouse_url: "clickhouse://localhost" +certbot_domains: + - "{{ inventory_hostname }}" conda_forge_packages: - seaborn - dask @@ -21,3 +23,8 @@ pip_packages: - "clickhouse-driver" - pomegranate - pgmpy +apt_packages: + - net-tools + - curl + - git + - socat diff --git a/ansible/roles/oonidata/meta/requirements.yml b/ansible/roles/oonidata/meta/requirements.yml new file mode 100644 index 00000000..0c765e0c --- /dev/null +++ b/ansible/roles/oonidata/meta/requirements.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - geerlingguy.certbot diff --git a/ansible/roles/oonidata/tasks/jupyterhub.yml b/ansible/roles/oonidata/tasks/jupyterhub.yml index 040cdeab..7502668b 100644 --- a/ansible/roles/oonidata/tasks/jupyterhub.yml +++ b/ansible/roles/oonidata/tasks/jupyterhub.yml @@ -2,6 +2,7 @@ - name: Install jupyterhub ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/conda install -c conda-forge -y jupyterhub" + become_user: miniconda tags: - oonidata - jupyterhub @@ -9,6 +10,7 @@ - name: Install jupyterlab and notebook ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/conda install -y jupyterlab notebook" + become_user: miniconda tags: - oonidata - jupyterhub @@ -89,3 +91,23 @@ - oonidata - jupyterhub - config + +- ansible.builtin.include_role: + name: nginx + tags: + - oonidata + - nginx + +- ansible.builtin.include_role: + name: geerlingguy.certbot + tags: + - oonidata + - certbot + vars: + certbot_admin_email: admin@ooni.org + certbot_create_extra_args: "" + certbot_create_if_missing: true + certbot_create_standalone_stop_services: + - nginx + certbot_certs: + - domains: "{{ certbot_domains }}" diff --git a/ansible/roles/oonidata/tasks/main.yml b/ansible/roles/oonidata/tasks/main.yml index f2040bd2..48f05bae 100644 --- a/ansible/roles/oonidata/tasks/main.yml +++ b/ansible/roles/oonidata/tasks/main.yml @@ -22,34 +22,9 @@ - oonidata - jupyterhub -- ansible.builtin.include_role: - name: nginx - tags: - - oonidata - - nginx - -- ansible.builtin.include_role: - name: geerlingguy.certbot - tags: - - oonidata - - certbot - vars: - certbot_admin_email: admin@ooni.org - certbot_create_extra_args: "" - certbot_create_if_missing: true - certbot_create_standalone_stop_services: - - nginx - certbot_certs: - - domains: - - "{{ inventory_hostname }}" - -- name: Install oonipipeline requirements +- name: Install apt packages ansible.builtin.apt: - name: - - net-tools - - curl - - git - - socat + name: "{{ apt_packages }}" tags: - oonidata - oonipipeline @@ -58,7 +33,9 @@ - name: "install conda packages" ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/conda install -y {{ item }}" + 
chdir: "{{ miniconda_install_dir }}" loop: "{{ conda_packages }}" + become_user: miniconda tags: - oonidata - oonipipeline @@ -67,7 +44,9 @@ - name: Install conda-forge packages ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/conda install -c conda-forge -y {{ item }}" + chdir: "{{ miniconda_install_dir }}" loop: "{{ conda_forge_packages }}" + become_user: miniconda tags: - oonidata - packages @@ -75,7 +54,9 @@ - name: "Install pip packages" ansible.builtin.shell: cmd: "{{ miniconda_install_dir }}/bin/pip install {{ item }}" + chdir: "{{ miniconda_install_dir }}" loop: "{{ pip_packages }}" + become_user: miniconda tags: - oonidata - oonipipeline @@ -86,15 +67,3 @@ tags: - oonidata - oonipipeline - -- name: Write oonidataproxy service - ansible.builtin.template: - src: oonidata-proxy.service.j2 - dest: "/etc/systemd/system/oonidata-proxy.service" - owner: root - group: root - mode: "0644" - notify: - - Restart oonidata-proxy - tags: - - oonipipeline diff --git a/ansible/roles/oonidata/templates/oonidata-proxy.service.j2 b/ansible/roles/oonidata/templates/oonidata-proxy.service.j2 deleted file mode 100644 index ea539876..00000000 --- a/ansible/roles/oonidata/templates/oonidata-proxy.service.j2 +++ /dev/null @@ -1,15 +0,0 @@ -# JupyterHub systemd service -[Unit] - -[Service] -User=oonipipeline -Restart=always -PrivateTmp=yes -PrivateDevices=yes -ProtectKernelTunables=yes -ProtectKernelModules=yes -ExecStart=/usr/bin/socat tcp-listen:9000,reuseaddr,fork tcp:data.ooni.org:9000 - -[Install] -# Start service when system boots -WantedBy=multi-user.target diff --git a/ansible/roles/oonidata/templates/oonipipeline-config.toml.j2 b/ansible/roles/oonidata/templates/oonipipeline-config.toml.j2 index a41dcb43..d9461cbf 100644 --- a/ansible/roles/oonidata/templates/oonipipeline-config.toml.j2 +++ b/ansible/roles/oonidata/templates/oonipipeline-config.toml.j2 @@ -3,5 +3,6 @@ temporal_namespace = "ooni-pipeline.uuhzf" temporal_tls_client_cert_path = "/etc/ooni/pipeline/ooni-pipeline.uuhzf.crt" temporal_tls_client_key_path = "/etc/ooni/pipeline/ooni-pipeline.uuhzf.key" clickhouse_write_batch_size = 30000 +clickhouse_url = "{{ clickhouse_url }}" prometheus_bind_address = "127.0.0.1:9998" -data_dir = "/srv/oonipipeline/data_dir" \ No newline at end of file +data_dir = "/srv/oonipipeline/data_dir" diff --git a/ansible/roles/oonidata_clickhouse/defaults/main.yml b/ansible/roles/oonidata_clickhouse/defaults/main.yml new file mode 100644 index 00000000..e69de29b diff --git a/ansible/roles/oonidata_clickhouse/handlers/main.yml b/ansible/roles/oonidata_clickhouse/handlers/main.yml new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/ansible/roles/oonidata_clickhouse/handlers/main.yml @@ -0,0 +1 @@ + diff --git a/ansible/roles/oonidata_clickhouse/tasks/main.yml b/ansible/roles/oonidata_clickhouse/tasks/main.yml new file mode 100644 index 00000000..febc1884 --- /dev/null +++ b/ansible/roles/oonidata_clickhouse/tasks/main.yml @@ -0,0 +1,38 @@ +- ansible.builtin.include_role: + name: idealista.clickhouse_role + tags: + - oonidata + - clickhouse + +- name: Create nftables rule for clickhouse native port + set_fact: + ch_nft_rule: '{{ ch_nft_rule | default([]) + [''add rule inet filter input ip saddr '' + item.ip + '' tcp dport 9000 counter accept comment "incoming clickhouse from '' + item.fqdn + ''"''] }}' + loop: "{{ nftables_clickhouse_allow | rejectattr('fqdn', 'eq', inventory_hostname) | list }}" + +- name: Create nftables rule for clickhouse inter-server communication + set_fact: + 
is_nft_rule: '{{ is_nft_rule | default([]) + [''add rule inet filter input ip saddr '' + item.ip + '' tcp dport 9009 counter accept comment "incoming clickhouse from '' + item.fqdn + ''"''] }}'
+  loop: "{{ nftables_clickhouse_allow | rejectattr('fqdn', 'eq', inventory_hostname) | list }}"
+
+- name: Create nftables rule for zookeeper
+  set_fact:
+    zk_nft_rule: '{{ zk_nft_rule | default([]) + [''add rule inet filter input ip saddr '' + item.ip + '' tcp dport 9181 counter accept comment "incoming zookeeper from '' + item.fqdn + ''"''] }}'
+  loop: "{{ nftables_zookeeper_allow | rejectattr('fqdn', 'eq', inventory_hostname) | list }}"
+
+- name: Create nftables rule for raft port
+  set_fact:
+    raft_nft_rule: '{{ raft_nft_rule | default([]) + [''add rule inet filter input ip saddr '' + item.ip + '' tcp dport 9234 counter accept comment "incoming raft from '' + item.fqdn + ''"''] }}'
+  loop: "{{ nftables_zookeeper_allow | rejectattr('fqdn', 'eq', inventory_hostname) | list }}"
+
+- ansible.builtin.include_role:
+    name: nftables
+  vars:
+    nft_rules_tcp:
+      - name: 9000
+        rules: "{{ ch_nft_rule }}"
+      - name: 9009
+        rules: "{{ is_nft_rule }}"
+      - name: 9181
+        rules: "{{ zk_nft_rule }}"
+      - name: 9234
+        rules: "{{ raft_nft_rule }}"
diff --git a/ansible/roles/prometheus_node_exporter/handlers/main.yml b/ansible/roles/prometheus_node_exporter/handlers/main.yml
index 8face3a3..69a5b2fe 100644
--- a/ansible/roles/prometheus_node_exporter/handlers/main.yml
+++ b/ansible/roles/prometheus_node_exporter/handlers/main.yml
@@ -13,9 +13,3 @@
   ansible.builtin.systemd_service:
     name: nginx
     state: restarted
-
-- name: Reload nftables
-  tags: nftables
-  ansible.builtin.systemd_service:
-    name: nftables
-    state: reloaded
diff --git a/ansible/roles/prometheus_node_exporter/tasks/main.yml b/ansible/roles/prometheus_node_exporter/tasks/main.yml
index 113863eb..0c4fc242 100644
--- a/ansible/roles/prometheus_node_exporter/tasks/main.yml
+++ b/ansible/roles/prometheus_node_exporter/tasks/main.yml
@@ -49,14 +49,13 @@
     - node_exporter
     - config
 
-- name: Allow prometheus monitoring
-  ansible.builtin.blockinfile:
-    path: /etc/ooni/nftables/tcp/9100.nft
-    create: yes
-    block: |
-      add rule inet filter input tcp dport 9100 counter accept comment "Incoming prometheus monitoring"
-  notify:
-    - Reload nftables
+- ansible.builtin.include_role:
+    name: nftables
+  vars:
+    nft_rules_tcp:
+      - name: 9100
+        rules:
+          - add rule inet filter input tcp dport 9100 counter accept comment "Incoming prometheus monitoring"
   tags:
     - monitoring
     - node_exporter
diff --git a/ansible/roles/ssh_users/tasks/main.yml b/ansible/roles/ssh_users/tasks/main.yml
index 0d994377..d3534dcd 100644
--- a/ansible/roles/ssh_users/tasks/main.yml
+++ b/ansible/roles/ssh_users/tasks/main.yml
@@ -66,6 +66,21 @@
     force: yes
   with_items: "{{ deactivated_usernames }}"
 
+- name: configure sshd
+  include_role:
+    name: willshersystems.sshd
+  vars:
+    sshd_skip_defaults: false
+    sshd:
+      AllowUsers: "{{ admin_usernames | union(non_admin_usernames) | sort | join(' ') }}"
+
+- name: Ensure sudoers dir exists
+  ansible.builtin.file:
+    path: /etc/sudoers.d
+    state: directory
+    owner: root
+    group: root
+
 - name: sudoers.d/80-admins
   template:
     src: sudoers
@@ -79,11 +94,3 @@
   ansible.builtin.file:
     path: /etc/sudoers.d/adm
     state: absent
-
-- name: configure sshd
-  include_role:
-    name: willshersystems.sshd
-  vars:
-    sshd_skip_defaults: false
-    sshd:
-      AllowUsers: "{{ admin_usernames | union(non_admin_usernames) | sort | join(' ') }}"
diff --git a/ansible/roles/tailnet/tasks/main.yml
b/ansible/roles/tailnet/tasks/main.yml
new file mode 100644
index 00000000..86bc4b3d
--- /dev/null
+++ b/ansible/roles/tailnet/tasks/main.yml
@@ -0,0 +1,4 @@
+- ansible.builtin.include_role:
+    name: artis3n.tailscale
+  tags:
+    - tailnet
diff --git a/docs/merge-tree-replication.md b/docs/merge-tree-replication.md
new file mode 100644
index 00000000..ac9e1e21
--- /dev/null
+++ b/docs/merge-tree-replication.md
@@ -0,0 +1,127 @@
+## Replicating MergeTree tables
+
+Notes on how to convert a MergeTree family table to a replicated table while minimizing downtime.
+
+See the following links for more information:
+
+- https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated/
+- https://clickhouse.com/docs/en/operations/system-tables/replicas
+- https://clickhouse.com/docs/en/architecture/replication#verify-that-clickhouse-keeper-is-running
+- https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication
+- https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings
+
+### Workflow
+
+You should first create the replicated database cluster following the
+instructions in the [clickhouse docs](https://clickhouse.com/docs/en/architecture/replication).
+
+The ooni-devops repo has a role called `oonidata_clickhouse` that sets this up by using the [idealista.clickhouse_role](https://github.com/idealista/clickhouse_role).
+
+Once the cluster is created you can proceed with creating a DATABASE on the cluster by running:
+
+```sql
+CREATE DATABASE ooni ON CLUSTER oonidata_cluster
+```
+
+There are a few options to go about doing this:
+
+1. Create the new replicated tables and copy the data into the destination database by running the following on the source database:
+
+```sql
+INSERT INTO FUNCTION
+remote('destination-database.ooni.nu', 'ooni.obs_web', 'USER', 'PASSWORD')
+SELECT * from obs_web
+```
+
+This will require duplicating the data and might not be feasible.
+
+2. If you already have all the data set up on one host and you just want to convert the database into a replicated one, you can do the following:
+
+We assume there are 2 tables: `obs_web_bak` (which is the source table) and
+`obs_web`, which is the destination table. We also assume a single shard and
+multiple replicas.
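+
+Before creating any tables, it can be worth confirming that every node sees the cluster. A minimal sanity check (this assumes the cluster name `oonidata_cluster` used throughout these examples):
+
+```sql
+-- Every replica should be listed here; is_local marks the node being queried.
+SELECT cluster, shard_num, replica_num, host_name, is_local
+FROM system.clusters
+WHERE cluster = 'oonidata_cluster'
+```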
+First create the destination replicated table. To retrieve the create query of the existing table you can run:
+
+```sql
+select create_table_query
+from system.tables
+where database = 'default' and table = 'obs_web'
+```
+
+You should then modify the table to make use of the `ReplicatedReplacingMergeTree` engine:
+
+```sql
+CREATE TABLE ooni.obs_web (`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `probe_asn` UInt32, `probe_cc` String, `probe_as_org_name` String, `probe_as_cc` String, `probe_as_name` String, `network_type` String, `platform` String, `origin` String, `engine_name` String, `engine_version` String, `architecture` String, `resolver_ip` String, `resolver_asn` UInt32, `resolver_cc` String, `resolver_as_org_name` String, `resolver_as_cc` String, `resolver_is_scrubbed` UInt8, `resolver_asn_probe` UInt32, `resolver_as_org_name_probe` String, `created_at` Nullable(DateTime('UTC')), `target_id` Nullable(String), `hostname` Nullable(String), `transaction_id` Nullable(UInt16), `ip` Nullable(String), `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_query_type` Nullable(String), `dns_failure` Nullable(String), `dns_engine` Nullable(String), `dns_engine_resolver_address` Nullable(String), `dns_answer_type` Nullable(String), `dns_answer` Nullable(String), `dns_answer_asn` Nullable(UInt32), `dns_answer_as_org_name` Nullable(String), `dns_t` Nullable(Float64), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tcp_t` Nullable(Float64), `tls_failure` Nullable(String), `tls_server_name` Nullable(String), `tls_version` Nullable(String), `tls_cipher_suite` Nullable(String), `tls_is_certificate_valid` Nullable(UInt8), `tls_end_entity_certificate_fingerprint` Nullable(String), `tls_end_entity_certificate_subject` Nullable(String), `tls_end_entity_certificate_subject_common_name` Nullable(String), `tls_end_entity_certificate_issuer` Nullable(String), `tls_end_entity_certificate_issuer_common_name` Nullable(String), `tls_end_entity_certificate_san_list` Array(String), `tls_end_entity_certificate_not_valid_after` Nullable(DateTime64(3, 'UTC')), `tls_end_entity_certificate_not_valid_before` Nullable(DateTime64(3, 'UTC')), `tls_certificate_chain_length` Nullable(UInt16), `tls_certificate_chain_fingerprints` Array(String), `tls_handshake_read_count` Nullable(UInt16), `tls_handshake_write_count` Nullable(UInt16), `tls_handshake_read_bytes` Nullable(UInt32), `tls_handshake_write_bytes` Nullable(UInt32), `tls_handshake_last_operation` Nullable(String), `tls_handshake_time` Nullable(Float64), `tls_t` Nullable(Float64), `http_request_url` Nullable(String), `http_network` Nullable(String), `http_alpn` Nullable(String), `http_failure` Nullable(String), `http_request_body_length` Nullable(UInt32), `http_request_method` Nullable(String), `http_runtime` Nullable(Float64), `http_response_body_length` Nullable(Int32), `http_response_body_is_truncated` Nullable(UInt8), `http_response_body_sha1` Nullable(String), `http_response_status_code` Nullable(UInt16), `http_response_header_location` Nullable(String), `http_response_header_server` Nullable(String), `http_request_redirect_from` Nullable(String), `http_request_body_is_truncated` Nullable(UInt8), `http_t` Nullable(Float64), `probe_analysis` Nullable(String))
+ENGINE = ReplicatedReplacingMergeTree(
+'/clickhouse/{cluster}/tables/{database}/{table}/{shard}',
+'{replica}'
+)
+PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2))
+PRIMARY KEY (measurement_uid, observation_idx)
+ORDER BY (measurement_uid, observation_idx, measurement_start_time, probe_cc, probe_asn) SETTINGS index_granularity = 8192
+```
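+
+Once created, you can verify that the table registered itself correctly as a replica; a quick check, sketched using the `system.replicas` table linked above:
+
+```sql
+-- A healthy replica reports is_readonly = 0 and an empty replication queue.
+SELECT database, table, is_readonly, queue_size, absolute_delay
+FROM system.replicas
+WHERE table = 'obs_web'
+```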
+Check all the partitions that exist for the source table and produce ALTER queries to map them from the source to the destination:
+
+```sql
+SELECT DISTINCT 'ALTER TABLE ooni.obs_web ATTACH PARTITION ID \'' || partition_id || '\' FROM obs_web_bak;' from system.parts WHERE table = 'obs_web_bak' AND active;
+```
+
+While the ATTACH queries are running, merges should be stopped; before executing them run:
+
+```sql
+SYSTEM STOP MERGES;
+```
+
+This can then be scripted like so:
+
+```sh
+clickhouse-client -q "SELECT DISTINCT 'ALTER TABLE ooni.obs_web ATTACH PARTITION ID \'' || partition_id || '\' FROM obs_web_bak;' from system.parts WHERE table = 'obs_web_bak' format TabSeparatedRaw" | clickhouse-client -u write --password XXXX -mn
+```
+
+You will now have a replicated table existing on one of the replicas.
+
+Then, on each of the other replicas in the set, manually create the table, this time passing in the zookeeper path explicitly.
+
+You can get the zookeeper path by running the following on the first replica you have set up:
+
+```sql
+SELECT zookeeper_path FROM system.replicas WHERE table = 'obs_web';
+```
+
+For each replica you will then have to create the tables like so:
+
+```sql
+CREATE TABLE ooni.obs_web (`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `probe_asn` UInt32, `probe_cc` String, `probe_as_org_name` String, `probe_as_cc` String, `probe_as_name` String, `network_type` String, `platform` String, `origin` String, `engine_name` String, `engine_version` String, `architecture` String, `resolver_ip` String, `resolver_asn` UInt32, `resolver_cc` String, `resolver_as_org_name` String, `resolver_as_cc` String, `resolver_is_scrubbed` UInt8, `resolver_asn_probe` UInt32, `resolver_as_org_name_probe` String, `created_at` Nullable(DateTime('UTC')), `target_id` Nullable(String), `hostname` Nullable(String), `transaction_id` Nullable(UInt16), `ip` Nullable(String), `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_query_type` Nullable(String), `dns_failure` Nullable(String), `dns_engine` Nullable(String), `dns_engine_resolver_address` Nullable(String), `dns_answer_type` Nullable(String), `dns_answer` Nullable(String), `dns_answer_asn` Nullable(UInt32), `dns_answer_as_org_name` Nullable(String), `dns_t` Nullable(Float64), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tcp_t` Nullable(Float64), `tls_failure` Nullable(String), `tls_server_name` Nullable(String), `tls_version` Nullable(String), `tls_cipher_suite` Nullable(String), `tls_is_certificate_valid` Nullable(UInt8), `tls_end_entity_certificate_fingerprint` Nullable(String), `tls_end_entity_certificate_subject` Nullable(String), `tls_end_entity_certificate_subject_common_name` Nullable(String), `tls_end_entity_certificate_issuer` Nullable(String), `tls_end_entity_certificate_issuer_common_name` Nullable(String), `tls_end_entity_certificate_san_list` Array(String), `tls_end_entity_certificate_not_valid_after` Nullable(DateTime64(3, 'UTC')), `tls_end_entity_certificate_not_valid_before` Nullable(DateTime64(3, 'UTC')), `tls_certificate_chain_length` Nullable(UInt16), `tls_certificate_chain_fingerprints` Array(String), `tls_handshake_read_count` Nullable(UInt16), `tls_handshake_write_count` Nullable(UInt16), `tls_handshake_read_bytes` Nullable(UInt32), `tls_handshake_write_bytes` Nullable(UInt32), `tls_handshake_last_operation` Nullable(String), `tls_handshake_time` Nullable(Float64), `tls_t` Nullable(Float64), `http_request_url` Nullable(String), `http_network` Nullable(String), `http_alpn` Nullable(String), `http_failure` Nullable(String), `http_request_body_length` Nullable(UInt32), `http_request_method` Nullable(String), `http_runtime` Nullable(Float64), `http_response_body_length` Nullable(Int32), `http_response_body_is_truncated` Nullable(UInt8), `http_response_body_sha1` Nullable(String), `http_response_status_code` Nullable(UInt16), `http_response_header_location` Nullable(String), `http_response_header_server` Nullable(String), `http_request_redirect_from` Nullable(String), `http_request_body_is_truncated` Nullable(UInt8), `http_t` Nullable(Float64), `probe_analysis` Nullable(String))
+ENGINE = ReplicatedReplacingMergeTree(
+'/clickhouse/oonidata_cluster/tables/ooni/obs_web/01',
+'{replica}'
+)
+PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2))
+PRIMARY KEY (measurement_uid, observation_idx)
+ORDER BY (measurement_uid, observation_idx, measurement_start_time, probe_cc, probe_asn) SETTINGS index_granularity = 8192
+```
+
+You will then have to manually copy the data over to the destination replica from the source.
+
+The data lives inside of `/var/lib/clickhouse/data/{database_name}/{table_name}`
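+
+How you copy the files over is up to you; a minimal sketch using rsync (the destination hostname here is an assumption, and it is safest to stop `clickhouse-server` on the destination while the files are being copied):
+
+```sh
+# Copy the table data from the source replica to a destination replica.
+# Run on the source host; assumes root SSH access to the destination.
+rsync -av /var/lib/clickhouse/data/ooni/obs_web/ \
+  root@clickhouse2.prod.ooni.io:/var/lib/clickhouse/data/ooni/obs_web/
+```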
+Once the data has been copied over, the table is replicated and you can resume merges on all databases by running:
+
+```sql
+SYSTEM START MERGES;
+```
+
+### Creating tables on clusters
+
+Once the cluster is in place, new tables can be created on all replicas at once by using the `ON CLUSTER` clause:
+
+```sql
+CREATE TABLE ooni.obs_web_ctrl ON CLUSTER oonidata_cluster
+(`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `hostname` String, `created_at` Nullable(DateTime64(3, 'UTC')), `ip` String, `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_failure` Nullable(String), `dns_success` Nullable(UInt8), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tls_failure` Nullable(String), `tls_success` Nullable(UInt8), `tls_server_name` Nullable(String), `http_request_url` Nullable(String), `http_failure` Nullable(String), `http_success` Nullable(UInt8), `http_response_body_length` Nullable(Int32))
+ENGINE = ReplicatedReplacingMergeTree(
+'/clickhouse/{cluster}/tables/{database}/{table}/{shard}',
+'{replica}'
+)
+PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2))
+PRIMARY KEY (measurement_uid, observation_idx) ORDER BY (measurement_uid, observation_idx, measurement_start_time, hostname) SETTINGS index_granularity = 8192
+```
diff --git a/tf/environments/prod/.terraform.lock.hcl b/tf/environments/prod/.terraform.lock.hcl
index 6f3c4ce4..d11e04d7 100644
--- a/tf/environments/prod/.terraform.lock.hcl
+++
b/tf/environments/prod/.terraform.lock.hcl @@ -2,49 +2,49 @@ # Manual edits may be lost in future updates. provider "registry.terraform.io/digitalocean/digitalocean" { - version = "2.41.0" + version = "2.43.0" constraints = "~> 2.0" hashes = [ - "h1:Ne6nxvygwwHbNEO9My9uukE/YtlwAVMr/Bud1FIc6uc=", - "zh:13bfbca765a302a8fdf9ca0e4c5d25c7ee62d21b2bc7fbc241e298215c78e5f7", - "zh:45ef1602bb56fde0b6755f99847da0549144ebdd4af2da695e44d1a06d24d685", - "zh:4a6d81c462a11e710dd6138bb18573f60af456e83c5af0c1158578b4dc8e07f9", - "zh:5827b9463f7fce29bf4d9eb9264771d3aec103ed25e2151e570e8bee27b2dc6a", - "zh:639e59ffddb267a5255d66b93c816b713df96a304c23757364a96a65159ee177", - "zh:6876c162f2e4f850c4acede81857c72665710af2f552f19b1de56bcd5addc86a", - "zh:6a23b529309d6e8f59339d9572504e08f5c90491dfa0d1b1468a6fd7bd6b1b3d", - "zh:7d6e2c103f097a694b81d0e22ecd24ec2778a307e64dbef8de4f956d53219274", - "zh:8203577b5ad891e84afa994a47c6aba85401edf4bdd5aaf7f5e30e59e1393880", - "zh:88672feeae8ac9f4f99391b99957426c9c0a667021c658c4c9dad23abd5b5832", - "zh:ae3703123073a7808cea5a7a89289973e58a4fd83e94680091d4a8420ad521f5", - "zh:b59dd8675402e49a1fba5d2cf14596553c21f104bbb90a1167aa44c39693e7a5", - "zh:bb608cf1db63f985709e0052dbc3d16e9c801a23ebbf4d0a687c8a89d09e3769", - "zh:f1164e25518c00a640a8a375b2214d9bfc86297d2d726a6d35ed6d5de334ef96", - "zh:fc8a0a0375b26095e78ecfd987b79e6ef26c9c5d2e4393d437a9601ea1f3c5c5", - "zh:ffae2daa3ef366047885ace62f2fd0d126d6581d253996ef78c11bc5acbb3999", + "h1:NFD+iFS14S3EILq2ZJ8bHaQGetYEAnETqEjkhl52eiI=", + "zh:0023fa4ca4304e9141357df9dafff3bdb33f0189d0c8544f8b872070660ccb0e", + "zh:4004c3034197ca6a2d719d26125eb21e01e652dc77932e27fd0c60151d7ca6d1", + "zh:44173e57c086cad3177bb6c2063981fb9f4ac2d5f7fd9a9e1891b8c16a00d0d9", + "zh:4622261e108f8539102ce84894e03afcf9f70c796eee0ddced02c235a15d9460", + "zh:4fd86a35073061746c5b7dc693fb2a44793a15b49791edcbf0dbefef1d3dae0c", + "zh:5e00b0d847ce0f1e2f269ae55e1f9ea9ea76efb0f40af9ad43c61f89dd84a6d6", + "zh:815c30ce11020e18dd05462f22038764c4200c61a27313e67343dc66ebdcf12c", + "zh:901be1ee215935e0a459b9cb91699757e442355e5dd625637481e1d33cc0498c", + "zh:9bd04a076c175d2b90ab69cd03753e5e0ac3bab96ee6bfcaba83dcd29c829135", + "zh:9d03d25e7e30a2da6f6c2b7f46f6d21a33d55ee80209c21361b57baf7f3dd3f3", + "zh:b1f6ac1c4296e4e0e84b6955661058b04c812d72292d8f3af0b93327b59d0e6b", + "zh:c1cabafc7f1b836a56d62aa43b7d5b77faeb6d685490825f90b776c6852e9ffd", + "zh:ddfcf6ef57b99193f0dde25796cc8ad96a04dcb940eccd137e9a4d5f50c21d17", + "zh:e93dffb991e7ad7c8a0800bd6c7a692225f87656a8b73d7f0e8489a0635ea8ce", + "zh:f2137db6bd5a10662fe23c779c05d312eb71f6df5aa8d5f1e6a45b4c0404b2a0", + "zh:f5e494414b35293f830ffc741e4915744fa84400810dcbcb7df9920a4dadc56d", ] } provider "registry.terraform.io/hashicorp/aws" { - version = "5.44.0" + version = "5.75.0" constraints = ">= 4.9.0, >= 4.66.1" hashes = [ - "h1:K3sX+P4wofRNcVsnYW4PIhxHijd3w/ZD5AO7yWFPT6A=", - "zh:1224a42bb04574785549b89815d98bda11f6e9992352fc6c36c5622f3aea91c0", - "zh:2a8d1095a2f1ab097f516d9e7e0d289337849eebb3fcc34f075070c65063f4fa", - "zh:46cce11150eb4934196d9bff693b72d0494c85917ceb3c2914d5ff4a785af861", - "zh:4a7c15d585ee747d17f4b3904851cd95cfbb920fa197aed3df78e8d7ef9609b6", - "zh:508f1a85a0b0f93bf26341207d809bd55b60c8fdeede40097d91f30111fc6f5d", - "zh:52f968ffc21240213110378d0ffb298cbd23e9157a6d01dfac5a4360492d69c2", - "zh:5e9846b48ef03eb59541049e81b15cae8bc7696a3779ae4a5412fdce60bb24e0", - "zh:850398aecaf7dc0231fc320fdd6dffe41836e07a54c8c7b40eb28e7525d3c0a9", - "zh:8f87eeb05bdd1b873b6cfb3898dfad6402ac180dfa3c8f9754df8f85dcf92ca6", + 
"h1:1R08bG9RT1qWHU6K0B992s3VbTIdb7cWt421+TBVS/8=", + "zh:01b01b132b70df918f735898f1ad012ab3033d1b909b2e38950d16964d94c084", + "zh:28bc6ee7b0c88b1a48f315509ad390fb1e8f39bebe0f7a43c22b1a63825251d1", + "zh:31f9043a4c3538883ab9b9d3b399dae62e4552251e6a2b1da13ec3a2018a027d", + "zh:47451c295ffbddd19679a41d728f0942486d6de0d9206418d9593dda5a20c120", + "zh:5204c1a9f41dcc10e38879d41d95d95fdbb10527f613c129603137b1dbe99777", + "zh:64c3165a6019045782c8ad2a40d6fa4253d44dba67a5a971a81791cff5a9d3d5", "zh:9b12af85486a96aedd8d7984b0ff811a4b42e3d88dad1a3fb4c0b580d04fa425", - "zh:c726b87cd6ed111536f875dccedecff21abc802a4087264515ffab113cac36dc", - "zh:d57ea706d2f98b93c7b05b0c6bc3420de8e8cf2d0b6703085dc15ed239b2cc49", - "zh:d5d1a21246e68c2a7a04c5619eb0ad5a81644f644c432cb690537b816a156de2", - "zh:e869904cac41114b7e4ee66bcd2ce4585ed15ca842040a60cb47119f69472c91", - "zh:f1a09f2f3ea72cbe795b865cf31ad9b1866a536a8050cf0bb93d3fa51069582e", + "zh:a5788f78da2f0ac78f99ca2a4c489c041654bec992f3183fd0b972e0554f91e9", + "zh:aed486e3b24e9f82543bf558b2a7eade4a905608060fac1284145c00ff63d3e2", + "zh:b42523c409940a9c3866f4973c8251b96e5f3a0934230849c533a04b95854965", + "zh:b570353eeb97b3ed1b423a6f67857a7a3c1c47c9907e45a81c3df186a2fd88d0", + "zh:bf05df84199cbc776a878f920f6be4d27737f2de204f80794e6a652d49692f0d", + "zh:c27133287d20620244de95f4c2438135e60c057e0891a3ec97539c990f7ebdec", + "zh:c59143082fe8e4f5d5b0676472b8b0e24c2a2f1ede622a64f9f24639382d4b03", + "zh:ebe01c3b7a85deebc10b4081097dd6e8b4c79b7c13a20acb099bd17ff06afcb7", ] } @@ -68,98 +68,98 @@ provider "registry.terraform.io/hashicorp/cloudinit" { } provider "registry.terraform.io/hashicorp/local" { - version = "2.5.1" + version = "2.5.2" constraints = ">= 2.0.0" hashes = [ - "h1:/GAVA/xheGQcbOZEq0qxANOg+KVLCA7Wv8qluxhTjhU=", - "zh:0af29ce2b7b5712319bf6424cb58d13b852bf9a777011a545fac99c7fdcdf561", - "zh:126063ea0d79dad1f68fa4e4d556793c0108ce278034f101d1dbbb2463924561", - "zh:196bfb49086f22fd4db46033e01655b0e5e036a5582d250412cc690fa7995de5", - "zh:37c92ec084d059d37d6cffdb683ccf68e3a5f8d2eb69dd73c8e43ad003ef8d24", - "zh:4269f01a98513651ad66763c16b268f4c2da76cc892ccfd54b401fff6cc11667", - "zh:51904350b9c728f963eef0c28f1d43e73d010333133eb7f30999a8fb6a0cc3d8", - "zh:73a66611359b83d0c3fcba2984610273f7954002febb8a57242bbb86d967b635", + "h1:IyFbOIO6mhikFNL/2h1iZJ6kyN3U00jgkpCLUCThAfE=", + "zh:136299545178ce281c56f36965bf91c35407c11897f7082b3b983d86cb79b511", + "zh:3b4486858aa9cb8163378722b642c57c529b6c64bfbfc9461d940a84cd66ebea", + "zh:4855ee628ead847741aa4f4fc9bed50cfdbf197f2912775dd9fe7bc43fa077c0", + "zh:4b8cd2583d1edcac4011caafe8afb7a95e8110a607a1d5fb87d921178074a69b", + "zh:52084ddaff8c8cd3f9e7bcb7ce4dc1eab00602912c96da43c29b4762dc376038", + "zh:71562d330d3f92d79b2952ffdda0dad167e952e46200c767dd30c6af8d7c0ed3", "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:7ae387993a92bcc379063229b3cce8af7eaf082dd9306598fcd42352994d2de0", - "zh:9e0f365f807b088646db6e4a8d4b188129d9ebdbcf2568c8ab33bddd1b82c867", - "zh:b5263acbd8ae51c9cbffa79743fbcadcb7908057c87eb22fd9048268056efbc4", - "zh:dfcd88ac5f13c0d04e24be00b686d069b4879cc4add1b7b1a8ae545783d97520", + "zh:805f81ade06ff68fa8b908d31892eaed5c180ae031c77ad35f82cb7a74b97cf4", + "zh:8b6b3ebeaaa8e38dd04e56996abe80db9be6f4c1df75ac3cccc77642899bd464", + "zh:ad07750576b99248037b897de71113cc19b1a8d0bc235eb99173cc83d0de3b1b", + "zh:b9f1c3bfadb74068f5c205292badb0661e17ac05eb23bfe8bd809691e4583d0e", + "zh:cc4cbcd67414fefb111c1bf7ab0bc4beb8c0b553d01719ad17de9a047adff4d1", ] } provider "registry.terraform.io/hashicorp/null" { 
- version = "3.2.2" + version = "3.2.3" hashes = [ - "h1:IMVAUHKoydFrlPrl9OzasDnw/8ntZFerCC9iXw1rXQY=", - "zh:3248aae6a2198f3ec8394218d05bd5e42be59f43a3a7c0b71c66ec0df08b69e7", - "zh:32b1aaa1c3013d33c245493f4a65465eab9436b454d250102729321a44c8ab9a", - "zh:38eff7e470acb48f66380a73a5c7cdd76cc9b9c9ba9a7249c7991488abe22fe3", - "zh:4c2f1faee67af104f5f9e711c4574ff4d298afaa8a420680b0cb55d7bbc65606", - "zh:544b33b757c0b954dbb87db83a5ad921edd61f02f1dc86c6186a5ea86465b546", - "zh:696cf785090e1e8cf1587499516b0494f47413b43cb99877ad97f5d0de3dc539", - "zh:6e301f34757b5d265ae44467d95306d61bef5e41930be1365f5a8dcf80f59452", + "h1:I0Um8UkrMUb81Fxq/dxbr3HLP2cecTH2WMJiwKSrwQY=", + "zh:22d062e5278d872fe7aed834f5577ba0a5afe34a3bdac2b81f828d8d3e6706d2", + "zh:23dead00493ad863729495dc212fd6c29b8293e707b055ce5ba21ee453ce552d", + "zh:28299accf21763ca1ca144d8f660688d7c2ad0b105b7202554ca60b02a3856d3", + "zh:55c9e8a9ac25a7652df8c51a8a9a422bd67d784061b1de2dc9fe6c3cb4e77f2f", + "zh:756586535d11698a216291c06b9ed8a5cc6a4ec43eee1ee09ecd5c6a9e297ac1", "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:913a929070c819e59e94bb37a2a253c228f83921136ff4a7aa1a178c7cce5422", - "zh:aa9015926cd152425dbf86d1abdbc74bfe0e1ba3d26b3db35051d7b9ca9f72ae", - "zh:bb04798b016e1e1d49bcc76d62c53b56c88c63d6f2dfe38821afef17c416a0e1", - "zh:c23084e1b23577de22603cff752e59128d83cfecc2e6819edadd8cf7a10af11e", + "zh:9d5eea62fdb587eeb96a8c4d782459f4e6b73baeece4d04b4a40e44faaee9301", + "zh:a6355f596a3fb8fc85c2fb054ab14e722991533f87f928e7169a486462c74670", + "zh:b5a65a789cff4ada58a5baffc76cb9767dc26ec6b45c00d2ec8b1b027f6db4ed", + "zh:db5ab669cf11d0e9f81dc380a6fdfcac437aea3d69109c7aef1a5426639d2d65", + "zh:de655d251c470197bcbb5ac45d289595295acb8f829f6c781d4a75c8c8b7c7dd", + "zh:f5c68199f2e6076bce92a12230434782bf768103a427e9bb9abee99b116af7b5", ] } provider "registry.terraform.io/hashicorp/random" { - version = "3.6.0" + version = "3.6.3" hashes = [ - "h1:I8MBeauYA8J8yheLJ8oSMWqB0kovn16dF/wKZ1QTdkk=", - "zh:03360ed3ecd31e8c5dac9c95fe0858be50f3e9a0d0c654b5e504109c2159287d", - "zh:1c67ac51254ba2a2bb53a25e8ae7e4d076103483f55f39b426ec55e47d1fe211", - "zh:24a17bba7f6d679538ff51b3a2f378cedadede97af8a1db7dad4fd8d6d50f829", - "zh:30ffb297ffd1633175d6545d37c2217e2cef9545a6e03946e514c59c0859b77d", - "zh:454ce4b3dbc73e6775f2f6605d45cee6e16c3872a2e66a2c97993d6e5cbd7055", + "h1:zG9uFP8l9u+yGZZvi5Te7PV62j50azpgwPunq2vTm1E=", + "zh:04ceb65210251339f07cd4611885d242cd4d0c7306e86dda9785396807c00451", + "zh:448f56199f3e99ff75d5c0afacae867ee795e4dfda6cb5f8e3b2a72ec3583dd8", + "zh:4b4c11ccfba7319e901df2dac836b1ae8f12185e37249e8d870ee10bb87a13fe", + "zh:4fa45c44c0de582c2edb8a2e054f55124520c16a39b2dfc0355929063b6395b1", + "zh:588508280501a06259e023b0695f6a18149a3816d259655c424d068982cbdd36", + "zh:737c4d99a87d2a4d1ac0a54a73d2cb62974ccb2edbd234f333abd079a32ebc9e", "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:91df0a9fab329aff2ff4cf26797592eb7a3a90b4a0c04d64ce186654e0cc6e17", - "zh:aa57384b85622a9f7bfb5d4512ca88e61f22a9cea9f30febaa4c98c68ff0dc21", - "zh:c4a3e329ba786ffb6f2b694e1fd41d413a7010f3a53c20b432325a94fa71e839", - "zh:e2699bc9116447f96c53d55f2a00570f982e6f9935038c3810603572693712d0", - "zh:e747c0fd5d7684e5bfad8aa0ca441903f15ae7a98a737ff6aca24ba223207e2c", - "zh:f1ca75f417ce490368f047b63ec09fd003711ae48487fba90b4aba2ccf71920e", + "zh:a357ab512e5ebc6d1fda1382503109766e21bbfdfaa9ccda43d313c122069b30", + "zh:c51bfb15e7d52cc1a2eaec2a903ac2aff15d162c172b1b4c17675190e8147615", + 
"zh:e0951ee6fa9df90433728b96381fb867e3db98f66f735e0c3e24f8f16903f0ad", + "zh:e3cdcb4e73740621dabd82ee6a37d6cfce7fee2a03d8074df65086760f5cf556", + "zh:eff58323099f1bd9a0bec7cb04f717e7f1b2774c7d612bf7581797e1622613a0", ] } provider "registry.terraform.io/hashicorp/time" { - version = "0.11.1" + version = "0.12.1" constraints = ">= 0.7.1" hashes = [ - "h1:pQGSL9mdgw4qsLndFYsEF93mbsIxyxNoAyIbBqhS3Xo=", - "zh:19a393db736ec4fd024d098d55aefaef07056c37a448ece3b55b3f5f4c2c7e4a", - "zh:227fa1e221de2907f37be78d40c06ca6a6f7b243a1ec33ade014dfaf6d92cd9c", - "zh:29970fecbf4a3ca23bacbb05d6b90cdd33dd379f90059fe39e08289951502d9f", - "zh:65024596f22f10e7dcb5e0e4a75277f275b529daa0bc0daf34ca7901c678ab88", - "zh:694d080cb5e3bf5ef08c7409208d061c135a4f5f4cdc93ea8607860995264b2e", + "h1:JzYsPugN8Fb7C4NlfLoFu7BBPuRVT2/fCOdCaxshveI=", + "zh:090023137df8effe8804e81c65f636dadf8f9d35b79c3afff282d39367ba44b2", + "zh:26f1e458358ba55f6558613f1427dcfa6ae2be5119b722d0b3adb27cd001efea", + "zh:272ccc73a03384b72b964918c7afeb22c2e6be22460d92b150aaf28f29a7d511", + "zh:438b8c74f5ed62fe921bd1078abe628a6675e44912933100ea4fa26863e340e9", "zh:78d5eefdd9e494defcb3c68d282b8f96630502cac21d1ea161f53cfe9bb483b3", - "zh:b29d15d13e1b3412e6a4e1627d378dbd102659132f7488f64017dd6b6d5216d3", - "zh:bb79f4cae9f8c17c73998edc54aa16c2130a03227f7f4e71fc6ac87e230575ec", - "zh:ceccf80e95929d97f62dcf1bb3c7c7553d5757b2d9e7d222518722fc934f7ad5", - "zh:f40e638336527490e294d9c938ae55919069e6987e85a80506784ba90348792a", - "zh:f99ef33b1629a3b2278201142a3011a8489e66d92da832a5b99e442204de18fb", - "zh:fded14754ea46fdecc62a52cd970126420d4cd190e598cb61190b4724a727edb", + "zh:85c8bd8eefc4afc33445de2ee7fbf33a7807bc34eb3734b8eefa4e98e4cddf38", + "zh:98bbe309c9ff5b2352de6a047e0ec6c7e3764b4ed3dfd370839c4be2fbfff869", + "zh:9c7bf8c56da1b124e0e2f3210a1915e778bab2be924481af684695b52672891e", + "zh:d2200f7f6ab8ecb8373cda796b864ad4867f5c255cff9d3b032f666e4c78f625", + "zh:d8c7926feaddfdc08d5ebb41b03445166df8c125417b28d64712dccd9feef136", + "zh:e2412a192fc340c61b373d6c20c9d805d7d3dee6c720c34db23c2a8ff0abd71b", + "zh:e6ac6bba391afe728a099df344dbd6481425b06d61697522017b8f7a59957d44", ] } provider "registry.terraform.io/hashicorp/tls" { - version = "4.0.5" + version = "4.0.6" hashes = [ - "h1:zeG5RmggBZW/8JWIVrdaeSJa0OG62uFX5HY1eE8SjzY=", - "zh:01cfb11cb74654c003f6d4e32bbef8f5969ee2856394a96d127da4949c65153e", - "zh:0472ea1574026aa1e8ca82bb6df2c40cd0478e9336b7a8a64e652119a2fa4f32", - "zh:1a8ddba2b1550c5d02003ea5d6cdda2eef6870ece86c5619f33edd699c9dc14b", - "zh:1e3bb505c000adb12cdf60af5b08f0ed68bc3955b0d4d4a126db5ca4d429eb4a", - "zh:6636401b2463c25e03e68a6b786acf91a311c78444b1dc4f97c539f9f78de22a", - "zh:76858f9d8b460e7b2a338c477671d07286b0d287fd2d2e3214030ae8f61dd56e", - "zh:a13b69fb43cb8746793b3069c4d897bb18f454290b496f19d03c3387d1c9a2dc", - "zh:a90ca81bb9bb509063b736842250ecff0f886a91baae8de65c8430168001dad9", - "zh:c4de401395936e41234f1956ebadbd2ed9f414e6908f27d578614aaa529870d4", - "zh:c657e121af8fde19964482997f0de2d5173217274f6997e16389e7707ed8ece8", - "zh:d68b07a67fbd604c38ec9733069fbf23441436fecf554de6c75c032f82e1ef19", + "h1:n3M50qfWfRSpQV9Pwcvuse03pEizqrmYEryxKky4so4=", + "zh:10de0d8af02f2e578101688fd334da3849f56ea91b0d9bd5b1f7a243417fdda8", + "zh:37fc01f8b2bc9d5b055dc3e78bfd1beb7c42cfb776a4c81106e19c8911366297", + "zh:4578ca03d1dd0b7f572d96bd03f744be24c726bfd282173d54b100fd221608bb", + "zh:6c475491d1250050765a91a493ef330adc24689e8837a0f07da5a0e1269e11c1", + "zh:81bde94d53cdababa5b376bbc6947668be4c45ab655de7aa2e8e4736dfd52509", + 
"zh:abdce260840b7b050c4e401d4f75c7a199fafe58a8b213947a258f75ac18b3e8", + "zh:b754cebfc5184873840f16a642a7c9ef78c34dc246a8ae29e056c79939963c7a", + "zh:c928b66086078f9917aef0eec15982f2e337914c5c4dbc31dd4741403db7eb18", + "zh:cded27bee5f24de6f2ee0cfd1df46a7f88e84aaffc2ecbf3ff7094160f193d50", + "zh:d65eb3867e8f69aaf1b8bb53bd637c99c6b649ba3db16ded50fa9a01076d1a27", + "zh:ecb0c8b528c7a619fa71852bb3fb5c151d47576c5aab2bf3af4db52588722eeb", "zh:f569b65999264a9416862bca5cd2a6177d94ccb0424f3a4ef424428912b9cb3c", ] } diff --git a/tf/environments/prod/dns_records.tf b/tf/environments/prod/dns_records.tf index c2d680a7..fd77fa54 100644 --- a/tf/environments/prod/dns_records.tf +++ b/tf/environments/prod/dns_records.tf @@ -1005,3 +1005,51 @@ resource "aws_route53_record" "notebook-ooni-org-_A_" { type = "A" zone_id = local.dns_root_zone_ooni_org } + +resource "aws_route53_record" "data1-htz-fsn-prod-ooni-nu-_a_" { + name = "data1.htz-fsn.prod.ooni.nu" + records = ["142.132.254.225"] + ttl = "60" + type = "A" + zone_id = local.dns_root_zone_ooni_nu +} + +resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" { + name = "data2.htz-fsn.prod.ooni.nu" + records = ["88.198.54.12"] + ttl = "60" + type = "A" + zone_id = local.dns_root_zone_ooni_nu +} + +resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" { + name = "data3.htz-fsn.prod.ooni.nu" + records = ["168.119.7.188"] + ttl = "60" + type = "A" + zone_id = local.dns_root_zone_ooni_nu +} + +resource "aws_route53_record" "clickhouse1-prod-ooni-io-_a_" { + name = "clickhouse1.prod.ooni.io" + records = ["142.132.254.225"] + ttl = "60" + type = "A" + zone_id = local.dns_root_zone_ooni_io +} + +resource "aws_route53_record" "clickhouse2-prod-ooni-io-_A_" { + name = "clickhouse2.prod.ooni.io" + records = ["88.198.54.12"] + ttl = "60" + type = "A" + zone_id = local.dns_root_zone_ooni_io +} + +resource "aws_route53_record" "clickhouse3-prod-ooni-io-_A_" { + name = "clickhouse3.prod.ooni.io" + records = ["168.119.7.188"] + ttl = "60" + type = "A" + zone_id = local.dns_root_zone_ooni_io +} diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf index cf228693..477ffcdb 100644 --- a/tf/environments/prod/main.tf +++ b/tf/environments/prod/main.tf @@ -42,6 +42,18 @@ provider "aws" { data "aws_availability_zones" "available" {} +data "aws_secretsmanager_secret" "do_token" { + name = "oonidevops/digitalocean_access_token" +} + +data "aws_secretsmanager_secret_version" "do_token_version" { + secret_id = data.aws_secretsmanager_secret.do_token.id +} + +provider "digitalocean" { + token = data.aws_secretsmanager_secret_version.do_token_version.secret_string +} + ### !!! IMPORTANT !!! # The first time you run terraform for a new environment you have to setup the # required roles in AWS. 
diff --git a/tf/environments/prod/versions.tf b/tf/environments/prod/versions.tf index 682191e7..3c3ed712 100644 --- a/tf/environments/prod/versions.tf +++ b/tf/environments/prod/versions.tf @@ -6,5 +6,9 @@ terraform { source = "hashicorp/aws" version = ">= 4.66.1" } + digitalocean = { + source = "digitalocean/digitalocean" + version = "~> 2.0" + } } } From edef980364e3f5d11f01ddef73329378b1d4b9c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 22 Nov 2024 16:51:32 +0100 Subject: [PATCH 21/88] Fixes to clickhouse related configs --- ansible/group_vars/clickhouse/vars.yml | 21 ++++++++++++++++++++- ansible/host_vars/notebook.ooni.org | 4 +++- ansible/playbook.yml | 4 ++-- 3 files changed, 25 insertions(+), 4 deletions(-) diff --git a/ansible/group_vars/clickhouse/vars.yml b/ansible/group_vars/clickhouse/vars.yml index 129a81cb..8e7388e8 100644 --- a/ansible/group_vars/clickhouse/vars.yml +++ b/ansible/group_vars/clickhouse/vars.yml @@ -154,6 +154,8 @@ clickhouse_distributed_ddl: clickhouse_default_profiles: default: readonly: 2 + readonly: + readonly: 1 write: readonly: 0 @@ -168,10 +170,27 @@ clickhouse_default_users: - "127.0.0.1" profile: default quota: default + - user: + name: readonly + password_sha256_hex: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_readonly_password', profile='oonidevops_user_prod') | hash('sha256') }}" + networks: + - "0.0.0.0/0" + profile: readonly + quota: default - user: name: write - password_sha256_hex: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}" + password_sha256_hex: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}" networks: - "0.0.0.0/0" profile: write quota: default + +clickhouse_default_quotas: + - quota: + name: default + duration: 3600 + queries: 0 + errors: 0 + result_rows: 0 + read_rows: 0 + execution_time: 0 diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org index 9d75d85e..0fbb91e9 100644 --- a/ansible/host_vars/notebook.ooni.org +++ b/ansible/host_vars/notebook.ooni.org @@ -75,6 +75,8 @@ clickhouse_default_profiles: write: readonly: 0 +clickhouse_version: "24.10.2.80" +clickhouse_release_type: stable clickhouse_listen_hosts: - "127.0.0.1" @@ -88,7 +90,7 @@ clickhouse_default_users: quota: default - user: name: write - password_sha256_hex: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}" + password_sha256_hex: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_notebook_write_password', profile='oonidevops_user_prod') | hash('sha256') }}" networks: - "127.0.0.1" profile: write diff --git a/ansible/playbook.yml b/ansible/playbook.yml index ac94df4f..63d2b448 100644 --- a/ansible/playbook.yml +++ b/ansible/playbook.yml @@ -34,7 +34,7 @@ tags: - clickhouse roles: - - tailnet + #- tailnet - oonidata_clickhouse - name: Deploy oonidata worker nodes @@ -48,7 +48,7 @@ vars: enable_jupyterhub: false enable_oonipipeline_worker: true - clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_secret', 'oonidevops/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" + clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') 
}}@clickhouse1.prod.ooni.io/ooni" - name: Deploy notebook host hosts: notebook.ooni.org From 3f4792192ee81f3eb45086239fb61fea42296aae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 17:56:18 -0500 Subject: [PATCH 22/88] Retrieve digital ocean token from parameter store --- tf/environments/dev/main.tf | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 09e4636c..4729f059 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -34,10 +34,13 @@ provider "aws" { # source_profile = oonidevops_user } -# In order for this provider to work you have to set the following environment -# variable to your DigitalOcean API token: -# DIGITALOCEAN_ACCESS_TOKEN= -provider "digitalocean" {} +data "aws_ssm_parameter" "do_token" { + name = "/oonidevops/secrets/digitalocean_access_token" +} + +provider "digitalocean" { + token = data.aws_ssm_parameter.do_token.value +} data "aws_availability_zones" "available" {} From 2638566f643f024f3066d7e99a884e2be4fe4724 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 17:56:40 -0500 Subject: [PATCH 23/88] Change ECS cluster instance sizes --- tf/environments/dev/main.tf | 14 +++++--------- tf/modules/ooniapi_service/main.tf | 3 +-- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 4729f059..9905b8ce 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -319,7 +319,7 @@ module "ooniapi_cluster" { asg_max = 6 asg_desired = 2 - instance_type = "t3a.medium" + instance_type = "t3a.nano" tags = merge( local.tags, @@ -349,8 +349,7 @@ module "ooniapi_ooniprobe_deployer" { module "ooniapi_ooniprobe" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 # First run should be set on first run to bootstrap the task definition # first_run = true @@ -403,8 +402,7 @@ module "ooniapi_oonirun_deployer" { module "ooniapi_oonirun" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id @@ -454,8 +452,7 @@ module "ooniapi_oonifindings_deployer" { module "ooniapi_oonifindings" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id @@ -505,8 +502,7 @@ module "ooniapi_ooniauth_deployer" { module "ooniapi_ooniauth" { source = "../../modules/ooniapi_service" - task_cpu = 256 - task_memory = 512 + task_memory = 64 vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id diff --git a/tf/modules/ooniapi_service/main.tf b/tf/modules/ooniapi_service/main.tf index ad429a01..773cb0e7 100644 --- a/tf/modules/ooniapi_service/main.tf +++ b/tf/modules/ooniapi_service/main.tf @@ -59,13 +59,12 @@ resource "aws_ecs_task_definition" "ooniapi_service" { container_definitions = jsonencode([ { - cpu = var.task_cpu, + memoryReservation = var.task_memory, essential = true, image = try( data.aws_ecs_container_definition.ooniapi_service_current[0].image, var.default_docker_image_url ), - memory = var.task_memory, name = local.name, portMappings = [ From fee97f3b02a578276a59fda89c359248abf5cfbe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 17:56:52 
-0500
Subject: [PATCH 24/88] Add support for deploying the oonibackend nginx proxy

---
 tf/environments/dev/main.tf | 55 ++++++++++++++++++++++++++++++++++++-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf
index 9905b8ce..8e5e6fc8 100644
--- a/tf/environments/dev/main.tf
+++ b/tf/environments/dev/main.tf
@@ -381,6 +381,59 @@ module "ooniapi_ooniprobe" {
   )
 }
 
+#### OONI Backend proxy service
+
+module "ooniapi_backendproxy_deployer" {
+  source = "../../modules/ooniapi_service_deployer"
+
+  service_name            = "ooniprobe"
+  repo                    = "ooni/backend"
+  branch_name             = "master"
+  buildspec_path          = "ooniapi/services/reverseproxy/buildspec.yml"
+  codestar_connection_arn = aws_codestarconnections_connection.oonidevops.arn
+
+  codepipeline_bucket = aws_s3_bucket.ooniapi_codepipeline_bucket.bucket
+
+  ecs_service_name = module.ooniapi_ooniprobe.ecs_service_name
+  ecs_cluster_name = module.ooniapi_cluster.cluster_name
+}
+
+module "ooniapi_backendproxy" {
+  source = "../../modules/ooniapi_service"
+
+  # First run should be set on first run to bootstrap the task definition
+  first_run = true
+
+  vpc_id             = module.network.vpc_id
+  public_subnet_ids  = module.network.vpc_subnet_public[*].id
+  private_subnet_ids = module.network.vpc_subnet_private[*].id
+
+  service_name             = "oonibackendproxy"
+  default_docker_image_url = "ooni/api-reverseproxy:latest"
+  stage                    = local.environment
+  dns_zone_ooni_io         = local.dns_zone_ooni_io
+  key_name                 = module.adm_iam_roles.oonidevops_key_name
+  ecs_cluster_id           = module.ooniapi_cluster.cluster_id
+
+  task_secrets = {
+    PROMETHEUS_METRICS_PASSWORD = aws_secretsmanager_secret_version.prometheus_metrics_password.arn
+  }
+
+  task_environment = {
+    TARGET_URL = "https://backend-hel.ooni.org/"
+    CLICKHOUSE_STREAM_TARGET = "clickhouse1.prod.ooni.io:9000"
+  }
+
+  ooniapi_service_security_groups = [
+    module.ooniapi_cluster.web_security_group_id
+  ]
+
+  tags = merge(
+    local.tags,
+    { Name = "ooni-tier0-oonibackendproxy" }
+  )
+}
+
 #### OONI Run service
 
From 0a0eea9b1a805b24608327a175b253fa0fd146ab Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Mon, 25 Nov 2024 17:58:47 -0500
Subject: [PATCH 25/88] Change default task memory

---
 tf/environments/dev/main.tf             | 2 ++
 tf/modules/ooniapi_service/variables.tf | 7 +------
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf
index 8e5e6fc8..ec3a5958 100644
--- a/tf/environments/dev/main.tf
+++ b/tf/environments/dev/main.tf
@@ -401,6 +401,8 @@ module "ooniapi_backendproxy_deployer" {
 module "ooniapi_backendproxy" {
   source = "../../modules/ooniapi_service"
 
+  task_memory = 64
+
   # First run should be set on first run to bootstrap the task definition
   first_run = true
 
diff --git a/tf/modules/ooniapi_service/variables.tf b/tf/modules/ooniapi_service/variables.tf
index f83e16d7..dad1b902 100644
--- a/tf/modules/ooniapi_service/variables.tf
+++ b/tf/modules/ooniapi_service/variables.tf
@@ -44,13 +44,8 @@ variable "service_desired_count" {
   default = 1
 }
 
-variable "task_cpu" {
-  default     = 256
-  description = "https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#task_size"
-}
-
 variable "task_memory" {
-  default = 512
+
default = 64 description = "https://docs.aws.amazon.com/AmazonECS/latest/developerguide/task_definition_parameters.html#task_size" } From a019a3cf14b6c336b0442fda480da0b2f553177c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 17:59:30 -0500 Subject: [PATCH 26/88] Fix duplicate service name --- tf/environments/dev/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index ec3a5958..d1451963 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -386,7 +386,7 @@ module "ooniapi_ooniprobe" { module "ooniapi_backendproxy_deployer" { source = "../../modules/ooniapi_service_deployer" - service_name = "ooniprobe" + service_name = "oonibackendproxy" repo = "ooni/backend" branch_name = "master" buildspec_path = "ooniapi/services/reverseproxy/buildspec.yml" From 301e16a72fee141419eff5309a6c1342786f6801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 18:21:47 -0500 Subject: [PATCH 27/88] Drop backendproxy --- tf/environments/dev/main.tf | 29 ++--------------------------- 1 file changed, 2 insertions(+), 27 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index d1451963..3b724e21 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -280,31 +280,6 @@ module "ooni_th_droplet" { dns_zone_ooni_io = local.dns_zone_ooni_io } -module "ooni_backendproxy" { - source = "../../modules/ooni_backendproxy" - - stage = local.environment - - vpc_id = module.network.vpc_id - subnet_id = module.network.vpc_subnet_public[0].id - private_subnet_cidr = module.network.vpc_subnet_private[*].cidr_block - dns_zone_ooni_io = local.dns_zone_ooni_io - - key_name = module.adm_iam_roles.oonidevops_key_name - instance_type = "t2.micro" - - backend_url = "https://backend-hel.ooni.org/" - wcth_addresses = module.ooni_th_droplet.droplet_ipv4_address - wcth_domain_suffix = "th.dev.ooni.io" - clickhouse_url = "backend-fsn.ooni.org" - clickhouse_port = "9000" - - tags = merge( - local.tags, - { Name = "ooni-tier0-backendproxy" } - ) -} - ### OONI Services Clusters module "ooniapi_cluster" { @@ -394,7 +369,7 @@ module "ooniapi_backendproxy_deployer" { codepipeline_bucket = aws_s3_bucket.ooniapi_codepipeline_bucket.bucket - ecs_service_name = module.ooniapi_ooniprobe.ecs_service_name + ecs_service_name = module.ooniapi_backendproxy.ecs_service_name ecs_cluster_name = module.ooniapi_cluster.cluster_name } @@ -613,7 +588,7 @@ module "ooniapi_frontend" { vpc_id = module.network.vpc_id subnet_ids = module.network.vpc_subnet_public[*].id - oonibackend_proxy_target_group_arn = module.ooni_backendproxy.alb_target_group_id + oonibackend_proxy_target_group_arn = module.ooniapi_backendproxy.alb_target_group_id ooniapi_oonirun_target_group_arn = module.ooniapi_oonirun.alb_target_group_id ooniapi_ooniauth_target_group_arn = module.ooniapi_ooniauth.alb_target_group_id ooniapi_ooniprobe_target_group_arn = module.ooniapi_ooniprobe.alb_target_group_id From cc729350f3662ed00ed5a2073d89c8ca5d32ade4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 18:25:22 -0500 Subject: [PATCH 28/88] Point oonifindings to oonidata branch --- tf/environments/dev/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 3b724e21..49232246 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ 
-469,7 +469,7 @@ module "ooniapi_oonifindings_deployer" { service_name = "oonifindings" repo = "ooni/backend" - branch_name = "master" + branch_name = "oonidata" buildspec_path = "ooniapi/services/oonifindings/buildspec.yml" codestar_connection_arn = aws_codestarconnections_connection.oonidevops.arn From f5b0e13a97d0e45d4fa929749f684ad5002d3395 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 18:42:01 -0500 Subject: [PATCH 29/88] Add clickhouseproxy setup --- tf/environments/dev/main.tf | 26 +++++++++++++++++++++++++ tf/modules/ooni_backendproxy/main.tf | 9 +-------- tf/modules/ooniapi_service/main.tf | 7 +------ tf/modules/ooniapi_service/variables.tf | 2 +- 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 49232246..5b9a2d0e 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -411,6 +411,32 @@ module "ooniapi_backendproxy" { ) } +module "ooni_backendproxy" { + source = "../../modules/ooni_backendproxy" + + stage = local.environment + + vpc_id = module.network.vpc_id + subnet_id = module.network.vpc_subnet_public[0].id + private_subnet_cidr = module.network.vpc_subnet_private[*].cidr_block + dns_zone_ooni_io = local.dns_zone_ooni_io + + key_name = module.adm_iam_roles.oonidevops_key_name + instance_type = "t3a.nano" + + backend_url = "https://backend-fsn.ooni.org/" + wcth_addresses = module.ooni_th_droplet.droplet_ipv4_address + wcth_domain_suffix = "th.ooni.org" + clickhouse_url = "clickhouse1.prod.ooni.io" + clickhouse_port = "9000" + + tags = merge( + local.tags, + { Name = "ooni-tier0-backendproxy" } + ) +} + + #### OONI Run service diff --git a/tf/modules/ooni_backendproxy/main.tf b/tf/modules/ooni_backendproxy/main.tf index ad5b9bec..81c98ee0 100644 --- a/tf/modules/ooni_backendproxy/main.tf +++ b/tf/modules/ooni_backendproxy/main.tf @@ -10,13 +10,6 @@ resource "aws_security_group" "nginx_sg" { vpc_id = var.vpc_id - ingress { - protocol = "tcp" - from_port = 80 - to_port = 80 - cidr_blocks = ["0.0.0.0/0"] - } - ingress { protocol = "tcp" from_port = 9000 @@ -132,7 +125,7 @@ resource "aws_lb_target_group_attachment" "oonibackend_proxy" { resource "aws_route53_record" "clickhouse_proxy_alias" { zone_id = var.dns_zone_ooni_io - name = "clickhouse.${var.stage}.ooni.io" + name = "clickhouseproxy.${var.stage}.ooni.io" type = "CNAME" ttl = 300 diff --git a/tf/modules/ooniapi_service/main.tf b/tf/modules/ooniapi_service/main.tf index 773cb0e7..c5def884 100644 --- a/tf/modules/ooniapi_service/main.tf +++ b/tf/modules/ooniapi_service/main.tf @@ -40,11 +40,6 @@ resource "aws_cloudwatch_log_group" "ooniapi_service" { name = "ooni-ecs-group/${local.name}" } - -locals { - container_port = 80 -} - // This is done to retrieve the image name of the current task definition // It's important to keep aligned the container_name and task_definitions data "aws_ecs_container_definition" "ooniapi_service_current" { @@ -69,7 +64,7 @@ resource "aws_ecs_task_definition" "ooniapi_service" { portMappings = [ { - containerPort = local.container_port, + containerPort = 80 } ], diff --git a/tf/modules/ooniapi_service/variables.tf b/tf/modules/ooniapi_service/variables.tf index dad1b902..bda90a72 100644 --- a/tf/modules/ooniapi_service/variables.tf +++ b/tf/modules/ooniapi_service/variables.tf @@ -74,4 +74,4 @@ variable "task_environment" { variable "ooniapi_service_security_groups" { description = "the shared web security group from the ecs cluster" type = list(string) -} +} 
\ No newline at end of file From 57dda333bcc0d7dc47928cbf56023792a5d8938c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 25 Nov 2024 18:47:13 -0500 Subject: [PATCH 30/88] Add clickhouse proxy nftable rules --- ansible/group_vars/clickhouse/vars.yml | 2 ++ tf/environments/dev/main.tf | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/ansible/group_vars/clickhouse/vars.yml b/ansible/group_vars/clickhouse/vars.yml index 8e7388e8..8483e095 100644 --- a/ansible/group_vars/clickhouse/vars.yml +++ b/ansible/group_vars/clickhouse/vars.yml @@ -7,6 +7,8 @@ nftables_clickhouse_allow: ip: 168.119.7.188 - fqdn: notebook.ooni.org ip: 138.201.19.39 + - fqdn: clickhouseproxy.dev.ooni.io + ip: "{{ lookup('dig', 'clickhouseproxy.dev.ooni.io/A') }}" nftables_zookeeper_allow: - fqdn: data1.htz-fsn.prod.ooni.nu diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 5b9a2d0e..b0c19bb4 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -398,7 +398,6 @@ module "ooniapi_backendproxy" { task_environment = { TARGET_URL = "https://backend-hel.ooni.org/" - CLICKHOUSE_STREAM_TARGET = "clickhouse1.prod.ooni.io:9000" } ooniapi_service_security_groups = [ @@ -527,6 +526,10 @@ module "ooniapi_oonifindings" { PROMETHEUS_METRICS_PASSWORD = aws_secretsmanager_secret_version.prometheus_metrics_password.arn } + task_environment = { + CLICKHOUSE_URL = "clickhouse://clickhouseproxy.dev.ooni.io" + } + ooniapi_service_security_groups = [ module.ooniapi_cluster.web_security_group_id ] From 7512a3bd6f30339556e21cb0e316c75fcdfaa84f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 26 Nov 2024 17:01:47 -0500 Subject: [PATCH 31/88] Add extra path rules for oonidata endpoint --- tf/modules/ooniapi_frontend/main.tf | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/tf/modules/ooniapi_frontend/main.tf b/tf/modules/ooniapi_frontend/main.tf index c72937a2..d65f3b9d 100644 --- a/tf/modules/ooniapi_frontend/main.tf +++ b/tf/modules/ooniapi_frontend/main.tf @@ -182,7 +182,12 @@ resource "aws_lb_listener_rule" "ooniapi_oonifindings_rule" { condition { path_pattern { - values = ["/api/v1/incidents/*"] + values = [ + "/api/v1/incidents/*", + "/api/v1/aggregation/*", + "/api/v1/observations", + "/api/v1/analysis", + ] } } } From 94338ad0674c007a5129ea7940392e4b978c0cfe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 26 Nov 2024 17:12:41 -0500 Subject: [PATCH 32/88] Fix labeling of reverse proxy --- tf/environments/dev/main.tf | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index b0c19bb4..226dfb26 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -358,10 +358,10 @@ module "ooniapi_ooniprobe" { #### OONI Backend proxy service -module "ooniapi_backendproxy_deployer" { +module "ooniapi_reverseproxy_deployer" { source = "../../modules/ooniapi_service_deployer" - service_name = "oonibackendproxy" + service_name = "reverseproxy" repo = "ooni/backend" branch_name = "master" buildspec_path = "ooniapi/services/reverseproxy/buildspec.yml" @@ -369,11 +369,11 @@ module "ooniapi_backendproxy_deployer" { codepipeline_bucket = aws_s3_bucket.ooniapi_codepipeline_bucket.bucket - ecs_service_name = module.ooniapi_backendproxy.ecs_service_name + ecs_service_name = module.ooniapi_reverseproxy.ecs_service_name ecs_cluster_name = module.ooniapi_cluster.cluster_name } -module 
"ooniapi_backendproxy" { +module "ooniapi_reverseproxy" { source = "../../modules/ooniapi_service" task_memory = 64 @@ -385,7 +385,7 @@ module "ooniapi_backendproxy" { public_subnet_ids = module.network.vpc_subnet_public[*].id private_subnet_ids = module.network.vpc_subnet_private[*].id - service_name = "oonibackendproxy" + service_name = "reverseproxy" default_docker_image_url = "ooni/api-reverseproxy:latest" stage = local.environment dns_zone_ooni_io = local.dns_zone_ooni_io @@ -406,7 +406,7 @@ module "ooniapi_backendproxy" { tags = merge( local.tags, - { Name = "ooni-tier0-oonibackendproxy" } + { Name = "ooni-tier0-reverseproxy" } ) } From b6866f32f157c14a914d945e6ad86adc4a04766e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 26 Nov 2024 17:14:27 -0500 Subject: [PATCH 33/88] Fix typo --- tf/environments/dev/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 226dfb26..dcff7943 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -617,7 +617,7 @@ module "ooniapi_frontend" { vpc_id = module.network.vpc_id subnet_ids = module.network.vpc_subnet_public[*].id - oonibackend_proxy_target_group_arn = module.ooniapi_backendproxy.alb_target_group_id + oonibackend_proxy_target_group_arn = module.ooniapi_reverseproxy.alb_target_group_id ooniapi_oonirun_target_group_arn = module.ooniapi_oonirun.alb_target_group_id ooniapi_ooniauth_target_group_arn = module.ooniapi_ooniauth.alb_target_group_id ooniapi_ooniprobe_target_group_arn = module.ooniapi_ooniprobe.alb_target_group_id From 96657f92fa4fdf24f75cbd4908b2ee2384980ab1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 26 Nov 2024 17:19:55 -0500 Subject: [PATCH 34/88] Bump up size of ecs_cluster --- tf/environments/dev/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index dcff7943..da876ca9 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -294,7 +294,7 @@ module "ooniapi_cluster" { asg_max = 6 asg_desired = 2 - instance_type = "t3a.nano" + instance_type = "t3a.micro" tags = merge( local.tags, From 534a8a6c24c06d248d87ed8d058e6e0e94bb9a67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 26 Nov 2024 17:43:57 -0500 Subject: [PATCH 35/88] Use readonly user password --- tf/environments/dev/main.tf | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index da876ca9..2b14235b 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -229,6 +229,10 @@ resource "aws_secretsmanager_secret_version" "oonipg_url" { ) } +data "aws_ssm_parameter" "clickhouse_readonly_url" { + name = "/oonidevops/secrets/clickhouse_readonly_url" +} + resource "random_id" "artifact_id" { byte_length = 4 } @@ -379,7 +383,7 @@ module "ooniapi_reverseproxy" { task_memory = 64 # First run should be set on first run to bootstrap the task definition - first_run = true + # first_run = true vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id @@ -524,10 +528,7 @@ module "ooniapi_oonifindings" { POSTGRESQL_URL = aws_secretsmanager_secret_version.oonipg_url.arn JWT_ENCRYPTION_KEY = aws_secretsmanager_secret_version.jwt_secret.arn PROMETHEUS_METRICS_PASSWORD = aws_secretsmanager_secret_version.prometheus_metrics_password.arn - } - - task_environment = { - 
CLICKHOUSE_URL = "clickhouse://clickhouseproxy.dev.ooni.io" + CLICKHOUSE_URL = data.aws_ssm_parameter.clickhouse_readonly_url.arn } ooniapi_service_security_groups = [ From 1ef8815521708469e959aea293877f494d1d3bf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 26 Nov 2024 17:54:36 -0500 Subject: [PATCH 36/88] Add ssm permission to service task --- .../ooniapi_service/templates/profile_policy.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tf/modules/ooniapi_service/templates/profile_policy.json b/tf/modules/ooniapi_service/templates/profile_policy.json index 5857ee55..3a772893 100644 --- a/tf/modules/ooniapi_service/templates/profile_policy.json +++ b/tf/modules/ooniapi_service/templates/profile_policy.json @@ -35,6 +35,16 @@ "Action": "secretsmanager:ListSecrets", "Resource": "*" }, + { + "Effect": "Allow", + "Action": [ + "ssm:GetParameter", + "ssm:GetParameters", + "ssm:GetParameterHistory", + "ssm:GetParametersByPath" + ], + "Resource": "arn:aws:ssm:*" + }, { "Effect": "Allow", "Action": [ From 7a1d062239dee2b7b799a832de0554d26adcd91d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 27 Nov 2024 15:05:44 -0500 Subject: [PATCH 37/88] Add scripts related to migrating backend-fsn tables over to cluster --- scripts/cluster-migration/migrate-tables.py | 38 +++++++++ scripts/cluster-migration/schema.sql | 95 +++++++++++++++++++++ 2 files changed, 133 insertions(+) create mode 100644 scripts/cluster-migration/migrate-tables.py create mode 100644 scripts/cluster-migration/schema.sql diff --git a/scripts/cluster-migration/migrate-tables.py b/scripts/cluster-migration/migrate-tables.py new file mode 100644 index 00000000..2a3d4bfb --- /dev/null +++ b/scripts/cluster-migration/migrate-tables.py @@ -0,0 +1,38 @@ +import os + +from tqdm import tqdm +from clickhouse_driver import Client as ClickhouseClient + + +WRITE_CLICKHOUSE_URL = os.environ["WRITE_CLICKHOUSE_URL"] + + +def stream_table(table_name, where_clause): + with ClickhouseClient.from_url("clickhouse://backend-fsn.ooni.org/") as click: + for row in click.execute_iter(f"SELECT * FROM {table_name} {where_clause}"): + yield row + + +def copy_table(table_name, where_clause): + with ClickhouseClient.from_url(WRITE_CLICKHOUSE_URL) as click_writer: + buf = [] + for row in tqdm(stream_table(table_name=table_name, where_clause=where_clause)): + buf.append(row) + if len(buf) > 50_000: + click_writer.execute(f"INSERT INTO {table_name} VALUES", buf) + buf = [] + + if len(buf) > 0: + click_writer.execute(f"INSERT INTO {table_name} VALUES", buf) + + +if __name__ == "__main__": + assert WRITE_CLICKHOUSE_URL, "WRITE_CLICKHOUSE_URL environment variable is not set" + print("## copying `fastpath` table") + copy_table("fastpath", "WHERE measurement_uid < '20241127'") + print("## copying `jsonl` table") + copy_table("jsonl", "WHERE measurement_uid < '20241127'") + print("## copying `citizenlab` table") + copy_table("citizenlab", "") + print("## copying `citizenlab_flip` table") + copy_table("citizenlab_flip", "") diff --git a/scripts/cluster-migration/schema.sql b/scripts/cluster-migration/schema.sql new file mode 100644 index 00000000..9a374974 --- /dev/null +++ b/scripts/cluster-migration/schema.sql @@ -0,0 +1,95 @@ +CREATE TABLE + ooni.jsonl ( + `report_id` String, + `input` String, + `s3path` String, + `linenum` Int32, + `measurement_uid` String, + `date` Date, + `source` String, + `update_time` DateTime64 (3) MATERIALIZED now64 () + ) ENGINE = ReplicatedReplacingMergeTree ( + 
'/clickhouse/{cluster}/tables/ooni/jsonl/{shard}', + '{replica}', + update_time + ) +ORDER BY + (report_id, input, measurement_uid) SETTINGS index_granularity = 8192; + +CREATE TABLE + ooni.fastpath ( + `measurement_uid` String, + `report_id` String, + `input` String, + `probe_cc` LowCardinality (String), + `probe_asn` Int32, + `test_name` LowCardinality (String), + `test_start_time` DateTime, + `measurement_start_time` DateTime, + `filename` String, + `scores` String, + `platform` String, + `anomaly` String, + `confirmed` String, + `msm_failure` String, + `domain` String, + `software_name` String, + `software_version` String, + `control_failure` String, + `blocking_general` Float32, + `is_ssl_expected` Int8, + `page_len` Int32, + `page_len_ratio` Float32, + `server_cc` String, + `server_asn` Int8, + `server_as_name` String, + `update_time` DateTime64 (3) MATERIALIZED now64 (), + `test_version` String, + `architecture` String, + `engine_name` LowCardinality (String), + `engine_version` String, + `test_runtime` Float32, + `blocking_type` String, + `test_helper_address` LowCardinality (String), + `test_helper_type` LowCardinality (String), + `ooni_run_link_id` Nullable (UInt64), + INDEX fastpath_rid_idx report_id TYPE minmax GRANULARITY 1, + INDEX measurement_uid_idx measurement_uid TYPE minmax GRANULARITY 8 + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/fastpath/{shard}', + '{replica}', + update_time + ) +ORDER BY + ( + measurement_start_time, + report_id, + input, + measurement_uid + ) SETTINGS index_granularity = 8192; + +CREATE TABLE + ooni.citizenlab ( + `domain` String, + `url` String, + `cc` FixedString (32), + `category_code` String + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/citizenlab/{shard}', + '{replica}' + ) +ORDER BY + (domain, url, cc, category_code) SETTINGS index_granularity = 4; + +CREATE TABLE + ooni.citizenlab_flip ( + `domain` String, + `url` String, + `cc` FixedString (32), + `category_code` String + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/citizenlab_flip/{shard}', + '{replica}' + ) +ORDER BY + (domain, url, cc, category_code) SETTINGS index_granularity = 4; \ No newline at end of file From 10e596e9d758dcc5e837eb7c424d8f0d87423355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Thu, 28 Nov 2024 15:18:11 -0500 Subject: [PATCH 38/88] Add more schema updates --- scripts/cluster-migration/schema.sql | 52 +++++++++++++++++++++++++--- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/scripts/cluster-migration/schema.sql b/scripts/cluster-migration/schema.sql index 9a374974..7588f060 100644 --- a/scripts/cluster-migration/schema.sql +++ b/scripts/cluster-migration/schema.sql @@ -1,5 +1,5 @@ CREATE TABLE - ooni.jsonl ( + ooni.jsonl ON CLUSTER oonidata_cluster ( `report_id` String, `input` String, `s3path` String, @@ -17,7 +17,7 @@ ORDER BY (report_id, input, measurement_uid) SETTINGS index_granularity = 8192; CREATE TABLE - ooni.fastpath ( + ooni.fastpath ON CLUSTER oonidata_cluster ( `measurement_uid` String, `report_id` String, `input` String, @@ -69,7 +69,7 @@ ORDER BY ) SETTINGS index_granularity = 8192; CREATE TABLE - ooni.citizenlab ( + ooni.citizenlab ON CLUSTER oonidata_cluster ( `domain` String, `url` String, `cc` FixedString (32), @@ -82,7 +82,7 @@ ORDER BY (domain, url, cc, category_code) SETTINGS index_granularity = 4; CREATE TABLE - ooni.citizenlab_flip ( + ooni.citizenlab_flip ON CLUSTER oonidata_cluster ( `domain` 
String, `url` String, `cc` FixedString (32), @@ -92,4 +92,46 @@ CREATE TABLE '{replica}' ) ORDER BY - (domain, url, cc, category_code) SETTINGS index_granularity = 4; \ No newline at end of file + (domain, url, cc, category_code) SETTINGS index_granularity = 4; + +CREATE TABLE + analysis_web_measurement ON CLUSTER oonidata_cluster ( + `domain` String, + `input` String, + `test_name` String, + `probe_asn` UInt32, + `probe_as_org_name` String, + `probe_cc` String, + `resolver_asn` UInt32, + `resolver_as_cc` String, + `network_type` String, + `measurement_start_time` DateTime64 (3, 'UTC'), + `measurement_uid` String, + `ooni_run_link_id` String, + `top_probe_analysis` Nullable (String), + `top_dns_failure` Nullable (String), + `top_tcp_failure` Nullable (String), + `top_tls_failure` Nullable (String), + `dns_blocked` Float32, + `dns_down` Float32, + `dns_ok` Float32, + `tcp_blocked` Float32, + `tcp_down` Float32, + `tcp_ok` Float32, + `tls_blocked` Float32, + `tls_down` Float32, + `tls_ok` Float32 + ) ENGINE = ReplicatedReplacingMergeTree ( + '/clickhouse/{cluster}/tables/ooni/analysis_web_measurement/{shard}', + '{replica}' + ) +PARTITION BY + substring(measurement_uid, 1, 6) PRIMARY KEY measurement_uid +ORDER BY + ( + measurement_uid, + measurement_start_time, + probe_cc, + probe_asn, + domain + ) SETTINGS index_granularity = 8192; \ No newline at end of file From 118030f5bd3159e8301e097bacb06e32b57fd834 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 3 Dec 2024 12:43:18 -0500 Subject: [PATCH 39/88] Don't setup node_exporter as part of bootstrap role --- ansible/inventory | 10 ++++++++++ ansible/playbook.yml | 8 ++++++++ ansible/roles/bootstrap/tasks/main.yml | 5 ----- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/ansible/inventory b/ansible/inventory index 25f1f5df..31270622 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -20,3 +20,13 @@ notebook.ooni.org data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu + +[have_node_exporter] +data.ooni.org +oonidata.ooni.org +openvpn-server1.ooni.io +notebook.ooni.org +data1.htz-fsn.prod.ooni.nu +data2.htz-fsn.prod.ooni.nu +data3.htz-fsn.prod.ooni.nu +oonidatatest.ooni.nu diff --git a/ansible/playbook.yml b/ansible/playbook.yml index 63d2b448..be962b57 100644 --- a/ansible/playbook.yml +++ b/ansible/playbook.yml @@ -7,6 +7,14 @@ tags: - bootstrap +- name: Setup node_exporter on have_node_exporter hosts + hosts: all + become: yes + roles: + - prometheus_node_exporter + tags: + - prometheus_node_exporter + - name: Update monitoring config hosts: monitoring.ooni.org become: true diff --git a/ansible/roles/bootstrap/tasks/main.yml b/ansible/roles/bootstrap/tasks/main.yml index ecf1d46f..500d58ff 100644 --- a/ansible/roles/bootstrap/tasks/main.yml +++ b/ansible/roles/bootstrap/tasks/main.yml @@ -55,11 +55,6 @@ tags: - nftables -- ansible.builtin.include_role: - name: prometheus_node_exporter - tags: - - node_exporter - - name: Configure journald tags: - journald From 3ed88c00fd9e64ec71ec853dc4c6669863f9945f Mon Sep 17 00:00:00 2001 From: decfox Date: Wed, 4 Dec 2024 17:36:08 -0500 Subject: [PATCH 40/88] refactor: add base files for backend-hel deployment --- ansible/playbook-backend.yml | 16 + ansible/roles/base-bookworm/README.adoc | 1 + ansible/roles/base-bookworm/meta/main.yml | 6 + ansible/roles/base-bookworm/tasks/main.yml | 221 +++++ .../base-bookworm/templates/internal-deb.gpg | 14 + .../base-bookworm/templates/journald.conf | 8 + 
.../base-bookworm/templates/netdata.conf | 32 + .../templates/ooni_internal.sources | 7 + .../base-bookworm/templates/resolved.conf | 9 + .../base-bookworm/templates/sources.list | 6 + ansible/roles/dehydrated/README.adoc | 10 + ansible/roles/dehydrated/meta/main.yml | 5 + ansible/roles/dehydrated/tasks/main.yml | 108 +++ .../dehydrated/templates/dehydrated.service | 13 + .../dehydrated/templates/dehydrated.timer | 9 + .../roles/dehydrated/templates/domains.txt.j2 | 1 + ansible/roles/dehydrated/templates/hook.sh | 20 + .../dehydrated/templates/letsencrypt-http | 13 + ansible/roles/nftables-sysadmin/README.adoc | 25 + .../roles/nftables-sysadmin/tasks/main.yml | 49 ++ .../nftables-sysadmin/templates/nftables.conf | 41 + .../nginx-buster/files/ffdhe2048_dhparam.pem | 8 + .../nginx-buster/files/ssl_intermediate.conf | 3 + .../roles/nginx-buster/files/ssl_modern.conf | 4 + ansible/roles/nginx-buster/handlers/main.yml | 6 + ansible/roles/nginx-buster/tasks/main.yml | 33 + .../roles/nginx-buster/templates/nginx.conf | 68 ++ ansible/roles/ooni-backend/handlers/main.yml | 6 + ansible/roles/ooni-backend/meta/main.yml | 3 + ansible/roles/ooni-backend/tasks/main.yml | 822 ++++++++++++++++++ ansible/roles/ooni-backend/templates/444.nft | 2 + .../ooni-backend/templates/analysis.conf | 9 + .../ooni-backend/templates/api-uploader.conf | 9 + ansible/roles/ooni-backend/templates/api.conf | 60 ++ .../ooni-backend/templates/api.gunicorn.py | 12 + .../templates/clickhouse_config.xml | 41 + .../templates/clickhouse_readonly.xml | 9 + .../templates/clickhouse_users.xml | 31 + .../ooni-backend/templates/db-backup.conf | 17 + .../templates/deb_ooni_org.nginx.conf | 64 ++ .../ooni-backend/templates/dehydrated.config | 7 + .../templates/dehydrated_haproxy_hook.sh | 23 + .../ooni-backend/templates/fastpath.conf | 15 + .../roles/ooni-backend/templates/haproxy.cfg | 122 +++ .../templates/nginx-api-ams-pg.conf | 297 +++++++ .../ooni-backend/templates/nginx-api-fsn.conf | 260 ++++++ .../templates/nginx-api-test.conf | 157 ++++ .../templates/rotation_nginx_conf | 70 ++ .../ooni-backend/templates/rotation_setup.sh | 114 +++ .../ooni-backend/templates/tor_targets.json | 304 +++++++ 50 files changed, 3190 insertions(+) create mode 100644 ansible/playbook-backend.yml create mode 100644 ansible/roles/base-bookworm/README.adoc create mode 100644 ansible/roles/base-bookworm/meta/main.yml create mode 100644 ansible/roles/base-bookworm/tasks/main.yml create mode 100644 ansible/roles/base-bookworm/templates/internal-deb.gpg create mode 100644 ansible/roles/base-bookworm/templates/journald.conf create mode 100644 ansible/roles/base-bookworm/templates/netdata.conf create mode 100644 ansible/roles/base-bookworm/templates/ooni_internal.sources create mode 100644 ansible/roles/base-bookworm/templates/resolved.conf create mode 100644 ansible/roles/base-bookworm/templates/sources.list create mode 100644 ansible/roles/dehydrated/README.adoc create mode 100644 ansible/roles/dehydrated/meta/main.yml create mode 100644 ansible/roles/dehydrated/tasks/main.yml create mode 100644 ansible/roles/dehydrated/templates/dehydrated.service create mode 100644 ansible/roles/dehydrated/templates/dehydrated.timer create mode 100644 ansible/roles/dehydrated/templates/domains.txt.j2 create mode 100644 ansible/roles/dehydrated/templates/hook.sh create mode 100644 ansible/roles/dehydrated/templates/letsencrypt-http create mode 100644 ansible/roles/nftables-sysadmin/README.adoc create mode 100644 ansible/roles/nftables-sysadmin/tasks/main.yml create mode 
100755 ansible/roles/nftables-sysadmin/templates/nftables.conf create mode 100644 ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem create mode 100644 ansible/roles/nginx-buster/files/ssl_intermediate.conf create mode 100644 ansible/roles/nginx-buster/files/ssl_modern.conf create mode 100644 ansible/roles/nginx-buster/handlers/main.yml create mode 100644 ansible/roles/nginx-buster/tasks/main.yml create mode 100644 ansible/roles/nginx-buster/templates/nginx.conf create mode 100644 ansible/roles/ooni-backend/handlers/main.yml create mode 100644 ansible/roles/ooni-backend/meta/main.yml create mode 100644 ansible/roles/ooni-backend/tasks/main.yml create mode 100644 ansible/roles/ooni-backend/templates/444.nft create mode 100644 ansible/roles/ooni-backend/templates/analysis.conf create mode 100644 ansible/roles/ooni-backend/templates/api-uploader.conf create mode 100644 ansible/roles/ooni-backend/templates/api.conf create mode 100644 ansible/roles/ooni-backend/templates/api.gunicorn.py create mode 100644 ansible/roles/ooni-backend/templates/clickhouse_config.xml create mode 100644 ansible/roles/ooni-backend/templates/clickhouse_readonly.xml create mode 100644 ansible/roles/ooni-backend/templates/clickhouse_users.xml create mode 100644 ansible/roles/ooni-backend/templates/db-backup.conf create mode 100644 ansible/roles/ooni-backend/templates/deb_ooni_org.nginx.conf create mode 100644 ansible/roles/ooni-backend/templates/dehydrated.config create mode 100644 ansible/roles/ooni-backend/templates/dehydrated_haproxy_hook.sh create mode 100644 ansible/roles/ooni-backend/templates/fastpath.conf create mode 100644 ansible/roles/ooni-backend/templates/haproxy.cfg create mode 100644 ansible/roles/ooni-backend/templates/nginx-api-ams-pg.conf create mode 100644 ansible/roles/ooni-backend/templates/nginx-api-fsn.conf create mode 100644 ansible/roles/ooni-backend/templates/nginx-api-test.conf create mode 100644 ansible/roles/ooni-backend/templates/rotation_nginx_conf create mode 100644 ansible/roles/ooni-backend/templates/rotation_setup.sh create mode 100644 ansible/roles/ooni-backend/templates/tor_targets.json diff --git a/ansible/playbook-backend.yml b/ansible/playbook-backend.yml new file mode 100644 index 00000000..c6b36579 --- /dev/null +++ b/ansible/playbook-backend.yml @@ -0,0 +1,16 @@ +--- +- hosts: backend-hel.ooni.org + roles: + - role: base-bookworm + - role: nftables + - role: nginx-buster + tags: nginx + - role: dehydrated + tags: dehydrated + expand: yes + ssl_domains: + # with dehydrated the first entry is the cert FQDN + # and the other ones are alternative names + - "backend-hel.ooni.org" + - role: ooni-backend + ssl_domain: backend-hel.ooni.org diff --git a/ansible/roles/base-bookworm/README.adoc b/ansible/roles/base-bookworm/README.adoc new file mode 100644 index 00000000..be579005 --- /dev/null +++ b/ansible/roles/base-bookworm/README.adoc @@ -0,0 +1 @@ +Configure base host based on Bookworm diff --git a/ansible/roles/base-bookworm/meta/main.yml b/ansible/roles/base-bookworm/meta/main.yml new file mode 100644 index 00000000..5de9bc56 --- /dev/null +++ b/ansible/roles/base-bookworm/meta/main.yml @@ -0,0 +1,6 @@ +--- +dependencies: + - role: adm + become: false + remote_user: root + gather_facts: false diff --git a/ansible/roles/base-bookworm/tasks/main.yml b/ansible/roles/base-bookworm/tasks/main.yml new file mode 100644 index 00000000..c9c3b1da --- /dev/null +++ b/ansible/roles/base-bookworm/tasks/main.yml @@ -0,0 +1,221 @@ +--- +- name: motd + shell: echo "" > /etc/motd + +- name: 
Set hostname + ansible.builtin.hostname: + name: "{{ inventory_hostname }}" + +- name: Remove apt repo + tags: apt + file: + path: /etc/apt/sources.list.d/ftp_nl_debian_org_debian.list + state: absent + +- name: Remove apt repo + tags: apt + file: + path: /etc/apt/sources.list.d/security_debian_org.list + state: absent + +- name: Create internal-deb repo GPG pubkey + tags: apt + template: + src: templates/internal-deb.gpg + dest: /etc/ooni/internal-deb.gpg + mode: 0644 + owner: root + +- name: Set apt repos + tags: apt + template: + src: templates/sources.list + dest: /etc/apt/sources.list + mode: 0644 + owner: root + +- name: Install gpg + tags: base-packages + apt: + install_recommends: no + cache_valid_time: 86400 + name: + - gpg + - gpg-agent + +- name: Update apt cache + tags: apt + apt: + update_cache: yes + +- name: Installs base packages + tags: base-packages + apt: + install_recommends: no + cache_valid_time: 86400 + name: + - bash-completion + - byobu + - chrony + - etckeeper + - fail2ban + - git + - iotop + - jupyter-notebook + - manpages + - ncdu + - netdata-core + - netdata-plugins-bash + - netdata-plugins-python + - netdata-web + - nftables + - nullmailer + - prometheus-node-exporter + - pv + # needed by ansible + - python3-apt + - rsync + - ssl-cert + - strace + - tcpdump + - tmux + - vim + +- name: Configure journald + tags: journald + template: + src: templates/journald.conf + dest: /etc/systemd/journald.conf + mode: 0644 + owner: root + +- name: enable and restart journald + tags: journald + systemd: + name: systemd-journald.service + state: restarted + enabled: yes + daemon_reload: yes + +- name: Autoremove + tags: autoremove + apt: + autoremove: yes + +- name: Clean cache + tags: apt + apt: + autoclean: yes + +- name: allow netdata.service + tags: netdata + blockinfile: + path: /etc/ooni/nftables/tcp/19999.nft + create: yes + block: | + add rule inet filter input ip saddr {{ lookup('dig', 'prometheus.ooni.org/A') }} tcp dport 19999 counter accept comment "netdata.service" + +#- name: reload nftables service +# systemd: +# name: nftables.service +# state: reloaded +# enabled: yes +# daemon_reload: yes + +- name: reload nftables service + service: name=nftables state=restarted + +- name: configure netdata.service + tags: netdata + template: + src: netdata.conf + dest: /etc/netdata/netdata.conf + +- name: disable netdata emails + tags: netdata + blockinfile: + path: /etc/netdata/conf.d/health_alarm_notify.conf + create: yes + block: | + # Managed by ansible, see roles/base-bookworm/tasks/main.yml + SEND_EMAIL="NO" + +- name: Set timezone + tags: timezone + timezone: + name: Etc/UTC + +- name: restart chrony service + tags: timezone + systemd: + name: chrony.service + state: restarted + +- name: configure netdata chrony + tags: netdata, timezone + blockinfile: + path: /etc/netdata/python.d/chrony.conf + create: yes + block: | + # Managed by ansible, see roles/base-bookworm/tasks/main.yml + update_every: 5 + local: + command: 'chronyc -n tracking' + +- name: configure netdata chrony + tags: netdata, timezone + lineinfile: + path: /usr/lib/netdata/conf.d/python.d.conf + regexp: '^chrony:' + line: 'chrony: yes' + +#- name: configure netdata nginx +# blockinfile: +# path: /etc/netdata/python.d/nginx.conf +# create: yes +# block: | +# # Managed by ansible, see roles/base-bookworm/tasks/main.yml +# update_every: 5 +# nginx_log: +# name : 'nginx_log' +# path : '/var/log/nginx/access.log' + +#- name: configure netdata haproxy +# blockinfile: +# path: 
/etc/netdata/python.d/haproxy.conf +# block: | +# # Managed by ansible, see roles/base-bookworm/tasks/main.yml +# update_every: 5 +# via_url: +# url: 'http://127.0.0.1:7000/haproxy_stats;csv;norefresh' + +- name: restart netdata service + tags: netdata, timezone + systemd: + name: netdata.service + state: restarted + + +- name: install systemd-resolved + tags: resolved + apt: + install_recommends: no + cache_valid_time: 86400 + name: + - systemd-resolved + +- name: configure systemd-resolved + tags: resolved + template: + src: resolved.conf + dest: /etc/systemd/resolved.conf + +- name: restart systemd-resolved + tags: resolved + systemd: + name: systemd-resolved.service + state: restarted + +- name: test systemd-resolved + tags: resolved + shell: resolvectl query go.dnscheck.tools --cache=no diff --git a/ansible/roles/base-bookworm/templates/internal-deb.gpg b/ansible/roles/base-bookworm/templates/internal-deb.gpg new file mode 100644 index 00000000..28126a36 --- /dev/null +++ b/ansible/roles/base-bookworm/templates/internal-deb.gpg @@ -0,0 +1,14 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mDMEYGISFRYJKwYBBAHaRw8BAQdA4VxoR0gSsH56BbVqYdK9HNQ0Dj2YFVbvKIIZ +JKlaW920Mk9PTkkgcGFja2FnZSBzaWduaW5nIDxjb250YWN0QG9wZW5vYnNlcnZh +dG9yeS5vcmc+iJYEExYIAD4WIQS1oI8BeW5/UhhhtEk3LR/ycfLdUAUCYGISFQIb +AwUJJZgGAAULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRA3LR/ycfLdUFk+AQCb +gsUQsAQGxUFvxk1XQ4RgEoh7wy2yTuK8ZCkSHJ0HWwD/f2OAjDigGq07uJPYw7Uo +Ih9+mJ/ubwiPMzUWF6RSdgu4OARgYhIVEgorBgEEAZdVAQUBAQdAx4p1KerwcIhX +HfM9LbN6Gi7z9j4/12JKYOvr0d0yC30DAQgHiH4EGBYIACYWIQS1oI8BeW5/Uhhh +tEk3LR/ycfLdUAUCYGISFQIbDAUJJZgGAAAKCRA3LR/ycfLdUL4cAQCs53fLphhy +6JMwVhRs02LXi1lntUtw1c+EMn6t7XNM6gD+PXpbgSZwoV3ZViLqr58o9fZQtV3s +oN7jfdbznrWVigE= +=PtYb +-----END PGP PUBLIC KEY BLOCK----- diff --git a/ansible/roles/base-bookworm/templates/journald.conf b/ansible/roles/base-bookworm/templates/journald.conf new file mode 100644 index 00000000..d7ae85e1 --- /dev/null +++ b/ansible/roles/base-bookworm/templates/journald.conf @@ -0,0 +1,8 @@ +[Journal] +Storage=persistent +Compress=yes +#RateLimitIntervalSec=30s +#RateLimitBurst=10000 +SystemMaxFileSize=200M +RuntimeMaxFileSize=1G +ForwardToSyslog=no diff --git a/ansible/roles/base-bookworm/templates/netdata.conf b/ansible/roles/base-bookworm/templates/netdata.conf new file mode 100644 index 00000000..e2bef302 --- /dev/null +++ b/ansible/roles/base-bookworm/templates/netdata.conf @@ -0,0 +1,32 @@ +# Managed by ansible, see roles/base-bookworm/tasks/main.yml +[global] + run as user = netdata + web files owner = root + web files group = root + bind socket to IP = 0.0.0.0 + +[plugins] + python.d = yes + + +[statsd] + enabled = yes + # decimal detail = 1000 + update every (flushInterval) = 1 + # udp messages to process at once = 10 + # create private charts for metrics matching = * + max private charts allowed = 10000 + max private charts hard limit = 10000 + private charts memory mode = ram + private charts history = 300 + # histograms and timers percentile (percentThreshold) = 95.00000 + # add dimension for number of events received = no + # gaps on gauges (deleteGauges) = no + # gaps on counters (deleteCounters) = no + # gaps on meters (deleteMeters) = no + # gaps on sets (deleteSets) = no + # gaps on histograms (deleteHistograms) = no + # gaps on timers (deleteTimers) = no + # listen backlog = 4096 + # default port = 8125 + # bind to = udp:localhost:8125 tcp:localhost:8125 diff --git a/ansible/roles/base-bookworm/templates/ooni_internal.sources 
b/ansible/roles/base-bookworm/templates/ooni_internal.sources new file mode 100644 index 00000000..f85bc625 --- /dev/null +++ b/ansible/roles/base-bookworm/templates/ooni_internal.sources @@ -0,0 +1,7 @@ +Architectures: amd64 +Suites: unstable +Uris: https://ooni-internal-deb.s3.eu-central-1.amazonaws.com +Types: deb +Components: main +Enabled: yes +Signed-By: /etc/ooni/internal-deb.gpg diff --git a/ansible/roles/base-bookworm/templates/resolved.conf b/ansible/roles/base-bookworm/templates/resolved.conf new file mode 100644 index 00000000..dd937e3c --- /dev/null +++ b/ansible/roles/base-bookworm/templates/resolved.conf @@ -0,0 +1,9 @@ +# Deployed by ansible +# See roles/base-bookworm/templates/resolved.conf + +[Resolve] +## https://meta.wikimedia.org/wiki/Wikimedia_DNS +DNS=185.71.138.138 +DNSOverTLS=opportunistic +DNSSEC=allow-downgrade +Cache=yes diff --git a/ansible/roles/base-bookworm/templates/sources.list b/ansible/roles/base-bookworm/templates/sources.list new file mode 100644 index 00000000..7432ddad --- /dev/null +++ b/ansible/roles/base-bookworm/templates/sources.list @@ -0,0 +1,6 @@ +# Managed by ansible +# roles/base-bookworm/templates/sources.list + +deb http://deb.debian.org/debian bookworm main contrib non-free-firmware +deb http://deb.debian.org/debian-security/ bookworm-security main contrib non-free-firmware +deb http://deb.debian.org/debian bookworm-backports main diff --git a/ansible/roles/dehydrated/README.adoc b/ansible/roles/dehydrated/README.adoc new file mode 100644 index 00000000..477601de --- /dev/null +++ b/ansible/roles/dehydrated/README.adoc @@ -0,0 +1,10 @@ + +Configure dehydrated to generate certificates (locally to each server) + +- listen on port 443 for ACME challenge + +- ansible --diff is supported + +- generate certificate expirations metrics for node exporter + +- changes to /etc are also tracked locally by etckeeper diff --git a/ansible/roles/dehydrated/meta/main.yml b/ansible/roles/dehydrated/meta/main.yml new file mode 100644 index 00000000..e7e996b0 --- /dev/null +++ b/ansible/roles/dehydrated/meta/main.yml @@ -0,0 +1,5 @@ +--- +dependencies: + - nginx-buster +... + diff --git a/ansible/roles/dehydrated/tasks/main.yml b/ansible/roles/dehydrated/tasks/main.yml new file mode 100644 index 00000000..0bfaf7c3 --- /dev/null +++ b/ansible/roles/dehydrated/tasks/main.yml @@ -0,0 +1,108 @@ +--- +- name: Installs packages + tags: dehydrated + apt: + install_recommends: no + cache_valid_time: 86400 + name: + - dehydrated + +#- name: create dehydrated hook file +# # This hook is called after getting a new cert to deploy it +# template: +# src: templates/hook.sh +# dest: /etc/dehydrated/hook.sh +# mode: 0755 +# owner: root +# +# +#- name: set dehydrated hook +# blockinfile: +# path: /etc/dehydrated/config +# block: | +# HOOK="/etc/dehydrated/hook.sh" + +- name: Add ACME dedicated sites-enabled file + tags: dehydrated + template: + src: templates/letsencrypt-http + # the server block matches all SSL FQDNs and must be + # parsed first, hence 00- + dest: /etc/nginx/sites-enabled/00-letsencrypt-http + mode: 0644 + owner: root + +- name: Add canary file to ensure /.well-known/acme-challenge is reachable by let's encrypt + tags: dehydrated + copy: + content: | + Generated by ansible using ansible/roles/dehydrated/tasks/main.yml. + + Also, meow!!! 
+ dest: /var/lib/dehydrated/acme-challenges/ooni-acme-canary + mode: 0644 + owner: root + +- name: reload nginx + tags: dehydrated + shell: systemctl reload nginx.service + +- name: allow incoming TCP connections to Nginx on port 80 + tags: dehydrated + blockinfile: + path: /etc/ooni/nftables/tcp/80.nft + create: yes + block: | + add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP" + +- name: reload nftables service + tags: dehydrated + shell: systemctl reload nftables.service + +- name: Configure domains {{ ssl_domains }} + # https://github.com/dehydrated-io/dehydrated/blob/master/docs/domains_txt.md + tags: dehydrated + template: + src: templates/domains.txt.j2 + dest: /etc/dehydrated/domains.txt + +- name: Register account if needed + tags: dehydrated + ansible.builtin.shell: + cmd: "test -d /var/lib/dehydrated/accounts || dehydrated --register --accept-terms" + +- name: Install dehydrated.service + tags: dehydrated + template: + src: templates/dehydrated.service + dest: /etc/systemd/system/dehydrated.service + mode: 0644 + owner: root + +- name: Install dehydrated.timer + tags: dehydrated + template: + src: templates/dehydrated.timer + dest: /etc/systemd/system/dehydrated.timer + mode: 0644 + owner: root + +- name: Ensure timer runs + tags: dehydrated + systemd: + name: dehydrated.timer + state: started + enabled: yes + +- name: Run dehydrated service immediately + # creates: + # /var/lib/dehydrated/certs//chain.pem cert.pem privkey.pem fullchain.pem + tags: dehydrated + systemd: + name: dehydrated.service + state: started + enabled: yes + +- name: reload nginx + tags: dehydrated + shell: systemctl reload nginx.service diff --git a/ansible/roles/dehydrated/templates/dehydrated.service b/ansible/roles/dehydrated/templates/dehydrated.service new file mode 100644 index 00000000..50ffdc46 --- /dev/null +++ b/ansible/roles/dehydrated/templates/dehydrated.service @@ -0,0 +1,13 @@ +[Unit] +Description=Run dehydrated certificate refresh + +[Service] +Type=oneshot +#User=dehydrated +#Group=dehydrated +ProtectSystem=strict +ProtectHome=yes +ReadWritePaths=/var/lib/dehydrated +PrivateTmp=yes +ExecStart=/usr/bin/dehydrated --cron +ExecStartPost=+/bin/systemctl reload nginx.service diff --git a/ansible/roles/dehydrated/templates/dehydrated.timer b/ansible/roles/dehydrated/templates/dehydrated.timer new file mode 100644 index 00000000..5e6ea784 --- /dev/null +++ b/ansible/roles/dehydrated/templates/dehydrated.timer @@ -0,0 +1,9 @@ +[Unit] +Description=Run dehydrated certificate refresh + +[Timer] +OnCalendar=Mon 13:00 + +[Install] +WantedBy=timers.target + diff --git a/ansible/roles/dehydrated/templates/domains.txt.j2 b/ansible/roles/dehydrated/templates/domains.txt.j2 new file mode 100644 index 00000000..5850d203 --- /dev/null +++ b/ansible/roles/dehydrated/templates/domains.txt.j2 @@ -0,0 +1 @@ +{% for d in ssl_domains %}{{ d }} {% endfor %} diff --git a/ansible/roles/dehydrated/templates/hook.sh b/ansible/roles/dehydrated/templates/hook.sh new file mode 100644 index 00000000..26193aeb --- /dev/null +++ b/ansible/roles/dehydrated/templates/hook.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +# Deployed by ansible +# see ansible/roles/dehydrated/templates/hook.sh +# +deploy_cert() { + local DOMAIN="${1}" KEYFILE="${2}" CERTFILE="${3}" FULLCHAINFILE="${4}" CHAINFILE="${5}" TIMESTAMP="${6}" + # This hook is called once for each certificate that has been produced. + # Parameters: + # - DOMAIN The primary domain name, i.e. the certificate common name (CN). 
+ # - KEYFILE The path of the file containing the private key. + # - CERTFILE The path of the file containing the signed certificate. + # - FULLCHAINFILE The path of the file containing the full certificate chain. + # - CHAINFILE The path of the file containing the intermediate certificate(s). + # - TIMESTAMP Timestamp when the specified certificate was created. + + logger "Deploying SSL certificate $DOMAIN $KEYFILE $CERTFILE $FULLCHAINFILE $CHAINFILE $TIMESTAMP" + # cp ... + #systemctl reload nginx +} diff --git a/ansible/roles/dehydrated/templates/letsencrypt-http b/ansible/roles/dehydrated/templates/letsencrypt-http new file mode 100644 index 00000000..41fda273 --- /dev/null +++ b/ansible/roles/dehydrated/templates/letsencrypt-http @@ -0,0 +1,13 @@ +# Generated by ansible +# roles/dehydrated/templates/letsencrypt-http + +server { + # Listen on port 80 for *any* domain + listen 80; + server_name _; + + # Serve ACME challenge from disk + location ^~ /.well-known/acme-challenge { + alias /var/lib/dehydrated/acme-challenges; + } +} diff --git a/ansible/roles/nftables-sysadmin/README.adoc b/ansible/roles/nftables-sysadmin/README.adoc new file mode 100644 index 00000000..e3bef58f --- /dev/null +++ b/ansible/roles/nftables-sysadmin/README.adoc @@ -0,0 +1,25 @@ +Install nftables based firewall + +Set up /etc/ooni/nftables/ + +Rules for specific services are *not* configured by this role + +When creating rules to accept TCP traffic from any IPv4/6 address, +files are named with the port number to detect collisions. + +Example (also see roles/nftables/tasks/main.yml): + +/etc/ooni/nftables/tcp/8080.nft + +``` +add rule inet filter input tcp dport 8080 counter accept comment "MyService" +``` + + +Otherwise: + +/etc/ooni/nftables/tcp/5432_postgres_internal.nft + +``` +add rule inet filter input ip saddr { 10.0.0.0/8, 192.168.0.0/16 } tcp dport 5432 counter accept comment "Internal PostgreSQL" +``` diff --git a/ansible/roles/nftables-sysadmin/tasks/main.yml b/ansible/roles/nftables-sysadmin/tasks/main.yml new file mode 100644 index 00000000..886c7ab0 --- /dev/null +++ b/ansible/roles/nftables-sysadmin/tasks/main.yml @@ -0,0 +1,49 @@ +--- +- name: Install nftables + tags: nftables + apt: + cache_valid_time: 86400 + name: nftables + +- name: create config dir + tags: nftables + file: + path: /etc/ooni/nftables/tcp + state: directory + owner: root + group: root + mode: 0755 + +- name: allow SSH + tags: nftables + blockinfile: + path: /etc/ooni/nftables/tcp/22.nft + create: yes + block: | + add rule inet filter input tcp dport 22 counter accept comment "Incoming SSH" + +- name: Overwrite nftables.conf + tags: nftables + template: + src: templates/nftables.conf + dest: /etc/nftables.conf + mode: 0755 + owner: root + +- name: enable nftables service + tags: nftables + shell: systemctl enable nftables.service + +- name: enable nftables service + tags: nftables + shell: systemctl start nftables.service + +#- name: Enable and start nftables service +# systemd: +# name: nftables.service +# state: reloaded +# enabled: yes + +- name: reload nftables service + tags: nftables + shell: systemctl reload nftables.service diff --git a/ansible/roles/nftables-sysadmin/templates/nftables.conf b/ansible/roles/nftables-sysadmin/templates/nftables.conf new file mode 100755 index 00000000..5f7b50cc --- /dev/null +++ b/ansible/roles/nftables-sysadmin/templates/nftables.conf @@ -0,0 +1,41 @@ +#!/usr/sbin/nft -f +# +# Nftables configuration script +# +# Managed by ansible +# roles/nftables/templates/nftables.conf +# +# The 
ruleset is applied atomically + +flush ruleset + +table inet filter { + chain input { + type filter hook input priority 0; + policy drop; + iif lo accept comment "Accept incoming traffic from localhost" + ct state invalid drop + ct state established,related accept comment "Accept traffic related to outgoing connections" + icmp type echo-request accept + icmpv6 type echo-request counter packets 0 bytes 0 accept + icmpv6 type { nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert } ip6 hoplimit 1 accept + icmpv6 type { nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert } ip6 hoplimit 255 counter packets 1 bytes 72 accept + } + + chain forward { + type filter hook forward priority 0; + policy accept; + } + + chain output { + type filter hook output priority 0; + policy accept; + } +} + +# Configure TCP traffic rules +include "/etc/ooni/nftables/tcp/*.nft" + +# Configure any other rule +include "/etc/ooni/nftables/*.nft" + diff --git a/ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem b/ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem new file mode 100644 index 00000000..9b182b72 --- /dev/null +++ b/ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem @@ -0,0 +1,8 @@ +-----BEGIN DH PARAMETERS----- +MIIBCAKCAQEA//////////+t+FRYortKmq/cViAnPTzx2LnFg84tNpWp4TZBFGQz ++8yTnc4kmz75fS/jY2MMddj2gbICrsRhetPfHtXV/WVhJDP1H18GbtCFY2VVPe0a +87VXE15/V8k1mE8McODmi3fipona8+/och3xWKE2rec1MKzKT0g6eXq8CrGCsyT7 +YdEIqUuyyOP7uWrat2DX9GgdT0Kj3jlN9K5W7edjcrsZCwenyO4KbXCeAvzhzffi +7MA0BM0oNC9hkXL+nOmFg/+OTxIy7vKBg8P+OxtMb61zO7X8vC7CIAXFjvGDfRaD +ssbzSibBsu/6iGtCOGEoXJf//////////wIBAg== +-----END DH PARAMETERS----- diff --git a/ansible/roles/nginx-buster/files/ssl_intermediate.conf b/ansible/roles/nginx-buster/files/ssl_intermediate.conf new file mode 100644 index 00000000..96d2e6e2 --- /dev/null +++ b/ansible/roles/nginx-buster/files/ssl_intermediate.conf @@ -0,0 +1,3 @@ +# Oldest compatible clients: Firefox 1, Chrome 1, IE 7, Opera 5, Safari 1, Windows XP IE8, Android 2.3, Java 7 +ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE +ssl_ciphers 'ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA256:DHE-RSA-AES256-SHA:ECDHE-ECDSA-DES-CBC3-SHA:ECDHE-RSA-DES-CBC3-SHA:EDH-RSA-DES-CBC3-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:!DSS'; diff --git a/ansible/roles/nginx-buster/files/ssl_modern.conf b/ansible/roles/nginx-buster/files/ssl_modern.conf new file mode 100644 index 00000000..9ad7c11d --- /dev/null +++ b/ansible/roles/nginx-buster/files/ssl_modern.conf @@ -0,0 +1,4 @@ +# Oldest compatible clients: Firefox 27, Chrome 30, IE 11 on Windows 7, Edge, Opera 17, Safari 9, Android 5.0, and Java 8 +ssl_protocols TLSv1.2; +ssl_ciphers 'ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256'; +# NB: technically, it does not require ssl_dhparam as it has no DHE, only ECDHE. 
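As a note on how the three snippets above fit together: a site vhost is expected to include them rather than inline its own TLS settings. A minimal sketch of such a server block (illustrative only: the FQDN is an example, and the certificate paths assume dehydrated's default layout under /var/lib/dehydrated/certs/ as created by the dehydrated role) might look like:

server {
    listen 443 ssl http2;
    server_name backend-hel.ooni.org;

    # certificates produced by the dehydrated role
    ssl_certificate     /var/lib/dehydrated/certs/backend-hel.ooni.org/fullchain.pem;
    ssl_certificate_key /var/lib/dehydrated/certs/backend-hel.ooni.org/privkey.pem;

    # cipher policy and ffdhe2048 DH parameters copied to /etc/nginx by this role
    include /etc/nginx/ssl_intermediate.conf;
    ssl_dhparam /etc/nginx/ffdhe2048_dhparam.pem;
}

ssl_modern.conf can be included instead of ssl_intermediate.conf when older clients do not need to be supported; in that case ssl_dhparam is optional, since the modern profile uses ECDHE only.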
diff --git a/ansible/roles/nginx-buster/handlers/main.yml b/ansible/roles/nginx-buster/handlers/main.yml new file mode 100644 index 00000000..b180da14 --- /dev/null +++ b/ansible/roles/nginx-buster/handlers/main.yml @@ -0,0 +1,6 @@ +- name: restart nginx + service: name=nginx state=restarted +- name: start nginx + service: name=nginx state=started +- name: reload nginx + service: name=nginx state=reloaded diff --git a/ansible/roles/nginx-buster/tasks/main.yml b/ansible/roles/nginx-buster/tasks/main.yml new file mode 100644 index 00000000..30f37e57 --- /dev/null +++ b/ansible/roles/nginx-buster/tasks/main.yml @@ -0,0 +1,33 @@ +--- +- name: install stable nginx + apt: + name: nginx + cache_valid_time: 86400 + notify: start nginx + +# https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 +# +# Guide https://wiki.mozilla.org/Security/Server_Side_TLS#Pre-defined_DHE_groups +# suggests ffdhe2048 instead of `openssl dhparam` to avoid https://weakdh.org/ +- name: copy nginx configuration snippets + copy: src={{item}} dest=/etc/nginx/{{ item }} mode=0444 owner=root group=root + with_items: + - ffdhe2048_dhparam.pem # ffdhe2048 Diffie-Hellman parameters + - ssl_intermediate.conf + - ssl_modern.conf + +- name: remove `default` vhost + file: path={{item}} state=absent + #notify: reload nginx + with_items: + - /etc/nginx/conf.d/default.conf + - /etc/nginx/sites-available/default + - /etc/nginx/sites-enabled/default + +- name: set nginx.conf + template: + src=nginx.conf + dest=/etc/nginx/nginx.conf + mode=0444 + #notify: reload nginx +... diff --git a/ansible/roles/nginx-buster/templates/nginx.conf b/ansible/roles/nginx-buster/templates/nginx.conf new file mode 100644 index 00000000..f9e742a1 --- /dev/null +++ b/ansible/roles/nginx-buster/templates/nginx.conf @@ -0,0 +1,68 @@ + +# Managed by ansible +# roles/nginx-buster/templates/nginx.conf +# +# Generated with: +# https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 +# + +user www-data; +worker_processes auto; +pid /run/nginx.pid; +include /etc/nginx/modules-enabled/*.conf; + +events { + worker_connections 768; + # multi_accept on; +} + +http { + + # Basic Settings + + sendfile on; + tcp_nopush on; # TCP_CORK HTTP headers with sendfile() body into single packet + types_hash_max_size 2048; + # server_tokens off; + + # server_names_hash_bucket_size 64; + # server_name_in_redirect off; + + include /etc/nginx/mime.types; + default_type application/octet-stream; + + # Logging Settings + + # anonymize ipaddr + map $remote_addr $remote_addr_anon { + ~(?P<ip>\d+\.\d+\.\d+)\. 
$ip.0; + ~(?P<ip>[^:]+:[^:]+): $ip::; + default 0.0.0.0; + } + + # log anonymized ipaddr and caching status + log_format ooni_nginx_fmt '$remote_addr_anon $upstream_cache_status [$time_local] ' + '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"'; + + + access_log syslog:server=unix:/dev/log ooni_nginx_fmt; + error_log syslog:server=unix:/dev/log; + + # Gzip Settings + + gzip on; + + # gzip_vary on; + # gzip_proxied any; + # gzip_comp_level 6; + # gzip_buffers 16 8k; + # gzip_http_version 1.1; + # gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; + + # Virtual Host Configs + + include /etc/nginx/conf.d/*.conf; + include /etc/nginx/sites-enabled/*; + +} + diff --git a/ansible/roles/ooni-backend/handlers/main.yml b/ansible/roles/ooni-backend/handlers/main.yml new file mode 100644 index 00000000..84d0f4f1 --- /dev/null +++ b/ansible/roles/ooni-backend/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: reload nftables + service: name=nftables state=reloaded + +- name: restart clickhouse + service: name=clickhouse-server state=restarted diff --git a/ansible/roles/ooni-backend/meta/main.yml b/ansible/roles/ooni-backend/meta/main.yml new file mode 100644 index 00000000..c82f9e2d --- /dev/null +++ b/ansible/roles/ooni-backend/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: nftables diff --git a/ansible/roles/ooni-backend/tasks/main.yml b/ansible/roles/ooni-backend/tasks/main.yml new file mode 100644 index 00000000..55c56bf6 --- /dev/null +++ b/ansible/roles/ooni-backend/tasks/main.yml @@ -0,0 +1,822 @@ +--- + +## API ## + +- name: install API if not present + # do not update package if present + tags: api + apt: + cache_valid_time: '{{ apt_cache_valid_time }}' + name: ooni-api + state: present + update_cache: yes + +- name: create Nginx cache dir + file: + path: /var/cache/nginx/ooni-api + state: directory + +- name: configure test api + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: api + template: + src: api.conf + dest: /etc/ooni/api.conf + owner: ooniapi + group: ooniapi + mode: 0640 + vars: + collectors: [] + # bucket_name and collector_id must match the uploader + collector_id: 2 + bucket_name: ooni-data-eu-fra-test + github_push_repo: "ooni-bot/test-lists" + github_origin_repo: "ooni/test-lists" + login_base_url: "https://test-lists.test.ooni.org/login" + pg_uri: "" + clickhouse_url: clickhouse://api:api@localhost/default + # mail_smtp_password: "DISABLED" + # jwt_encryption_key and account_id_hashing_key are taken from the vault + +- name: configure backend-hel api + when: inventory_hostname == 'backend-hel.ooni.org' + tags: api + template: + src: api.conf + dest: /etc/ooni/api.conf + owner: ooniapi + group: ooniapi + mode: 0640 + vars: + collectors: ['backend-hel.ooni.org',] + # bucket_name and collector_id must match the uploader + collector_id: 3 + # test bucket + bucket_name: ooni-data-eu-fra-test + # test GH repo + github_push_repo: "ooni-bot/test-lists" + github_origin_repo: "citizenlab/test-lists" + login_base_url: "https://test-lists.ooni.org/login" + pg_uri: "" + clickhouse_url: clickhouse://api:api@localhost/default + base_url: "https://backend-hel.ooni.org" + +- name: configure backend-fsn api + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: api + template: + src: api.conf + dest: /etc/ooni/api.conf + owner: ooniapi + group: ooniapi + mode: 0640 + vars: + collectors: ['backend-fsn.ooni.org', 'ams-pg.ooni.org'] + # bucket_name and collector_id must 
match the uploader + collector_id: 1 + bucket_name: ooni-data-eu-fra + github_push_repo: "ooni/test-lists" + github_origin_repo: "citizenlab/test-lists" + login_base_url: "https://test-lists.ooni.org/login" + pg_uri: "" + clickhouse_url: clickhouse://api:api@localhost/default + base_url: "https://api.ooni.io" + +- name: configure prod api + when: inventory_hostname == 'ams-pg.ooni.org' + tags: api + template: + src: api.conf + dest: /etc/ooni/api.conf + owner: ooniapi + group: ooniapi + mode: 0640 + vars: + collectors: ['backend-fsn.ooni.org', 'ams-pg.ooni.org'] + # collector_id must match the uploader + collector_id: 0 + bucket_name: ooni-data-eu-fra + github_push_repo: "ooni/test-lists" + github_origin_repo: "citizenlab/test-lists" + login_base_url: "https://test-lists.ooni.org/login" + pg_uri: "postgresql://shovel:yEqgNr2eXvgG255iEBxVeP@localhost/metadb" + clickhouse_url: "" + +- name: create Psiphon conffile + tags: api + copy: + content: "{{ psiphon_config }}" + dest: /etc/ooni/psiphon_config.json + +- name: Write Tor targets conffile + tags: api + template: + src: tor_targets.json + dest: /etc/ooni/tor_targets.json + +- name: configure api uploader using test bucket + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: api + template: + src: templates/api-uploader.conf + dest: /etc/ooni/api-uploader.conf + vars: + # bucket_name and collector_id must match the API + bucket_name: ooni-data-eu-fra-test + collector_id: 2 + +- name: configure FSN api uploader using PROD bucket + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: api + template: + src: templates/api-uploader.conf + dest: /etc/ooni/api-uploader.conf + vars: + # bucket_name and collector_id must match the API + bucket_name: ooni-data-eu-fra + collector_id: 1 + +# - name: configure HEL api uploader using test bucket +# when: inventory_hostname == 'backend-hel.ooni.org' +# tags: api +# template: +# src: templates/api-uploader.conf +# dest: /etc/ooni/api-uploader.conf +# vars: +# # bucket_name and collector_id must match the API +# bucket_name: ooni-data-eu-fra-test +# collector_id: 3 + + +## Haproxy and nginx ## + +- name: configure api uploader using PROD bucket + when: inventory_hostname == 'ams-pg.ooni.org' + tags: api + template: + src: templates/api-uploader.conf + dest: /etc/ooni/api-uploader.conf + vars: + # bucket_name and collector_id must match the API + bucket_name: ooni-data-eu-fra + collector_id: 0 + +- name: Overwrite API nginx test conf + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: api, webserv + template: + src: templates/nginx-api-test.conf + dest: /etc/nginx/sites-available/ooni-api.conf + mode: 0755 + owner: root + vars: + # Uses dehydrated + certpath: /var/lib/dehydrated/certs/ + +- name: Overwrite API nginx HEL conf + when: inventory_hostname == 'backend-hel.ooni.org' + tags: api, webserv + template: + src: templates/nginx-api-test.conf + dest: /etc/nginx/sites-available/ooni-api.conf + mode: 0755 + owner: root + vars: + # Uses dehydrated + certpath: /var/lib/dehydrated/certs/ + +- name: install haproxy if not present + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: webserv + apt: + cache_valid_time: 86400 + name: haproxy + state: present + +- name: Deploy haproxy conf + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: api, webserv + template: + src: templates/haproxy.cfg + dest: /etc/haproxy/haproxy.cfg + mode: 0755 + owner: root + vars: + # Uses dehydrated + certpath: /var/lib/dehydrated/certs/ + +- 
name: Delete old files + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: api, webserv + ansible.builtin.file: + path: "{{ item }}" + state: absent + loop: + - /etc/nginx/sites-enabled/00-letsencrypt-http + - /etc/nginx/sites-enabled/deb_ooni_org + - /etc/nginx/sites-enabled/deb_ooni_org_http + +- name: Deploy dehydrated conf + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: api, webserv + template: + src: templates/dehydrated.config + dest: /etc/dehydrated/config + mode: 0755 + owner: root + +- name: Deploy dehydrated haproxy hook + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: api, webserv + template: + src: templates/dehydrated_haproxy_hook.sh + dest: /etc/dehydrated/haproxy_hook.sh + mode: 0755 + owner: root + +- name: Overwrite API nginx FSN conf + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: api, webserv + template: + src: templates/nginx-api-fsn.conf + dest: /etc/nginx/sites-available/ooni-api.conf + mode: 0755 + owner: root + vars: + # Uses dehydrated + certpath: /var/lib/dehydrated/certs/ + +- name: Overwrite API nginx prod conf + when: inventory_hostname == 'ams-pg.ooni.org' + tags: api, webserv + template: + src: templates/nginx-api-ams-pg.conf + dest: /etc/nginx/sites-available/ooni-api.conf + mode: 0755 + owner: root + vars: + certpath: /etc/letsencrypt/live/ + +- name: Deploy API gunicorn conf + tags: api + template: + src: api.gunicorn.py + dest: /etc/ooni/api.gunicorn.py + owner: ooniapi + group: ooniapi + mode: 0640 + +- name: Create symlink for API nginx conf + tags: api + file: + src=/etc/nginx/sites-available/ooni-api.conf + dest=/etc/nginx/sites-enabled/ooni-api.conf + state=link + +- name: Configure deb.ooni.org forwarder on FSN host + when: inventory_hostname in ('backend-fsn.ooni.org', ) + tags: deb_ooni_org + # Uses dehydrated + template: + src: deb_ooni_org.nginx.conf + dest: /etc/nginx/sites-enabled/deb_ooni_org + +- name: Configure deb-ci.ooni.org forwarder on test host + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: deb_ooni_org + blockinfile: + path: /etc/nginx/sites-enabled/deb_ooni_org_http + create: yes + block: | + # Managed by ansible, see roles/ooni-backend/tasks/main.yml + server { + listen 80; + server_name deb-ci.ooni.org; + location / { + proxy_pass https://ooni-internal-deb.s3.eu-central-1.amazonaws.com/; + } + } + +- name: create badges dir + tags: api + file: + path: /var/www/package_badges/ + state: directory + +- name: Safely reload Nginx + # TODO remove restart after transition to haproxy + tags: api, deb_ooni_org, webserv + shell: nginx -t && systemctl reload nginx + +- name: Restart Nginx + tags: webserv + shell: nginx -t && systemctl restart nginx + +- name: Restart haproxy + # reload is not enough + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: api, deb_ooni_org, webserv + shell: systemctl restart haproxy + +- name: allow incoming TCP connections to API + tags: api + blockinfile: + path: /etc/ooni/nftables/tcp/443.nft + create: yes + block: | + add rule inet filter input tcp dport 443 counter accept comment "incoming HTTPS" + +- name: allow incoming TCP connections to haproxy metrics + tags: webserv + template: + src: 444.nft +
dest: /etc/ooni/nftables/tcp/444.nft + +#- name: reload nftables service +# tags: api +# systemd: +# name: nftables.service +# state: reloaded + +- name: reload nftables service + tags: api, webserv + shell: systemctl reload nftables.service + + +## Fastpath ## + +- name: install fastpath if not present + # do not update package if present + when: inventory_hostname != 'backend-fsn.ooni.org' + tags: fastpath + apt: + cache_valid_time: 86400 + name: fastpath + state: present + +- name: configure fastpath on test + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: fastpath + template: + src: fastpath.conf + dest: /etc/ooni/fastpath.conf + owner: fastpath + group: fastpath + mode: 0640 + vars: + clickhouse_url: clickhouse://fastpath:fastpath@localhost/default + +- name: configure fastpath on FSN + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: fastpath + template: + src: fastpath.conf + dest: /etc/ooni/fastpath.conf + owner: fastpath + group: fastpath + mode: 0640 + vars: + clickhouse_url: clickhouse://fastpath:fastpath@localhost/default + + +- name: configure fastpath on HEL + when: inventory_hostname == 'backend-hel.ooni.org' + tags: fastpath + template: + src: fastpath.conf + dest: /etc/ooni/fastpath.conf + owner: fastpath + group: fastpath + mode: 0640 + vars: + clickhouse_url: clickhouse://fastpath:fastpath@localhost/default +- name: configure fastpath on ams-pg + when: inventory_hostname == 'ams-pg.ooni.org' + tags: fastpath + template: + src: fastpath.conf + dest: /etc/ooni/fastpath.conf + owner: fastpath + group: fastpath + mode: 0640 + vars: + clickhouse_url: + psql_uri: postgresql://shovel:yEqgNr2eXvgG255iEBxVeP@localhost/metadb + + +## Event detector ## + +#- name: install detector +# tags: detector +# apt: +# cache_valid_time: 86400 +# name: detector +# +#- name: configure detector +# tags: detector +# blockinfile: +# path: /etc/ooni/detector.conf +# create: yes +# block: | +# # Managed by ansible, see roles/ooni-backend/tasks/main.yml + + +## Analysis daemon ## + +- name: install analysis + # do not update package if present + when: inventory_hostname != 'backend-fsn.ooni.org' + tags: analysis + apt: + cache_valid_time: 86400 + name: analysis=1.4~pr408-209 + force: True + state: present + +- name: configure analysis + tags: analysis-conf + template: + src: analysis.conf + dest: /etc/ooni/analysis.conf + # Managed by ansible, see roles/ooni-backend/tasks/main.yml + + +## Test helper rotation ## + +- name: configure test helper rotation + tags: rotation + when: inventory_hostname == 'backend-fsn.ooni.org' + blockinfile: + path: /etc/ooni/rotation.conf + create: yes + mode: 0400 + block: | + # Managed by ansible, see roles/ooni-backend/tasks/main.yml + [DEFAULT] + # Digital Ocean token + token = {{ digital_ocean_token }} + active_droplets_count = 4 + size_slug = s-1vcpu-1gb + image_name = debian-11-x64 + draining_time_minutes = 1440 + dns_zone = th.ooni.org + +- name: configure test helper rotation certbot + tags: rotation + when: inventory_hostname == 'backend-fsn.ooni.org' + blockinfile: + path: /etc/ooni/certbot-digitalocean + create: yes + mode: 0400 + block: | + # Managed by ansible, see roles/ooni-backend/tasks/main.yml + dns_digitalocean_token = {{ digital_ocean_token }} + +- name: configure test helper rotation setup script + tags: rotation + when: inventory_hostname == 'backend-fsn.ooni.org' + template: + src: rotation_setup.sh + dest: /etc/ooni/rotation_setup.sh + +- name: create test helper rotation nginx template + tags: rotation + when: 
inventory_hostname == 'backend-fsn.ooni.org' + template: + src: rotation_nginx_conf + dest: /etc/ooni/rotation_nginx_conf + +- name: generate test helper rotation SSH keypair + tags: rotation + when: inventory_hostname == 'backend-fsn.ooni.org' + openssh_keypair: + path: /etc/ooni/testhelper_ssh_key + owner: root + group: root + mode: 0400 + type: ed25519 + register: pubkey + +- name: print SSH pubkey + tags: rotation + when: inventory_hostname == 'backend-fsn.ooni.org' + debug: msg={{ pubkey.public_key }} + +- name: Enable and start rotation service + tags: rotation + when: inventory_hostname == 'backend-fsn.ooni.org' + systemd: + daemon_reload: yes + enabled: yes + name: ooni-rotation.timer + state: started + + +## Tor daemon and onion service ## + + +- name: configure tor onion service hostname + when: inventory_hostname == 'ams-pg.ooni.org' + tags: tor + blockinfile: + path: /var/lib/tor/ooni_onion_service/hostname + create: yes + owner: debian-tor + group: debian-tor + mode: 0644 + block: guegdifjy7bjpequ.onion + +- name: configure tor onion service private_key + when: inventory_hostname == 'ams-pg.ooni.org' + tags: tor + blockinfile: + path: /var/lib/tor/ooni_onion_service/private_key + create: yes + owner: debian-tor + group: debian-tor + mode: 0600 + block: "{{ amspg_ooni_org_onion_key }}" + +- name: set tor onion service directory + when: inventory_hostname == 'ams-pg.ooni.org' + tags: tor + shell: | + chown debian-tor:debian-tor /var/lib/tor/ooni_onion_service + chmod 0700 /var/lib/tor/ooni_onion_service + + +# # Clickhouse # # + +- name: install APT HTTPS support + # do not update package if present + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + apt: + cache_valid_time: 86400 + state: present + name: + - apt-transport-https + - ca-certificates + - dirmngr + +- name: install clickhouse keys + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + command: apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 + +- name: set clickhouse repos + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + blockinfile: + path: /etc/apt/sources.list.d/clickhouse.list + create: yes + block: | + deb https://packages.clickhouse.com/deb lts main + +- name: pin clickhouse release train + when: inventory_hostname in ('backend-fsn.ooni.org', ) + tags: clickhouse + blockinfile: + path: /etc/apt/preferences.d/clickhouse-server + create: yes + block: | + Package: clickhouse-server + Pin: version 21.8.12.* + Pin-Priority: 999 + +- name: pin clickhouse release train + when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + blockinfile: + path: /etc/apt/preferences.d/clickhouse-server + create: yes + block: | + Package: clickhouse-server + Pin: version 23.8.2.* + Pin-Priority: 999 + +- name: install clickhouse on backend-fsn + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: clickhouse + apt: + # refresh cache + cache_valid_time: 0 + name: + - clickhouse-server={{ clickhouse_pkg_ver }} + - clickhouse-client={{ clickhouse_pkg_ver }} + - clickhouse-common-static={{ clickhouse_pkg_ver }} + vars: + clickhouse_pkg_ver: 21.8.12.* + +- name: install clickhouse on backend-hel + when: inventory_hostname == 'backend-hel.ooni.org' + tags: clickhouse + apt: + # refresh cache + cache_valid_time: 0 + name: + - clickhouse-server={{ 
clickhouse_pkg_ver }} + - clickhouse-client={{ clickhouse_pkg_ver }} + vars: + clickhouse_pkg_ver: 23.8.2.* + +- name: install clickhouse on ams-pg-test.ooni.org + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: clickhouse + apt: + # refresh cache + cache_valid_time: 0 + name: + - clickhouse-server={{ clickhouse_pkg_ver }} + - clickhouse-client={{ clickhouse_pkg_ver }} + - clickhouse-common-static={{ clickhouse_pkg_ver }} + vars: + clickhouse_pkg_ver: 23.8.2.* + +- name: install clickhouse conf override + when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + template: + src: clickhouse_config.xml + dest: /etc/clickhouse-server/config.d/ooni_conf.xml + owner: clickhouse + group: clickhouse + mode: 0400 + notify: restart clickhouse + +- name: allow incoming TCP connections from monitoring to Clickhouse prometheus interface + when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + blockinfile: + path: /etc/ooni/nftables/tcp/9363.nft + create: yes + block: | + add rule inet filter input ip saddr 5.9.112.244 tcp dport 9363 counter accept comment "clickhouse prometheus from monitoring.ooni.org" + notify: reload nftables + +- name: allow incoming TCP connections from jupiter on monitoring.ooni.org to Clickhouse + when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + blockinfile: + path: /etc/ooni/nftables/tcp/9000.nft + create: yes + block: | + add rule inet filter input ip saddr 5.9.112.244 tcp dport 9000 counter accept comment "clickhouse from monitoring.ooni.org" + notify: reload nftables + +- name: Run clickhouse + when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + systemd: + name: clickhouse-server.service + state: started + enabled: yes + +## Clickhouse access control ## +# https://clickhouse.com/docs/en/operations/access-rights/#enabling-access-control + +- name: Clickhouse - test admin user - failure is ok to ignore + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse-users + command: clickhouse-client -u admin --password admin -q 'select 1' + ignore_errors: true + register: admin_check + +- name: install python3-lxml + when: admin_check is defined and admin_check is failed + tags: clickhouse-users + apt: + cache_valid_time: 86400 + name: python3-lxml + +- name: Clickhouse - set flag + when: admin_check is defined and admin_check is failed + tags: clickhouse-users + # The users.xml file itself needs to be edited for this to work + xml: + path: /etc/clickhouse-server/users.xml + backup: yes + xpath: /clickhouse/users/default/{{ item }} + value: "1" + loop: + - access_management + - named_collection_control + - show_named_collections + - show_named_collections_secrets + register: users_xml + +- name: Clickhouse - restart immediately if needed + when: admin_check is defined and admin_check is failed + tags: clickhouse-users + systemd: + name: clickhouse-server + state: restarted + +- name: Clickhouse - create admin + when: admin_check is defined and admin_check is failed + tags: clickhouse-users + command: clickhouse-client -q "CREATE USER OR REPLACE admin IDENTIFIED WITH sha256_password BY 'admin' HOST LOCAL GRANTEES ANY" + # The server might be still starting: retry as needed + retries: 10 + delay: 5 + register: result + until: result.rc == 0 + +- name: Clickhouse - grant admin rights + when: admin_check is defined and admin_check is
failed + tags: clickhouse-users + command: clickhouse-client -q 'GRANT ALL ON *.* TO admin WITH GRANT OPTION' + +- name: Clickhouse - create readonly profile + when: admin_check is defined and admin_check is failed + tags: clickhouse-users + template: + src: clickhouse_readonly.xml + dest: /etc/clickhouse-server/users.d/make_default_readonly.xml + owner: clickhouse + group: clickhouse + mode: 0640 + + #- name: Clickhouse - restore users.xml + # when: admin_check is defined and admin_check is failed + # tags: clickhouse-users + # command: mv {{ users_xml.backup_file }} /etc/clickhouse-server/users.xml + +- name: Clickhouse - restart immediately if needed + when: admin_check is defined and admin_check is failed + tags: clickhouse-users + systemd: + name: clickhouse-server + state: restarted + +- name: Clickhouse - setup users and permissions + tags: clickhouse-users + command: clickhouse-client -u admin --password admin -q "{{ item }}" + loop: + - "CREATE USER OR REPLACE api IDENTIFIED WITH sha256_password BY 'api' HOST LOCAL" + - "GRANT ALL ON *.* TO api" + - "CREATE USER OR REPLACE fastpath IDENTIFIED WITH sha256_password BY 'fastpath' HOST LOCAL" + - "GRANT ALL ON *.* TO fastpath" + +## end of Clickhouse access control ## + + + +- name: Run feeder on ams-pg-test + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: clickhouse + blockinfile: + path: /etc/ooni/clickhouse_feeder.conf + create: yes + block: | + [DEFAULT] + pg_dbuser = readonly + pg_dbhost = localhost + +- name: run feeder on backend-fsn + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: clickhouse + blockinfile: + path: /etc/ooni/clickhouse_feeder.conf + create: yes + block: | + [DEFAULT] + pg_dbuser = readonly + pg_dbhost = ams-pg.ooni.org + +- name: Run feeder + when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + tags: clickhouse + systemd: + name: ooni-clickhouse-feeder.service + state: started + enabled: yes + +- name: Run DB backup on ams-pg-test + when: inventory_hostname == 'ams-pg-test.ooni.org' + tags: dbbackup + template: + src: db-backup.conf + dest: /etc/ooni/db-backup.conf + mode: 0600 + vars: + public_bucket_name: ooni-data-eu-fra-test + +- name: Run DB backup on FSN + when: inventory_hostname == 'backend-fsn.ooni.org' + tags: dbbackup + template: + src: db-backup.conf + dest: /etc/ooni/db-backup.conf + mode: 0600 + vars: + public_bucket_name: ooni-data-eu-fra diff --git a/ansible/roles/ooni-backend/templates/444.nft b/ansible/roles/ooni-backend/templates/444.nft new file mode 100644 index 00000000..03f5106f --- /dev/null +++ b/ansible/roles/ooni-backend/templates/444.nft @@ -0,0 +1,2 @@ +# roles/ooni-backend/templates/444.nft +add rule inet filter input tcp dport 444 counter accept comment "incoming haproxy metrics" diff --git a/ansible/roles/ooni-backend/templates/analysis.conf b/ansible/roles/ooni-backend/templates/analysis.conf new file mode 100644 index 00000000..4df8a8ae --- /dev/null +++ b/ansible/roles/ooni-backend/templates/analysis.conf @@ -0,0 +1,9 @@ +# Managed by ansible, see roles/ooni-backend/tasks/main.yml +# [s3bucket] +# bucket_name = ooni-data-eu-fra-test +# aws_access_key_id = +# aws_secret_access_key = + +[backup] +# space separated +table_names = citizenlab fastpath jsonl diff --git a/ansible/roles/ooni-backend/templates/api-uploader.conf b/ansible/roles/ooni-backend/templates/api-uploader.conf new file mode 100644 index 00000000..2de0e399 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/api-uploader.conf @@ -0,0 +1,9 @@ +# OONI API 
measurement uploader - Python ini format +# Deployed by ansible, see roles/ooni-backend/templates/api-uploader.conf +[DEFAULT] +# arn:aws:iam::676739448697:user/ooni-pipeline, AWS: OONI Open Data +aws_access_key_id = AKIAJURD7T4DTN5JMJ5Q +aws_secret_access_key = {{ s3_ooni_open_data_access_key }} +bucket_name = {{ bucket_name }} +msmt_spool_dir = /var/lib/ooniapi/measurements +collector_id = {{ collector_id }} diff --git a/ansible/roles/ooni-backend/templates/api.conf b/ansible/roles/ooni-backend/templates/api.conf new file mode 100644 index 00000000..25d1d0c6 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/api.conf @@ -0,0 +1,60 @@ +# Deployed by ansible +# See ooni-backend/tasks/main.yml ooni-backend/templates/api.conf +# Syntax: treat it as a Python file, but only uppercase variables are used +COLLECTORS = {{ collectors }} +COLLECTOR_ID = {{ collector_id }} + +# Read-only database access +# The password is already made public +DATABASE_URI_RO = "{{ pg_uri }}" + +DATABASE_STATEMENT_TIMEOUT = 20 + +{% if clickhouse_url|length %} +USE_CLICKHOUSE = True +{% else %} +USE_CLICKHOUSE = False +{% endif %} + +CLICKHOUSE_URL = "{{ clickhouse_url }}" + + +BASE_URL = "{{ base_url }}" + +AUTOCLAVED_BASE_URL = "http://datacollector.infra.ooni.io/ooni-public/autoclaved/" +CENTRIFUGATION_BASE_URL = "http://datacollector.infra.ooni.io/ooni-public/centrifugation/" + +S3_ACCESS_KEY_ID = "AKIAJURD7T4DTN5JMJ5Q" +S3_BUCKET_NAME = "{{ bucket_name }}" +S3_SECRET_ACCESS_KEY = "CHANGEME" +S3_SESSION_TOKEN = "CHANGEME" +S3_ENDPOINT_URL = "CHANGEME" + +PSIPHON_CONFFILE = "/etc/ooni/psiphon_config.json" +TOR_TARGETS_CONFFILE = "/etc/ooni/tor_targets.json" + +JWT_ENCRYPTION_KEY = "{{ jwt_encryption_key }}" +ACCOUNT_ID_HASHING_KEY = "{{ account_id_hashing_key }}" + +SESSION_EXPIRY_DAYS = 180 +LOGIN_EXPIRY_DAYS = 365 + +# Registration email delivery +MAIL_SERVER = "mail.riseup.net" +MAIL_PORT = 465 +MAIL_USE_SSL = True +MAIL_USERNAME = "ooni-mailer" +MAIL_PASSWORD = "{{ mail_smtp_password }}" +MAIL_SOURCE_ADDRESS = "contact@ooni.org" +LOGIN_BASE_URL = "{{ login_base_url }}" + +GITHUB_WORKDIR = "/var/lib/ooniapi/citizenlab" +GITHUB_TOKEN = "{{ github_token }}" +GITHUB_USER = "ooni-bot" +GITHUB_ORIGIN_REPO = "{{ github_origin_repo }}" +GITHUB_PUSH_REPO = "{{ github_push_repo }}" + +# Measurement spool directory +MSMT_SPOOL_DIR = "/var/lib/ooniapi/measurements" +GEOIP_ASN_DB = "/var/lib/ooniapi/asn.mmdb" +GEOIP_CC_DB = "/var/lib/ooniapi/cc.mmdb" diff --git a/ansible/roles/ooni-backend/templates/api.gunicorn.py b/ansible/roles/ooni-backend/templates/api.gunicorn.py new file mode 100644 index 00000000..f86b6f67 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/api.gunicorn.py @@ -0,0 +1,12 @@ +# Gunicorn configuration file +# Managed by ansible, see roles/ooni-backend/tasks/main.yml +# and templates/api.gunicorn.py + +workers = 12 + +loglevel = "info" +proc_name = "ooni-api" +reuse_port = True +# Disabled statsd: https://github.com/benoitc/gunicorn/issues/2843 +#statsd_host = "127.0.0.1:8125" +#statsd_prefix = "ooni-api" diff --git a/ansible/roles/ooni-backend/templates/clickhouse_config.xml b/ansible/roles/ooni-backend/templates/clickhouse_config.xml new file mode 100644 index 00000000..e84e53ff --- /dev/null +++ b/ansible/roles/ooni-backend/templates/clickhouse_config.xml @@ -0,0 +1,41 @@ + + + + + information + + +{% if inventory_hostname == 'backend-fsn.ooni.org' %} + production + 20100100100 + +{% else %} + {{ inventory_hostname.replace(".ooni.org", "") }} +{% endif %} + +{% if inventory_hostname == 
'ams-pg-test.ooni.org' %} + 500100100 + 3100100100 +{% endif %} + + + 0.0.0.0 + + + + + + + + + /metrics + 9363 + true + true + true + true + + diff --git a/ansible/roles/ooni-backend/templates/clickhouse_readonly.xml b/ansible/roles/ooni-backend/templates/clickhouse_readonly.xml new file mode 100644 index 00000000..73645616 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/clickhouse_readonly.xml @@ -0,0 +1,9 @@ + + + + + + readonly + + + diff --git a/ansible/roles/ooni-backend/templates/clickhouse_users.xml b/ansible/roles/ooni-backend/templates/clickhouse_users.xml new file mode 100644 index 00000000..49fd011a --- /dev/null +++ b/ansible/roles/ooni-backend/templates/clickhouse_users.xml @@ -0,0 +1,31 @@ + + + + + + + 1 + + + + + + + readonly + + 0.0.0.0 + + + + + + {{ clickhouse_writer_password|hash('sha256') }} + + 127.0.0.1 + + + + + + + diff --git a/ansible/roles/ooni-backend/templates/db-backup.conf b/ansible/roles/ooni-backend/templates/db-backup.conf new file mode 100644 index 00000000..4302f0ec --- /dev/null +++ b/ansible/roles/ooni-backend/templates/db-backup.conf @@ -0,0 +1,17 @@ +{ + "ver": 0, + "action": "export", + "public_aws_access_key_id": "AKIAJURD7T4DTN5JMJ5Q", + "public_aws_secret_access_key": "{{ s3_ooni_open_data_access_key }}", + "public_bucket_name": "{{ public_bucket_name }}", + "clickhouse_url": "clickhouse://localhost/default", + "__description": "tables can be backed up as: ignore, full, incremental, partition", + "backup_tables": { + "citizenlab": "ignore", + "fastpath": "ignore", + "jsonl": "ignore", + "msmt_feedback": "ignore", + "test_helper_instances": "ignore", + "url_priorities": "ignore" + } +} diff --git a/ansible/roles/ooni-backend/templates/deb_ooni_org.nginx.conf b/ansible/roles/ooni-backend/templates/deb_ooni_org.nginx.conf new file mode 100644 index 00000000..c069fd55 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/deb_ooni_org.nginx.conf @@ -0,0 +1,64 @@ +# Managed by ansible, see roles/ooni-backend/tasks/main.yml + +# anonymize ipaddr +map $remote_addr $remote_addr_anon { + ~(?P\d+\.\d+\.\d+)\. 
$ip.0; + ~(?P[^:]+:[^:]+): $ip::; + default 0.0.0.0; +} + +# log anonymized ipaddr +log_format deb_ooni_org_logfmt '$remote_addr_anon [$time_local] ' + '"$request" $status snt:$body_bytes_sent rt:$request_time uprt:$upstream_response_time "$http_referer" "$http_user_agent"'; + +server { + listen 80; + server_name deb.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info deb_ooni_org_logfmt; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + resolver 127.0.0.1; + # Serve ACME challenge from disk + location ^~ /.well-known/acme-challenge { + alias /var/lib/dehydrated/acme-challenges; + } + location / { + proxy_pass https://ooni-deb.s3.eu-central-1.amazonaws.com/; + } +} + +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name deb.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info deb_ooni_org_logfmt; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + ssl_certificate /var/lib/dehydrated/certs/{{ inventory_hostname }}/fullchain.pem; + ssl_certificate_key /var/lib/dehydrated/certs/{{ inventory_hostname }}/privkey.pem; + ssl_trusted_certificate /var/lib/dehydrated/certs/{{ inventory_hostname }}/chain.pem; # for ssl_stapling_verify + + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; + + resolver 127.0.0.1; + location / { + proxy_pass https://ooni-deb.s3.eu-central-1.amazonaws.com/; + } +} diff --git a/ansible/roles/ooni-backend/templates/dehydrated.config b/ansible/roles/ooni-backend/templates/dehydrated.config new file mode 100644 index 00000000..7a0293a2 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/dehydrated.config @@ -0,0 +1,7 @@ +# Deployed by ansible +# See roles/ooni-backend/templates/dehydrated.config +CONFIG_D=/etc/dehydrated/conf.d +BASEDIR=/var/lib/dehydrated +WELLKNOWN="${BASEDIR}/acme-challenges" +DOMAINS_TXT="/etc/dehydrated/domains.txt" +HOOK="/etc/dehydrated/haproxy_hook.sh" diff --git a/ansible/roles/ooni-backend/templates/dehydrated_haproxy_hook.sh b/ansible/roles/ooni-backend/templates/dehydrated_haproxy_hook.sh new file mode 100644 index 00000000..0e5b41f3 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/dehydrated_haproxy_hook.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Deployed by ansible +# See roles/ooni-backend/templates/dehydrated_haproxy_hook.sh +# +# Deploys chained privkey and certificates for haproxy +# Reloads haproxy as needed + +deploy_cert() { + local DOMAIN="${1}" KEYFILE="${2}" CERTFILE="${3}" FULLCHAINFILE="${4}" CHAINFILE="${5}" TIMESTAMP="${6}" + # Called once for each certificate + # /var/lib/dehydrated/certs/backend-hel.ooni.org/privkey.pem /var/lib/dehydrated/certs/backend-hel.ooni.org/cert.pem /var/lib/dehydrated/certs/backend-hel.ooni.org/fullchain.pem > /var/lib/dehydrated/certs/backend-hel.ooni.org/haproxy.pem + # cp "${KEYFILE}" 
"${FULLCHAINFILE}" /etc/nginx/ssl/; chown -R nginx: /etc/nginx/ssl + logger "deploy_cert hook reading ${KEYFILE} ${CERTFILE} ${FULLCHAINFILE}" + cat "${KEYFILE}" "${CERTFILE}" "${FULLCHAINFILE}" > "${KEYFILE}.haproxy" + logger "deploy_cert reloading haproxy" + systemctl reload haproxy.service +} + +HANDLER="$1"; shift +if [[ "${HANDLER}" =~ ^(deploy_cert)$ ]]; then + "$HANDLER" "$@" +fi diff --git a/ansible/roles/ooni-backend/templates/fastpath.conf b/ansible/roles/ooni-backend/templates/fastpath.conf new file mode 100644 index 00000000..031f49a0 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/fastpath.conf @@ -0,0 +1,15 @@ +# See roles/ooni-backend/tasks/main.yml +[DEFAULT] +collectors = localhost +{% if psql_uri is defined %} +# The password is already made public +db_uri = {{ psql_uri }} +{% else %} +db_uri = +{% endif %} +clickhouse_url = {{ clickhouse_url }} + +# S3 access credentials +# Currently unused +s3_access_key = +s3_secret_key = diff --git a/ansible/roles/ooni-backend/templates/haproxy.cfg b/ansible/roles/ooni-backend/templates/haproxy.cfg new file mode 100644 index 00000000..025a4fc2 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/haproxy.cfg @@ -0,0 +1,122 @@ +## Deployed by ansible, see roles/ooni-backend/templates/haproxy.cfg + +# Proxies to: +# - local nginx +# - remote test helpers +# See http://interactive.blockdiag.com/?compression=deflate&src=eJyFjjELwjAQhXd_xeFuEdpBEAURBwfBXSSk6ZkEr7mSZGgR_7tNXdoiuD2--7j3SmL1rKzU8FoAFEUOqz0Y2XhuuxSHICKLiCEKg9Sg3_bmSHHaujaxISRyuJ7hRrJEgh0slVTGOr28Txz2yvQvvYw44R617XGXMTubWU7HzXq26kfl8XISykgidBphVP-whLPuOtRRhIaZ_ogVlt8d7PVYDXkS3x_pgmPP + +global + log /dev/log local0 info alert + log /dev/log local1 notice alert + chroot /var/lib/haproxy + stats socket /run/haproxy/admin.sock mode 660 level admin + stats timeout 30s + user haproxy + group haproxy + daemon + + # Default SSL material locations + ca-base /etc/ssl/certs + crt-base /etc/ssl/private + + # See: https://ssl-config.mozilla.org/#server=haproxy&server-version=2.0.3&config=intermediate + ssl-default-bind-ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384 + ssl-default-bind-ciphersuites TLS_AES_128_GCM_SHA256:TLS_AES_256_GCM_SHA384:TLS_CHACHA20_POLY1305_SHA256 + ssl-default-bind-options ssl-min-ver TLSv1.2 no-tls-tickets + +defaults + log global + mode http + option httplog + option dontlognull + timeout connect 5000 + timeout client 50000 + timeout server 50000 + errorfile 400 /etc/haproxy/errors/400.http + errorfile 403 /etc/haproxy/errors/403.http + errorfile 408 /etc/haproxy/errors/408.http + errorfile 500 /etc/haproxy/errors/500.http + errorfile 502 /etc/haproxy/errors/502.http + errorfile 503 /etc/haproxy/errors/503.http + errorfile 504 /etc/haproxy/errors/504.http + + log-format "%[var(txn.src_ipaddr_masked)] %ft > %b > %s %TR/%Tw/%Tc/%Tr/%Ta %ST %B %CC %CS %tsc %ac/%fc/%bc/%sc/%rc %sq/%bq %hr %hs %{+Q}r" + +frontend haproxy_metrics + # Metrics exposed on TLS port 444 + # File generated by /etc/dehydrated/haproxy_hook.sh + bind :444 ssl crt /var/lib/dehydrated/certs/"{{ inventory_hostname }}"/privkey.pem.haproxy + + http-request set-var(txn.src_ipaddr_masked) src,ipmask(24,64) + + # /__haproxy_stats stats page + stats enable + stats uri /__haproxy_stats + stats refresh 5s + + # /__haproxy_prom_metrics prometheus metrics + http-request use-service prometheus-exporter if { path 
/__haproxy_prom_metrics } + + +frontend public_tls + # TLS on port 443 + # File generated by /etc/dehydrated/haproxy_hook.sh + bind :443 ssl crt /var/lib/dehydrated/certs/{{ inventory_hostname }}/privkey.pem.haproxy + + http-request set-var(txn.src_ipaddr_masked) src,ipmask(24,64) + + # test helpers + default_backend lb_test_helpers + + # deb.ooni.org + acl ACL_deb_ooni_org hdr(host) -i deb.ooni.org + use_backend deb_ooni_org if ACL_deb_ooni_org + + # Nginx + use_backend nginx if !{ path / } || !{ method POST } + + +frontend public_80 + # Forwarded to Nginx for ACME and deb.ooni.org + bind :80 + + http-request set-var(txn.src_ipaddr_masked) src,ipmask(24,64) + + # ACME + use_backend nginx if { path_beg /.well-known/acme-challenge } + + # deb.ooni.org + acl ACL_deb_ooni_org hdr(host) -i deb.ooni.org + use_backend deb_ooni_org if ACL_deb_ooni_org + + + +backend nginx + # Local Nginx is in front of the API and more. See diagram. + default-server check + option forwardfor + #option httpchk GET / + # forward to local nginx + server nginx localhost:17744 + + +backend lb_test_helpers + # Remote test helpers + default-server check + option forwardfor + http-check send meth POST uri / hdr Content-Type application/json body "{}" + http-check send-state + http-check comment "TH POST with empty JSON" + + server th0 0.th.ooni.org:443 ssl verify none + server th1 1.th.ooni.org:443 ssl verify none + server th2 2.th.ooni.org:443 ssl verify none + server th3 3.th.ooni.org:443 ssl verify none + #option httpchk + + +backend deb_ooni_org + #default-server check + option forwardfor + server s3-ooni-deb ooni-deb.s3.eu-central-1.amazonaws.com ssl verify none + diff --git a/ansible/roles/ooni-backend/templates/nginx-api-ams-pg.conf b/ansible/roles/ooni-backend/templates/nginx-api-ams-pg.conf new file mode 100644 index 00000000..4e3cf934 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/nginx-api-ams-pg.conf @@ -0,0 +1,297 @@ +# Managed by ansible +# roles/ooni-backend/templates/nginx-api-ams-pg.conf + +# Use 2-level cache, 100MB of RAM + 5GB on disk, +proxy_cache_path /var/cache/nginx/ooni-api levels=1:2 keys_zone=apicache:100M + max_size=5g inactive=24h use_temp_path=off; + +# anonymize ipaddr +map $remote_addr $remote_addr_anon { + ~(?P<ip>\d+\.\d+\.\d+)\. $ip.0; + ~(?P<ip>[^:]+:[^:]+): $ip::; + default 0.0.0.0; +} + +# log anonymized ipaddr and caching status +log_format ooni_api_fmt '$remote_addr_anon $upstream_cache_status [$time_local] ' + '"$request" $status snt:$body_bytes_sent rt:$request_time uprt:$upstream_response_time "$http_referer" "$http_user_agent"'; + +server { + # TODO(bassosimone): we need support for cleartext HTTP to make sure that requests + # over Tor correctly land to the proper backend. We are listening on this custom port + # and we are configuring Tor such that it routes traffic to this port.
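+    # Note: being bound to loopback only, this cleartext port is reachable solely through the local Tor daemon; public clients keep using the TLS listeners below.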
+ listen 127.0.0.1:17744; + + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name _; + access_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info ooni_api_fmt; + error_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info; + gzip on; + + # TODO: we could use different client_max_body_size and SSL configurations for probe service paths + # and everything else + client_max_body_size 200M; # for measurement POST + + ssl_certificate {{ certpath }}{{ inventory_hostname }}/fullchain.pem; + ssl_certificate_key {{ certpath }}{{ inventory_hostname }}/privkey.pem; + ssl_trusted_certificate {{ certpath }}{{ inventory_hostname }}/chain.pem; # for ssl_stapling_verify + + # Use the intermediate configuration to support legacy probes + # https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.6 + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; + + resolver 127.0.0.1; + + # Registry + # Should match: + # - /api/v1/login + # - /api/v1/register + # - /api/v1/update + location ~^/api/v1/(login|register|update) { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + + proxy_pass https://registry.ooni.io:443; + } + + # Selectively route test-list/urls to the API + location ~^/api/v1/test-list/urls { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_cache apicache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + proxy_cache_methods HEAD GET; + # Cache only 200, 301, and 302 by default and for very short. + # Overridden by the API using the Expires header + proxy_cache_valid 200 301 302 10s; + proxy_cache_valid any 0; + add_header x-cache-status $upstream_cache_status; + add_header X-Cache-Status $upstream_cache_status; + } + + # Orchestrate + # Should match: + # - /api/v1/test-list + location ~^/api/v1/(test-list|urls) { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + + proxy_pass https://orchestrate.ooni.io:443; + } + + # Web Connectivity Test Helper + # Should match: + # - / + # - /status + # + # The fact that it responds to / means that we may have to differentiate + # via the Host record.
+ # TODO We should check if clients will respect a suffix added to by the + # bouncer in the returned field, otherwise new clients should use another + # form + location ~^/web-connectivity/(status) { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + + proxy_pass https://wcth.ooni.io; + } + + location /whoami { + return 200 "{{ inventory_hostname }}"; + } + + location /metrics { + return 200 ''; + } + + # Expose (only) Netdata badges + location ~ ^/netdata/badge { + rewrite ^/netdata/badge /api/v1/badge.svg break; + proxy_pass http://127.0.0.1:19999; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + # Expose package version badges + location /package_badges { + root /var/www; + add_header Pragma "no-cache"; + add_header Cache-Control "no-store, no-cache, must-revalidate, post-check=0, pre-check=0"; + } + + # Temporary redirection to backend-FSN + location ~ ^/api/v1/(aggregation|measurements|raw_measurement|measurement_meta) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location ~ ^/api/_/(asn_by_month|countries|countries_by_month|check_report_id|country_overview|global_overview|global_overview_by_month|im_networks|im_stats|network_stats) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location ~ ^/api/_/(test_coverage|website_networks|website_stats|website_urls|vanilla_tor_stats|test_names) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location = /api/_/circumvention_stats_by_country { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location = / { + # match "/" strictly, not as a prefix + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location ~ ^/static/ { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + # open and close reports, submit msmt + location ~ ^/report/ { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + # Auth, URL submission, URL priorities + location ~ ^/api/v1/(url-submission|get_account_role|set_account_role|set_session_expunge|user_login|user_register|user_logout) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location ~ ^/api/_/(url-priorities|account_metadata) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + location ~ ^/api/v1/(collectors|test-helpers|torsf_stats) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location ~ ^/(robots.txt|files) { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location = /api/v1/test-list/tor-targets { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location = /api/v1/test-list/urls { + proxy_pass
https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location = /bouncer/net-tests { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + location = /api/v1/test-list/psiphon-config { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + #location ~ ^/api/_/(test_names) { + # proxy_pass https://backend-fsn.ooni.org; + # proxy_set_header Host $host; + # proxy_set_header X-Real-IP $remote_addr; + #} + ## /files* tree + #location ~ ^/files { + # proxy_pass https://backend-fsn.ooni.org; + # proxy_set_header Host $host; + # proxy_set_header X-Real-IP $remote_addr; + #} + #location ~ ^/(health) { + # proxy_pass https://backend-fsn.ooni.org; + # proxy_set_header Host $host; + # proxy_set_header X-Real-IP $remote_addr; + #} + + # Temporary redirect + location = /api/v1/check-in { + proxy_pass https://backend-fsn.ooni.org; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + # new API + location / { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_cache apicache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + proxy_cache_methods HEAD GET; + # Cache only 200, 301, and 302 by default and for very short. + # Overridden by the API using the Expires header + proxy_cache_valid 200 301 302 10s; + proxy_cache_valid any 0; + add_header x-cache-status $upstream_cache_status; + add_header X-Cache-Status $upstream_cache_status; + } + + # Expose the measurement spool directory + location /measurement_spool/ { + alias /var/lib/ooniapi/measurements/incoming/; + autoindex off; + sendfile on; + tcp_nopush on; + if_modified_since off; + expires off; + etag off; + + gzip_comp_level 6; + gzip_min_length 1240; + gzip_proxied any; + gzip_types *; + gzip_vary on; + } +} diff --git a/ansible/roles/ooni-backend/templates/nginx-api-fsn.conf b/ansible/roles/ooni-backend/templates/nginx-api-fsn.conf new file mode 100644 index 00000000..9d6e1451 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/nginx-api-fsn.conf @@ -0,0 +1,260 @@ +# Managed by ansible +# roles/ooni-backend/templates/nginx-api-fsn.conf + +# Use 2-level cache, 100MB of RAM + 5GB on disk, +proxy_cache_path /var/cache/nginx/ooni-api levels=1:2 keys_zone=apicache:100M + max_size=5g inactive=24h use_temp_path=off; + +# anonymize ipaddr +map $remote_addr $remote_addr_anon { + ~(?P<ip>\d+\.\d+\.\d+)\. $ip.0; + ~(?P<ip>[^:]+:[^:]+): $ip::; + default 0.0.0.0; +} + +# anonymize forwarded ipaddr +map $http_x_forwarded_for $remote_fwd_anon { + ~(?P<ip>\d+\.\d+\.\d+)\. $ip.0; + ~(?P<ip>[^:]+:[^:]+): $ip::; + default 0.0.0.0; +} + + +# log anonymized ipaddr and caching status +log_format ooni_api_fmt '$remote_addr_anon $remote_fwd_anon $upstream_cache_status [$time_local] ' + '"$request" $status snt:$body_bytes_sent rt:$request_time uprt:$upstream_response_time "$http_referer" "$http_user_agent"'; + +server { + # TODO(bassosimone): we need support for cleartext HTTP to make sure that requests + # over Tor correctly land to the proper backend. We are listening on this custom port + # and we are configuring Tor such that it routes traffic to this port.
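+    # Note: loopback-only as well; requests arrive here via the local Tor daemon, while public traffic terminates on the default_server TLS listeners below.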
+ listen 127.0.0.1:17744; + + listen 443 ssl http2 default_server; + listen [::]:443 ssl http2 default_server; + server_name _; + access_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info ooni_api_fmt; + error_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info; + gzip on; + gzip_types text/plain application/xml application/json; + + # TODO: we could use different client_max_body_size and SSL configurations for probe service paths + # and everything else + client_max_body_size 200M; # for measurement POST + + ssl_certificate {{ certpath }}{{ inventory_hostname }}/fullchain.pem; + ssl_certificate_key {{ certpath }}{{ inventory_hostname }}/privkey.pem; + ssl_trusted_certificate {{ certpath }}{{ inventory_hostname }}/chain.pem; # for ssl_stapling_verify + + # Use the intermediate configuration to support legacy probes + # https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.6 + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; + + resolver 127.0.0.1; + + # Registry + # Should match: + # - /api/v1/login + # - /api/v1/register + # - /api/v1/update + location ~^/api/v1/(login|register|update) { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + + proxy_pass https://registry.ooni.io:443; + } + + # Selectively route test-list/urls to the API + location ~^/api/v1/test-list/urls { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_cache apicache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + proxy_cache_methods HEAD GET; + # Cache only 200, 301, and 302 by default and for very short.
+ # Overridden by the API using the Expires header + proxy_cache_valid 200 301 302 10s; + proxy_cache_valid any 0; + add_header x-cache-status $upstream_cache_status; + add_header X-Cache-Status $upstream_cache_status; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + } + + # Orchestrate + # Should match: + # - /api/v1/test-list + location ~^/api/v1/(test-list|urls) { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + + proxy_pass https://orchestrate.ooni.io:443; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + + } + + # Web Connectivity Test Helper + # Should match: + # - / + # - /status + # + # The fact that it responds to / means that we may have to differentiate + # via the Host record. + # TODO We should check if clients will respect a suffix added to by the + # bouncer in the returned field, otherwise new clients should use another + # form + location ~^/web-connectivity/(status) { + proxy_http_version 1.1; + proxy_set_header Host $http_host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + + proxy_pass https://wcth.ooni.io; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + } + + location /whoami { + return 200 "{{ inventory_hostname }}"; + } + + location /metrics { + return 200 ''; + } + + # Expose event detector RSS/atom feeds + location ~ ^/detector { + root /var/lib; + default_type application/xml; + } + + # Expose (only) Netdata badges + location ~ ^/netdata/badge { + rewrite ^/netdata/badge /api/v1/badge.svg break; + proxy_pass http://127.0.0.1:19999; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + } + + # Expose package version badges + location /package_badges { + root /var/www; + add_header Pragma "no-cache"; + add_header Cache-Control "no-store, no-cache, must-revalidate, post-check=0, pre-check=0"; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + } + + # 2022-09-01 20:08 CEST temporarily block a bot scraping /files/download/* + location ~^/files/download/ { + return 301 https://explorer.ooni.org/; + } + + # new API + location / { + + # Protect /apidocs invoked with url= and/or urls= args + if ($uri ~ "^/apidocs") { set $block_apidocs X; } + if ($args ~ "url=" ) { set $block_apidocs "${block_apidocs}Y"; } + if ($args ~ "urls=" ) { set $block_apidocs "${block_apidocs}Y"; } + if ($block_apidocs ~ "XY") { return 403; } # nested "if" are not supported + + deny 216.244.66.0/24; # DotBot/1.2 + deny 114.119.128.0/19; # PetalBot + allow all; + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + + # match test-helper POST to / and forward traffic to a TH + if ($request_uri = "/") { set $forward_to_th "YE"; } + if ($request_method = POST) { set $forward_to_th "${forward_to_th}S"; } + if ($forward_to_th = 
"YES") { + proxy_pass https://0.th.ooni.org; + } + + set $external_remote_addr $remote_addr; + if ($remote_addr = "188.166.93.143") { + # If remote_addr is ams-pg-test trust the X-Real-IP header + set $external_remote_addr $http_x_real_ip; + } + if ($remote_addr = "142.93.237.101") { + # If remote_addr is ams-pg trust the X-Real-IP header + set $external_remote_addr $http_x_real_ip; + } + proxy_set_header X-Real-IP $external_remote_addr; + + proxy_cache apicache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + proxy_cache_methods HEAD GET; + # Cache only 200, 301, and 302 by default and for very short. + # Overridden by the API using the Expires header + proxy_cache_valid 200 301 302 10s; + proxy_cache_valid any 0; + add_header x-cache-status $upstream_cache_status; + add_header X-Cache-Status $upstream_cache_status; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + } + + # Expose the measurement spool directory + location /measurement_spool/ { + alias /var/lib/ooniapi/measurements/incoming/; + autoindex off; + sendfile on; + tcp_nopush on; + if_modified_since off; + expires off; + etag off; + } +} + +# Used by Netdata to monitor Nginx +server { + listen 127.0.0.1:80; + server_name localhost; + location = /stub_status { + stub_status; + } +} diff --git a/ansible/roles/ooni-backend/templates/nginx-api-test.conf b/ansible/roles/ooni-backend/templates/nginx-api-test.conf new file mode 100644 index 00000000..092d40db --- /dev/null +++ b/ansible/roles/ooni-backend/templates/nginx-api-test.conf @@ -0,0 +1,157 @@ +# Managed by ansible +# roles/ooni-backend/templates/nginx-api-test.conf + +# Use 2-level cache, 20MB of RAM + 5GB on disk, +proxy_cache_path /var/cache/nginx/ooni-api levels=1:2 keys_zone=apicache:100M + max_size=5g inactive=24h use_temp_path=off; + +# anonymize ipaddr +map $remote_addr $remote_addr_anon { + ~(?P\d+\.\d+\.\d+)\. $ip.0; + ~(?P[^:]+:[^:]+): $ip::; + default 0.0.0.0; +} + +# anonymize forwarded ipaddr +map $http_x_forwarded_for $remote_fwd_anon { + ~(?P\d+\.\d+\.\d+)\. $ip.0; + ~(?P[^:]+:[^:]+): $ip::; + default 0.0.0.0; +} + + +# log anonymized ipaddr and caching status +log_format ooni_api_fmt '$remote_addr_anon $remote_fwd_anon $upstream_cache_status [$time_local] ' + '"$request" $status snt:$body_bytes_sent rt:$request_time uprt:$upstream_response_time "$http_referer" "$http_user_agent"'; + +server { + # TODO(bassosimone): we need support for cleartext HTTP to make sure that requests + # over Tor correctly land to the proper backend. We are listening on this custom port + # and we are configuring Tor such that it routes traffic to this port. 
+ listen 127.0.0.1:17744; + server_name _; + access_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info ooni_api_fmt; + error_log syslog:server=unix:/dev/log,tag=ooniapi,severity=info; + gzip on; + gzip_types text/plain application/xml application/json; + + # TODO: we could use different client_max_body_size and SSL configurations for probe service paths + # and everything else + client_max_body_size 200M; # for measurement POST + + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + + # use systemd-resolved + resolver 127.0.0.53; + + # Selectively route test-list/urls to the API + location ~^/api/v1/test-list/urls { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_cache apicache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + proxy_cache_methods HEAD GET; + # Cache only 200, 301, and 302 by default and for very short. + # Overridden by the API using the Expires header + proxy_cache_valid 200 301 302 10s; + proxy_cache_valid any 0; + add_header x-cache-status $upstream_cache_status; + add_header X-Cache-Status $upstream_cache_status; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + } + + location /whoami { + return 200 "{{ inventory_hostname }}"; + } + + # Serve ACME challenge from disk + location ^~ /.well-known/acme-challenge { + alias /var/lib/dehydrated/acme-challenges; + } + + # 2022-09-01 20:08 CEST temporarily block a bot scraping /files/download/* + location ~^/files/download/ { + return 301 https://explorer.ooni.org/; + } + + # new API + location / { + + # Protect /apidocs invoked with url= and/or urls= args + if ($uri ~ "^/apidocs") { set $block_apidocs X; } + if ($args ~ "url=" ) { set $block_apidocs "${block_apidocs}Y"; } + if ($args ~ "urls=" ) { set $block_apidocs "${block_apidocs}Y"; } + if ($block_apidocs ~ "XY") { return 403; } # nested "if" are not supported + + deny 216.244.66.0/24; # DotBot/1.2 + deny 114.119.128.0/19; # PetalBot + allow all; + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + + set $external_remote_addr $remote_addr; + if ($remote_addr = "188.166.93.143") { + # If remote_addr is ams-pg-test trust the X-Real-IP header + set $external_remote_addr $http_x_real_ip; + } + if ($remote_addr = "142.93.237.101") { + # If remote_addr is ams-pg trust the X-Real-IP header + set $external_remote_addr $http_x_real_ip; + } + proxy_set_header X-Real-IP $external_remote_addr; + + proxy_cache apicache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + proxy_cache_methods HEAD GET; + # Cache only 200, 301, and 302 by default and for very short.
+ # Overridden by the API using the Expires header + proxy_cache_valid 200 301 302 10s; + proxy_cache_valid any 0; + add_header x-cache-status $upstream_cache_status; + add_header X-Cache-Status $upstream_cache_status; + add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always; + add_header X-Frame-Options DENY always; + add_header X-Content-Type-Options nosniff always; + } + + # Expose the measurement spool directory + location /measurement_spool/ { + alias /var/lib/ooniapi/measurements/incoming/; + autoindex off; + sendfile on; + tcp_nopush on; + if_modified_since off; + expires off; + etag off; + } +} + +server { + # Forward deb.ooni.org to S3 + listen 17744; + server_name deb.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info ooni_api_fmt; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + resolver 127.0.0.53; + # Serve ACME challenge from disk + location ^~ /.well-known/acme-challenge { + alias /var/lib/dehydrated/acme-challenges; + } + location / { + proxy_pass https://ooni-deb.s3.eu-central-1.amazonaws.com/; + } +} diff --git a/ansible/roles/ooni-backend/templates/rotation_nginx_conf b/ansible/roles/ooni-backend/templates/rotation_nginx_conf new file mode 100644 index 00000000..63255e51 --- /dev/null +++ b/ansible/roles/ooni-backend/templates/rotation_nginx_conf @@ -0,0 +1,70 @@ +# Managed by ansible, see roles/ooni-backend/tasks/main.yml +# and roles/ooni-backend/templates/rotation_nginx_conf +# Deployed by rotation tool to the test-helper hosts +proxy_cache_path /var/cache/nginx levels=1:2 keys_zone=thcache:100M + max_size=5g inactive=24h use_temp_path=off; + +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name _; + gzip on; + ssl_certificate /etc/ssl/private/th_fullchain.pem; + ssl_certificate_key /etc/ssl/private/th_privkey.pem; + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + add_header Strict-Transport-Security "max-age=63072000" always; + ssl_stapling on; + ssl_stapling_verify on; + resolver 127.0.0.1; + # local test helper + location / { + proxy_set_header X-Forwarded-Proto $scheme; + proxy_read_timeout 900; + proxy_pass http://127.0.0.1:8080; + + proxy_cache thcache; + proxy_cache_min_uses 1; + proxy_cache_lock on; + proxy_cache_lock_timeout 30; + proxy_cache_lock_age 30; + proxy_cache_use_stale error timeout invalid_header updating; + # Cache POST without headers set by the test helper! 
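+ # Assumption: a helper reply depends only on the request URI and body,
+ # which is why the body is folded into the cache key below.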
+ proxy_cache_methods POST;
+ proxy_cache_key "$request_uri|$request_body";
+ proxy_cache_valid 200 10m;
+ proxy_cache_valid any 0;
+ add_header X-Cache-Status $upstream_cache_status;
+
+ }
+}
+
+# Used by Netdata to monitor Nginx
+server {
+ listen 127.0.0.1:80;
+ server_name localhost;
+
+ allow 5.9.112.244; # monitoring host
+ deny all;
+
+ location = /stub_status {
+ stub_status;
+ }
+}
+
+# Used by Prometheus to reach the TH
+server {
+ listen 9001;
+ server_name localhost;
+
+ allow 5.9.112.244; # monitoring host
+ deny all;
+
+ location = /metrics {
+ proxy_pass http://127.0.0.1:9091;
+ }
+}
diff --git a/ansible/roles/ooni-backend/templates/rotation_setup.sh b/ansible/roles/ooni-backend/templates/rotation_setup.sh
new file mode 100644
index 00000000..5706150c
--- /dev/null
+++ b/ansible/roles/ooni-backend/templates/rotation_setup.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# Managed by ansible, see roles/ooni-backend/tasks/main.yml
+#
+# Configure test-helper droplet
+# This script is run remotely on a newly spawned VM by https://github.com/ooni/backend/blob/master/analysis/rotation.py
+# It runs as root and with CWD=/
+#
+set -euo pipefail
+exec 1>/var/log/vm_rotation_setup.log 2>&1
+echo > /etc/motd
+
+echo "Configuring APT"
+echo "deb [trusted=yes] https://ooni-deb.s3.eu-central-1.amazonaws.com unstable main" > /etc/apt/sources.list.d/ooni.list
+cat <<EOF > /etc/apt/trusted.gpg.d/ooni.gpg
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mDMEYGISFRYJKwYBBAHaRw8BAQdA4VxoR0gSsH56BbVqYdK9HNQ0Dj2YFVbvKIIZ
+JKlaW920Mk9PTkkgcGFja2FnZSBzaWduaW5nIDxjb250YWN0QG9wZW5vYnNlcnZh
+dG9yeS5vcmc+iJYEExYIAD4WIQS1oI8BeW5/UhhhtEk3LR/ycfLdUAUCYGISFQIb
+AwUJJZgGAAULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRA3LR/ycfLdUFk+AQCb
+gsUQsAQGxUFvxk1XQ4RgEoh7wy2yTuK8ZCkSHJ0HWwD/f2OAjDigGq07uJPYw7Uo
+Ih9+mJ/ubwiPMzUWF6RSdgu4OARgYhIVEgorBgEEAZdVAQUBAQdAx4p1KerwcIhX
+HfM9LbN6Gi7z9j4/12JKYOvr0d0yC30DAQgHiH4EGBYIACYWIQS1oI8BeW5/Uhhh
+tEk3LR/ycfLdUAUCYGISFQIbDAUJJZgGAAAKCRA3LR/ycfLdUL4cAQCs53fLphhy
+6JMwVhRs02LXi1lntUtw1c+EMn6t7XNM6gD+PXpbgSZwoV3ZViLqr58o9fZQtV3s
+oN7jfdbznrWVigE=
+=PtYb
+-----END PGP PUBLIC KEY BLOCK-----
+EOF
+
+# Vector
+cat <<EOF > /etc/apt/trusted.gpg.d/vector.gpg
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+Version: GnuPG v2
+
+mQENBF9gFZ0BCADETtIHM8y5ehMoyNiZcriK+tHXyKnbZCKtMCKcC4ll94/6pekQ
+jKIPWg8OXojkCtwua/TsddtQmOhUxAUtv6K0jO8r6sJ8rezMhuNH8J8rMqWgzv9d
+2+U7Z7GFgcP0OeD+KigtnR8uyp50suBmEDC8YytmmbESmG261Y38vZME0VvQ+CMy
+Yi/FvKXBXugaiCtaz0a5jVE86qSZbKbuaTHGiLn05xjTqc4FfyP4fi4oT2r6GGyL
+Bn5ob84OjXLQwfbZIIrNFR10BvL2SRLL0kKKVlMBBADodtkdwaTt0pGuyEJ+gVBz
+629PZBtSrwVRU399jGSfsxoiLca9//c7OJzHABEBAAG0OkNsb3Vkc21pdGggUGFj
+a2FnZSAodGltYmVyL3ZlY3RvcikgPHN1cHBvcnRAY2xvdWRzbWl0aC5pbz6JATcE
+EwEIACEFAl9gFZ0CGy8FCwkIBwMFFQoJCAsFFgIDAQACHgECF4AACgkQNUPbLQor
+xLhf6gf8DyfIpKjvEeW/O8lRUTpkiPKezJbb+udZboCXJKDD02Q9PE3hfEfQRr5X
+muytL7YMPvzqBVuP3xV5CN3zvtiQQbZiDhstImVyd+t24pQTkjzkvy+A2yvUuIkE
+RWxuey41f5FNj/7wdfJnHoU9uJ/lvsb7DLXw7FBMZFNBR6LED/d+b61zMzVvmFZA
+gsrCGwr/jfySwnpShmKdJaMTHQx0qt2RfXwNm2V6i900tAuMUWnmUIz5/9vENPKm
+0+31I43a/QgmIrKEePhwn2jfA1oRlYzdv+PbblSTfjTStem+GqQkj9bZsAuqVH8g
+3vq0NvX0k2CLi/W9mTiSdHXFChI15A==
+=k36w
+-----END PGP PUBLIC KEY BLOCK-----
+EOF
+
+echo "deb https://repositories.timber.io/public/vector/deb/debian bullseye main" > /etc/apt/sources.list.d/vector.list
+
+echo "Installing packages"
+export DEBIAN_FRONTEND=noninteractive
+apt-get update -q
+apt-get purge -qy unattended-upgrades rsyslog
+apt-get upgrade -qy
+apt-get install -qy --no-install-recommends chrony netdata oohelperd netdata-plugins-python
+
+systemctl daemon-reload
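+# rsyslog was purged above: bounce journald and emit a test line with
+# `logger`, presumably to confirm that system logging still works.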
+systemctl restart systemd-journald.service +logger start +systemctl restart systemd-journald.service + +apt-get install -qy --no-install-recommends vector + +echo "Configuring Vector" +# The certs are copied over by rotation.py +cat > /etc/vector/vector.toml < /etc/netdata/netdata.conf < /var/run/rotation_setup_completed diff --git a/ansible/roles/ooni-backend/templates/tor_targets.json b/ansible/roles/ooni-backend/templates/tor_targets.json new file mode 100644 index 00000000..933c4ede --- /dev/null +++ b/ansible/roles/ooni-backend/templates/tor_targets.json @@ -0,0 +1,304 @@ +{ + "128.31.0.39:9101": { + "address": "128.31.0.39:9101", + "fingerprint": "9695DFC35FFEB861329B9F1AB04C46397020CE31", + "name": "moria1", + "protocol": "or_port_dirauth" + }, + "128.31.0.39:9131": { + "address": "128.31.0.39:9131", + "fingerprint": "9695DFC35FFEB861329B9F1AB04C46397020CE31", + "name": "moria1", + "protocol": "dir_port" + }, + "131.188.40.189:443": { + "address": "131.188.40.189:443", + "fingerprint": "F2044413DAC2E02E3D6BCF4735A19BCA1DE97281", + "name": "gabelmoo", + "protocol": "or_port_dirauth" + }, + "131.188.40.189:80": { + "address": "131.188.40.189:80", + "fingerprint": "F2044413DAC2E02E3D6BCF4735A19BCA1DE97281", + "name": "gabelmoo", + "protocol": "dir_port" + }, + "154.35.175.225:443": { + "address": "154.35.175.225:443", + "fingerprint": "CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC", + "name": "Faravahar", + "protocol": "or_port_dirauth" + }, + "154.35.175.225:80": { + "address": "154.35.175.225:80", + "fingerprint": "CF6D0AAFB385BE71B8E111FC5CFF4B47923733BC", + "name": "Faravahar", + "protocol": "dir_port" + }, + "171.25.193.9:443": { + "address": "171.25.193.9:443", + "fingerprint": "BD6A829255CB08E66FBE7D3748363586E46B3810", + "name": "maatuska", + "protocol": "dir_port" + }, + "171.25.193.9:80": { + "address": "171.25.193.9:80", + "fingerprint": "BD6A829255CB08E66FBE7D3748363586E46B3810", + "name": "maatuska", + "protocol": "or_port_dirauth" + }, + "193.23.244.244:443": { + "address": "193.23.244.244:443", + "fingerprint": "7BE683E65D48141321C5ED92F075C55364AC7123", + "name": "dannenberg", + "protocol": "or_port_dirauth" + }, + "193.23.244.244:80": { + "address": "193.23.244.244:80", + "fingerprint": "7BE683E65D48141321C5ED92F075C55364AC7123", + "name": "dannenberg", + "protocol": "dir_port" + }, + "199.58.81.140:443": { + "address": "199.58.81.140:443", + "fingerprint": "74A910646BCEEFBCD2E874FC1DC997430F968145", + "name": "longclaw", + "protocol": "or_port_dirauth" + }, + "199.58.81.140:80": { + "address": "199.58.81.140:80", + "fingerprint": "74A910646BCEEFBCD2E874FC1DC997430F968145", + "name": "longclaw", + "protocol": "dir_port" + }, + "204.13.164.118:443": { + "address": "204.13.164.118:443", + "fingerprint": "24E2F139121D4394C54B5BCC368B3B411857C413", + "name": "bastet", + "protocol": "or_port_dirauth" + }, + "204.13.164.118:80": { + "address": "204.13.164.118:80", + "fingerprint": "24E2F139121D4394C54B5BCC368B3B411857C413", + "name": "bastet", + "protocol": "dir_port" + }, + "2d7292b5163fb7de5b24cd04032c93a2d4c454431de3a00b5a6d4a3309529e49": { + "address": "193.11.166.194:27020", + "fingerprint": "86AC7B8D430DAC4117E9F42C9EAED18133863AAF", + "params": { + "cert": [ + "0LDeJH4JzMDtkJJrFphJCiPqKx7loozKN7VNfuukMGfHO0Z8OGdzHVkhVAOfo1mUdv9cMg" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "3fa772a44e07856b4c70e958b2f6dc8a29450a823509d5dbbf8b884e7fb5bb9d": { + "address": "192.95.36.142:443", + "fingerprint": "CDF2E852BF539B82BD10E27E9115A31734E378C2", + "params": { 
+ "cert": [ + "qUVQ0srL1JI/vO6V6m/24anYXiJD3QP2HgzUKQtQ7GRqqUvs7P+tG43RtAqdhLOALP7DJQ" + ], + "iat-mode": [ + "1" + ] + }, + "protocol": "obfs4" + }, + "45.66.33.45:443": { + "address": "45.66.33.45:443", + "fingerprint": "7EA6EAD6FD83083C538F44038BBFA077587DD755", + "name": "dizum", + "protocol": "or_port_dirauth" + }, + "45.66.33.45:80": { + "address": "45.66.33.45:80", + "fingerprint": "7EA6EAD6FD83083C538F44038BBFA077587DD755", + "name": "dizum", + "protocol": "dir_port" + }, + "49116bf72d336bb8724fd3a06a5afa7bbd4e7baef35fbcdb9a98d13e702270ad": { + "address": "146.57.248.225:22", + "fingerprint": "10A6CD36A537FCE513A322361547444B393989F0", + "params": { + "cert": [ + "K1gDtDAIcUfeLqbstggjIw2rtgIKqdIhUlHp82XRqNSq/mtAjp1BIC9vHKJ2FAEpGssTPw" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "4a330634c5d678887f0f7c299490af43a6ac9fa944a6cc2140ab264c9ec124a0": { + "address": "209.148.46.65:443", + "fingerprint": "74FAD13168806246602538555B5521A0383A1875", + "params": { + "cert": [ + "ssH+9rP8dG2NLDN2XuFw63hIO/9MNNinLmxQDpVa+7kTOa9/m+tGWT1SmSYpQ9uTBGa6Hw" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "548eebff71da6128321c3bc1c3ec12b5bfff277ef5cde32709a33e207b57f3e2": { + "address": "37.218.245.14:38224", + "fingerprint": "D9A82D2F9C2F65A18407B1D2B764F130847F8B5D", + "params": { + "cert": [ + "bjRaMrr1BRiAW8IE9U5z27fQaYgOhX1UCmOpg2pFpoMvo6ZgQMzLsaTzzQNTlm7hNcb+Sg" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "5aeb9e43b43fc8a809b8d25aae968395a5ceea0e677caaf56e1c0a2ba002f5b5": { + "address": "193.11.166.194:27015", + "fingerprint": "2D82C2E354D531A68469ADF7F878FA6060C6BACA", + "params": { + "cert": [ + "4TLQPJrTSaDffMK7Nbao6LC7G9OW/NHkUwIdjLSS3KYf0Nv4/nQiiI8dY2TcsQx01NniOg" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "66.111.2.131:9001": { + "address": "66.111.2.131:9001", + "fingerprint": "BA44A889E64B93FAA2B114E02C2A279A8555C533", + "name": "Serge", + "protocol": "or_port_dirauth" + }, + "66.111.2.131:9030": { + "address": "66.111.2.131:9030", + "fingerprint": "BA44A889E64B93FAA2B114E02C2A279A8555C533", + "name": "Serge", + "protocol": "dir_port" + }, + "662218447d396b9d4f01b585457d267735601fedbeb9a19b86b942f238fe4e7b": { + "address": "51.222.13.177:80", + "fingerprint": "5EDAC3B810E12B01F6FD8050D2FD3E277B289A08", + "params": { + "cert": [ + "2uplIpLQ0q9+0qMFrK5pkaYRDOe460LL9WHBvatgkuRr/SL31wBOEupaMMJ6koRE6Ld0ew" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "75fe96d641a078fee06529af376d7f8c92757596e48558d5d02baa1e10321d10": { + "address": "45.145.95.6:27015", + "fingerprint": "C5B7CD6946FF10C5B3E89691A7D3F2C122D2117C", + "params": { + "cert": [ + "TD7PbUO0/0k6xYHMPW3vJxICfkMZNdkRrb63Zhl5j9dW3iRGiCx0A7mPhe5T2EDzQ35+Zw" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "86.59.21.38:443": { + "address": "86.59.21.38:443", + "fingerprint": "847B1F850344D7876491A54892F904934E4EB85D", + "name": "tor26", + "protocol": "or_port_dirauth" + }, + "86.59.21.38:80": { + "address": "86.59.21.38:80", + "fingerprint": "847B1F850344D7876491A54892F904934E4EB85D", + "name": "tor26", + "protocol": "dir_port" + }, + "99e9adc8bba0d60982dbc655b5e8735d88ad788905c3713a39eff3224b617eeb": { + "address": "38.229.1.78:80", + "fingerprint": "C8CBDB2464FC9804A69531437BCF2BE31FDD2EE4", + "params": { + "cert": [ + "Hmyfd2ev46gGY7NoVxA9ngrPF2zCZtzskRTzoWXbxNkzeVnGFPWmrTtILRyqCTjHR+s9dg" + ], + "iat-mode": [ + "1" + ] + }, + "protocol": "obfs4" + }, + "9d735c6e70512123ab2c2fe966446b2345b352c512e9fb359f4b1673236e4d4a": { 
+ "address": "38.229.33.83:80", + "fingerprint": "0BAC39417268B96B9F514E7F63FA6FBA1A788955", + "params": { + "cert": [ + "VwEFpk9F/UN9JED7XpG1XOjm/O8ZCXK80oPecgWnNDZDv5pdkhq1OpbAH0wNqOT6H6BmRQ" + ], + "iat-mode": [ + "1" + ] + }, + "protocol": "obfs4" + }, + "b7c0e3f183ad85a6686ec68344765cec57906b215e7b82a98a9ca013cb980efa": { + "address": "193.11.166.194:27025", + "fingerprint": "1AE2C08904527FEA90C4C4F8C1083EA59FBC6FAF", + "params": { + "cert": [ + "ItvYZzW5tn6v3G4UnQa6Qz04Npro6e81AP70YujmK/KXwDFPTs3aHXcHp4n8Vt6w/bv8cA" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "b8de51da541ced804840b1d8fd24d5ff1cfdf07eae673dae38c2bc2cce594ddd": { + "address": "85.31.186.26:443", + "fingerprint": "91A6354697E6B02A386312F68D82CF86824D3606", + "params": { + "cert": [ + "PBwr+S8JTVZo6MPdHnkTwXJPILWADLqfMGoVvhZClMq/Urndyd42BwX9YFJHZnBB3H0XCw" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "d2d6e34abeda851f7cd37138ffafcce992b2ccdb0f263eb90ab75d7adbd5eeba": { + "address": "85.31.186.98:443", + "fingerprint": "011F2599C0E9B27EE74B353155E244813763C3E5", + "params": { + "cert": [ + "ayq0XzCwhpdysn5o0EyDUbmSOx3X/oTEbzDMvczHOdBJKlvIdHHLJGkZARtT4dcBFArPPg" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + }, + "f855ba38d517d8589c16e1333ac23c6e516532cf036ab6f47b15030b40a3b6a6": { + "address": "[2a0c:4d80:42:702::1]:27015", + "fingerprint": "C5B7CD6946FF10C5B3E89691A7D3F2C122D2117C", + "params": { + "cert": [ + "TD7PbUO0/0k6xYHMPW3vJxICfkMZNdkRrb63Zhl5j9dW3iRGiCx0A7mPhe5T2EDzQ35+Zw" + ], + "iat-mode": [ + "0" + ] + }, + "protocol": "obfs4" + } +} \ No newline at end of file From d5ef17e351d8e16f55912e260d92ee25446b4c1b Mon Sep 17 00:00:00 2001 From: decfox Date: Wed, 4 Dec 2024 17:51:43 -0500 Subject: [PATCH 41/88] remove instances of ams-pg and replace test with backend-hel --- ansible/roles/ooni-backend/tasks/main.yml | 183 +++--------------- .../templates/clickhouse_config.xml | 2 +- 2 files changed, 30 insertions(+), 155 deletions(-) diff --git a/ansible/roles/ooni-backend/tasks/main.yml b/ansible/roles/ooni-backend/tasks/main.yml index 55c56bf6..a6ee12d6 100644 --- a/ansible/roles/ooni-backend/tasks/main.yml +++ b/ansible/roles/ooni-backend/tasks/main.yml @@ -17,7 +17,7 @@ state: directory - name: configure test api - when: inventory_hostname == 'ams-pg-test.ooni.org' + when: inventory_hostname == 'backend-hel.ooni.org' tags: api template: src: api.conf @@ -38,29 +38,6 @@ # mail_smtp_password: "DISABLED" # jwt_encryption_key and account_id_hashing_key are taken from the vault -- name: configure backend-hel api - when: inventory_hostname == 'backend-hel.ooni.org' - tags: api - template: - src: api.conf - dest: /etc/ooni/api.conf - owner: ooniapi - group: ooniapi - mode: 0640 - vars: - collectors: ['backend-hel.ooni.org',] - # bucket_name and collector_id must match the uploader - collector_id: 3 - # test bucket - bucket_name: ooni-data-eu-fra-test - # test GH repo - github_push_repo: "ooni-bot/test-lists" - github_origin_repo: "citizenlab/test-lists" - login_base_url: "https://test-lists.ooni.org/login" - pg_uri: "" - clickhouse_url: clickhouse://api:api@localhost/default - base_url: "https://backend-hel.ooni.org" - - name: configure backend-fsn api when: inventory_hostname == 'backend-fsn.ooni.org' tags: api @@ -71,7 +48,7 @@ group: ooniapi mode: 0640 vars: - collectors: ['backend-fsn.ooni.org', 'ams-pg.ooni.org'] + collectors: ['backend-fsn.ooni.org'] # bucket_name and collector_id must match the uploader collector_id: 1 bucket_name: ooni-data-eu-fra @@ -82,26 
+59,6 @@ clickhouse_url: clickhouse://api:api@localhost/default base_url: "https://api.ooni.io" -- name: configure prod api - when: inventory_hostname == 'ams-pg.ooni.org' - tags: api - template: - src: api.conf - dest: /etc/ooni/api.conf - owner: ooniapi - group: ooniapi - mode: 0640 - vars: - collectors: ['backend-fsn.ooni.org', 'ams-pg.ooni.org'] - # collector_id must match the uploader - collector_id: 0 - bucket_name: ooni-data-eu-fra - github_push_repo: "ooni/test-lists" - github_origin_repo: "citizenlab/test-lists" - login_base_url: "https://test-lists.ooni.org/login" - pg_uri: "postgresql://shovel:yEqgNr2eXvgG255iEBxVeP@localhost/metadb" - clickhouse_url: "" - - name: create Psiphon conffile tags: api copy: @@ -115,7 +72,7 @@ dest: /etc/ooni/tor_targets.json - name: configure api uploader using test bucket - when: inventory_hostname == 'ams-pg-test.ooni.org' + when: inventory_hostname == 'backend-hel.ooni.org' tags: api template: src: templates/api-uploader.conf @@ -136,44 +93,9 @@ bucket_name: ooni-data-eu-fra collector_id: 1 -# - name: configure HEL api uploader using test bucket -# when: inventory_hostname == 'backend-hel.ooni.org' -# tags: api -# template: -# src: templates/api-uploader.conf -# dest: /etc/ooni/api-uploader.conf -# vars: -# # bucket_name and collector_id must match the API -# bucket_name: ooni-data-eu-fra-test -# collector_id: 3 - - ## Haproxy and nginx ## -- name: configure api uploader using PROD bucket - when: inventory_hostname == 'ams-pg.ooni.org' - tags: api - template: - src: templates/api-uploader.conf - dest: /etc/ooni/api-uploader.conf - vars: - # bucket_name and collector_id must match the API - bucket_name: ooni-data-eu-fra - collector_id: 0 - - name: Overwrite API nginx test conf - when: inventory_hostname == 'ams-pg-test.ooni.org' - tags: api, webserv - template: - src: templates/nginx-api-test.conf - dest: /etc/nginx/sites-available/ooni-api.conf - mode: 0755 - owner: root - vars: - # Uses dehydrated - certpath: /var/lib/dehydrated/certs/ - -- name: Overwrite API nginx HEL conf when: inventory_hostname == 'backend-hel.ooni.org' tags: api, webserv template: @@ -186,7 +108,7 @@ certpath: /var/lib/dehydrated/certs/ - name: install haproxy if not present - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: webserv apt: cache_valid_time: 86400 @@ -194,7 +116,7 @@ state: present - name: Deploy haproxy conf - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: api, webserv template: src: templates/haproxy.cfg @@ -206,7 +128,7 @@ certpath: /var/lib/dehydrated/certs/ - name: Delete old files - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: api, webserv ansible.builtin.file: path: "{{ item }}" @@ -217,7 +139,7 @@ - /etc/nginx/sites-enabled/deb_ooni_org_http - name: Deploy dehydrated conf - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: api, webserv template: src: templates/dehydrated.config @@ -226,7 +148,7 @@ owner: root - name: Deploy dehydrated conf - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: api, webserv template: src: templates/dehydrated.config @@ -235,7 +157,7 @@ owner: root - name: Deploy dehydrated haproxy 
hook - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: api, webserv template: src: templates/dehydrated_haproxy_hook.sh @@ -255,17 +177,6 @@ # Uses dehydrated certpath: /var/lib/dehydrated/certs/ -- name: Overwrite API nginx prod conf - when: inventory_hostname == 'ams-pg.ooni.org' - tags: api, webserv - template: - src: templates/nginx-api-ams-pg.conf - dest: /etc/nginx/sites-available/ooni-api.conf - mode: 0755 - owner: root - vars: - certpath: /etc/letsencrypt/live/ - - name: Deploy API gunicorn conf tags: api template: @@ -291,7 +202,7 @@ dest: /etc/nginx/sites-enabled/deb_ooni_org - name: Configure deb-ci.ooni.org forwarder on test host - when: inventory_hostname == 'ams-pg-test.ooni.org' + when: inventory_hostname == 'backend-hel.ooni.org' tags: deb_ooni_org blockinfile: path: /etc/nginx/sites-enabled/deb_ooni_org_http @@ -323,7 +234,7 @@ - name: Restart haproxy # reload is not enough - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: api, deb_ooni_org, webserv shell: systemctl restart haproxy @@ -364,7 +275,7 @@ state: present - name: configure fastpath on test - when: inventory_hostname == 'ams-pg-test.ooni.org' + when: inventory_hostname == 'backend-hel.ooni.org' tags: fastpath template: src: fastpath.conf @@ -388,30 +299,6 @@ clickhouse_url: clickhouse://fastpath:fastpath@localhost/default -- name: configure fastpath on HEL - when: inventory_hostname == 'backend-hel.ooni.org' - tags: fastpath - template: - src: fastpath.conf - dest: /etc/ooni/fastpath.conf - owner: fastpath - group: fastpath - mode: 0640 - vars: - clickhouse_url: clickhouse://fastpath:fastpath@localhost/default -- name: configure fastpath on ams-pg - when: inventory_hostname == 'ams-pg.ooni.org' - tags: fastpath - template: - src: fastpath.conf - dest: /etc/ooni/fastpath.conf - owner: fastpath - group: fastpath - mode: 0640 - vars: - clickhouse_url: - psql_uri: postgresql://shovel:yEqgNr2eXvgG255iEBxVeP@localhost/metadb - ## Event detector ## @@ -523,7 +410,7 @@ ## Tor daemon and onion service ## - +## TODO(decfox): get rid of this? 
- name: configure tor onion service hostname when: inventory_hostname == 'ams-pg.ooni.org' tags: tor @@ -558,7 +445,7 @@ - name: install APT HTTPS support # do not update package if present - when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse apt: cache_valid_time: 86400 @@ -569,12 +456,12 @@ - dirmngr - name: install clickhouse keys - when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse command: apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv 8919F6BD2B48D754 - name: set clickhouse repos - when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse blockinfile: path: /etc/apt/sources.list.d/clickhouse.list @@ -594,7 +481,7 @@ Pin-Priority: 999 - name: pin clickhouse release train - when: inventory_hostname in ('backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-hel.ooni.org') tags: clickhouse blockinfile: path: /etc/apt/preferences.d/clickhouse-server @@ -617,21 +504,9 @@ vars: clickhouse_pkg_ver: 21.8.12.* -- name: install clickhouse on backend-hel +- name: install clickhouse on backend-hel.ooni.org when: inventory_hostname == 'backend-hel.ooni.org' tags: clickhouse - apt: - # refresh cache - cache_valid_time: 0 - name: - - clickhouse-server={{ clickhouse_pkg_ver }} - - clickhouse-client={{ clickhouse_pkg_ver }} - vars: - clickhouse_pkg_ver: 23.8.2.* - -- name: install clickhouse on ams-pg-test.ooni.org - when: inventory_hostname == 'ams-pg-test.ooni.org' - tags: clickhouse apt: # refresh cache cache_valid_time: 0 @@ -643,7 +518,7 @@ clickhouse_pkg_ver: 23.8.2.* - name: install clickhouse conf override - when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse template: src: clickhouse_config.xml @@ -654,7 +529,7 @@ notify: restart clickhouse - name: allow incoming TCP connections from monitoring to Clickhouse prometheus interface - when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse blockinfile: path: /etc/ooni/nftables/tcp/9363.nft @@ -664,7 +539,7 @@ notify: reload nftables - name: allow incoming TCP connections from jupiter on monitoring.ooni.org to Clickhouse - when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse blockinfile: path: /etc/ooni/nftables/tcp/9000.nft @@ -674,7 +549,7 @@ notify: reload nftables - name: Run clickhouse - when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse systemd: name: clickhouse-server.service @@ -685,7 +560,7 @@ # https://clickhouse.com/docs/en/operations/access-rights/#enabling-access-control - name: Clickhouse - test admin user - failure is ok to ignore - when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 
'backend-hel.ooni.org') tags: clickhouse-users command: clickhouse-client -u admin --password admin -q 'select 1' ignore_errors: true @@ -771,8 +646,8 @@ -- name: Run feeder on ams-pg-test - when: inventory_hostname == 'ams-pg-test.ooni.org' +- name: Run feeder on backend-hel + when: inventory_hostname == 'backend-hel.ooni.org' tags: clickhouse blockinfile: path: /etc/ooni/clickhouse_feeder.conf @@ -791,18 +666,18 @@ block: | [DEFAULT] pg_dbuser = readonly - pg_dbhost = ams-pg.ooni.org + pg_dbhost = backend-hel.ooni.org - name: Run feeder - when: inventory_hostname in ('backend-fsn.ooni.org', 'ams-pg-test.ooni.org') + when: inventory_hostname in ('backend-fsn.ooni.org', 'backend-hel.ooni.org') tags: clickhouse systemd: name: ooni-clickhouse-feeder.service state: started enabled: yes -- name: Run DB backup on ams-pg-test - when: inventory_hostname == 'ams-pg-test.ooni.org' +- name: Run DB backup on backend-hel + when: inventory_hostname == 'backend-hel.ooni.org' tags: dbbackup template: src: db-backup.conf diff --git a/ansible/roles/ooni-backend/templates/clickhouse_config.xml b/ansible/roles/ooni-backend/templates/clickhouse_config.xml index e84e53ff..548c2a81 100644 --- a/ansible/roles/ooni-backend/templates/clickhouse_config.xml +++ b/ansible/roles/ooni-backend/templates/clickhouse_config.xml @@ -16,7 +16,7 @@ {{ inventory_hostname.replace(".ooni.org", "") }} {% endif %} -{% if inventory_hostname == 'ams-pg-test.ooni.org' %} +{% if inventory_hostname == 'backend-hel.ooni.org' %} 500100100 3100100100 {% endif %} From 0a8be559e5be662ee949e129bdde434502c06fa6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 4 Dec 2024 18:14:04 -0500 Subject: [PATCH 42/88] Re-organization of monitoring * Split playbooks into tiers * Add support for clickhouse cluster monitoring --- ansible/deploy-clickhouse.yml | 13 ++ ansible/deploy-monitoring.yml | 12 ++ ansible/deploy-tier0.yml | 19 ++ ansible/deploy-tier2.yml | 25 +++ ansible/group_vars/all/vars.yml | 2 + ansible/group_vars/clickhouse/vars.yml | 20 ++- ansible/inventory | 30 ++-- ansible/playbook.yml | 71 +------- ansible/requirements.yml | 1 - ansible/roles/nginx/defaults/main.yml | 1 + ansible/roles/nginx/templates/nginx.conf | 169 ++++++------------ .../roles/prometheus/templates/prometheus.yml | 24 +++ .../defaults/main.yml | 16 ++ .../handlers/main.yml | 5 + .../tasks/install.yml | 60 +++++++ .../prometheus_node_exporter/tasks/main.yml | 14 +- .../templates/nginx-prometheus.j2 | 10 +- .../templates/node_exporter.service.j2 | 11 ++ scripts/cluster-migration/benchmark.sql | 55 ++++++ scripts/cluster-migration/db-sample.py | 33 ++++ 20 files changed, 374 insertions(+), 217 deletions(-) create mode 100644 ansible/deploy-clickhouse.yml create mode 100644 ansible/deploy-monitoring.yml create mode 100644 ansible/deploy-tier0.yml create mode 100644 ansible/deploy-tier2.yml create mode 100644 ansible/roles/nginx/defaults/main.yml create mode 100644 ansible/roles/prometheus_node_exporter/defaults/main.yml create mode 100644 ansible/roles/prometheus_node_exporter/tasks/install.yml create mode 100644 ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 create mode 100644 scripts/cluster-migration/benchmark.sql create mode 100644 scripts/cluster-migration/db-sample.py diff --git a/ansible/deploy-clickhouse.yml b/ansible/deploy-clickhouse.yml new file mode 100644 index 00000000..c2d34cc7 --- /dev/null +++ b/ansible/deploy-clickhouse.yml @@ -0,0 +1,13 @@ +--- +- name: Deploy oonidata clickhouse hosts + hosts: + - 
notebook.ooni.org + - data1.htz-fsn.prod.ooni.nu + #- data2.htz-fsn.prod.ooni.nu + - data3.htz-fsn.prod.ooni.nu + become: true + tags: + - clickhouse + roles: + - prometheus_node_exporter + - oonidata_clickhouse diff --git a/ansible/deploy-monitoring.yml b/ansible/deploy-monitoring.yml new file mode 100644 index 00000000..a1eadee9 --- /dev/null +++ b/ansible/deploy-monitoring.yml @@ -0,0 +1,12 @@ +--- +- name: Update monitoring config + hosts: monitoring.ooni.org + become: true + tags: + - monitoring + roles: + - prometheus + - prometheus_blackbox_exporter + - prometheus_alertmanager + + diff --git a/ansible/deploy-tier0.yml b/ansible/deploy-tier0.yml new file mode 100644 index 00000000..ffe68c02 --- /dev/null +++ b/ansible/deploy-tier0.yml @@ -0,0 +1,19 @@ +--- +- name: Include monitoring playbook + ansible.builtin.import_playbook: deploy-monitoring.yml + +- name: Include clickhouse playbook + ansible.builtin.import_playbook: deploy-clickhouse.yml + +- name: Deploy oonidata worker nodes + hosts: + - data1.htz-fsn.prod.ooni.nu + become: true + tags: + - oonidata_worker + roles: + - oonidata + vars: + enable_jupyterhub: false + enable_oonipipeline_worker: true + clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" diff --git a/ansible/deploy-tier2.yml b/ansible/deploy-tier2.yml new file mode 100644 index 00000000..8f87a663 --- /dev/null +++ b/ansible/deploy-tier2.yml @@ -0,0 +1,25 @@ +--- +- name: Setup OpenVPN server + hosts: openvpn-server1.ooni.io + become: true + remote_user: root + roles: + - ssh_users + +- name: Deploy notebook host + hosts: notebook.ooni.org + become: true + tags: + - notebook + vars: + enable_oonipipeline_worker: false + roles: + - oonidata + +# commented out due to the fact it requires manual config of ~/.ssh/config +#- name: Setup codesign box +# hosts: codesign-box +# become: true +# remote_user: ubuntu +# roles: +# - codesign_box diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index 17712861..d18687cd 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -27,3 +27,5 @@ admin_usernames: [ art, mehul ] root_usernames: [ art, mehul ] non_admin_usernames: [ ] deactivated_usernames: [ sbs, federico, sarath ] + +prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}" diff --git a/ansible/group_vars/clickhouse/vars.yml b/ansible/group_vars/clickhouse/vars.yml index 8483e095..f1ac5248 100644 --- a/ansible/group_vars/clickhouse/vars.yml +++ b/ansible/group_vars/clickhouse/vars.yml @@ -26,7 +26,7 @@ clickhouse_config: max_connections: 4096 keep_alive_timeout: 3 max_concurrent_queries: 100 - max_server_memory_usage: 0 + max_server_memory_usage: 21001001000 max_thread_pool_size: 10000 max_server_memory_usage_to_ram_ratio: 0.9 total_memory_profiler_step: 4194304 @@ -156,6 +156,10 @@ clickhouse_distributed_ddl: clickhouse_default_profiles: default: readonly: 2 + max_memory_usage: 11001001000 + use_uncompressed_cache: 0 + load_balancing: random + max_partitions_per_insert_block: 100 readonly: readonly: 1 write: @@ -196,3 +200,17 @@ clickhouse_default_quotas: result_rows: 0 read_rows: 0 execution_time: 0 + +clickhouse_prometheus: + endpoint: "/metrics" + port: 9363 + metrics: true + events: true + asynchronous_metrics: true + status_info: true + 
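+# Each entry below is rendered by
+# roles/prometheus_node_exporter/templates/nginx-prometheus.j2 into an nginx
+# location block that proxies to the matching local exporter.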
+prometheus_nginx_proxy_config: + - location: /metrics/node_exporter + proxy_pass: http://127.0.0.1:8100/metrics + - location: /metrics/clickhouse + proxy_pass: http://127.0.0.1:9363/metrics diff --git a/ansible/inventory b/ansible/inventory index 31270622..a44f8d45 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -1,19 +1,8 @@ -[all] -# This requires manual setup of ~/.ssh/config -#codesign-box +[all:children] +htz-fsn +ghs-ams -[prod] -data.ooni.org -oonidata.ooni.org -monitoring.ooni.org -openvpn-server1.ooni.io -notebook.ooni.org -data1.htz-fsn.prod.ooni.nu -data2.htz-fsn.prod.ooni.nu -data3.htz-fsn.prod.ooni.nu - -[dev] -oonidatatest.ooni.nu +## Role tags [clickhouse] notebook.ooni.org @@ -21,12 +10,15 @@ data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu -[have_node_exporter] +## Location tags + +[htz-fsn] data.ooni.org -oonidata.ooni.org -openvpn-server1.ooni.io +monitoring.ooni.org notebook.ooni.org data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu -oonidatatest.ooni.nu + +[ghs-ams] +openvpn-server1.ooni.io diff --git a/ansible/playbook.yml b/ansible/playbook.yml index be962b57..17bcd402 100644 --- a/ansible/playbook.yml +++ b/ansible/playbook.yml @@ -7,71 +7,8 @@ tags: - bootstrap -- name: Setup node_exporter on have_node_exporter hosts - hosts: all - become: yes - roles: - - prometheus_node_exporter - tags: - - prometheus_node_exporter - -- name: Update monitoring config - hosts: monitoring.ooni.org - become: true - tags: - - monitoring - roles: - - prometheus - - prometheus_blackbox_exporter - - prometheus_alertmanager - -- name: Setup OpenVPN server - hosts: openvpn-server1.ooni.io - become: true - remote_user: root - roles: - - ssh_users - -- name: Deploy oonidata clickhouse hosts - hosts: - - data1.htz-fsn.prod.ooni.nu - #- data2.htz-fsn.prod.ooni.nu - - data3.htz-fsn.prod.ooni.nu - - notebook.ooni.org - become: true - tags: - - clickhouse - roles: - #- tailnet - - oonidata_clickhouse - -- name: Deploy oonidata worker nodes - hosts: - - data1.htz-fsn.prod.ooni.nu - become: true - tags: - - oonidata_worker - roles: - - oonidata - vars: - enable_jupyterhub: false - enable_oonipipeline_worker: true - clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" - -- name: Deploy notebook host - hosts: notebook.ooni.org - become: true - tags: - - notebook - vars: - enable_oonipipeline_worker: false - roles: - - oonidata +- name: Include tier0 playbook + ansible.builtin.import_playbook: deploy-tier0.yml -# commented out due to the fact it requires manual config of ~/.ssh/config -#- name: Setup codesign box -# hosts: codesign-box -# become: true -# remote_user: ubuntu -# roles: -# - codesign_box +- name: Include tier2 playbook + ansible.builtin.import_playbook: deploy-tier2.yml diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 0a2eae7d..52ae85ea 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -1,7 +1,6 @@ - src: willshersystems.sshd - src: nginxinc.nginx - src: geerlingguy.certbot -- src: geerlingguy.node_exporter - src: artis3n.tailscale - src: https://github.com/idealista/clickhouse_role scm: git diff --git a/ansible/roles/nginx/defaults/main.yml b/ansible/roles/nginx/defaults/main.yml new file mode 100644 index 00000000..4c0ac11a --- /dev/null +++ b/ansible/roles/nginx/defaults/main.yml @@ -0,0 +1 @@ +nginx_user: nginx diff --git 
a/ansible/roles/nginx/templates/nginx.conf b/ansible/roles/nginx/templates/nginx.conf
index f43bf7c5..7b1b594c 100644
--- a/ansible/roles/nginx/templates/nginx.conf
+++ b/ansible/roles/nginx/templates/nginx.conf
@@ -1,122 +1,61 @@
-# NB: system nginx uses `www-data` user!
-user nginx;
-worker_processes 2;
+# Managed by ansible
+# roles/nginx/templates/nginx.conf
+#
-error_log /var/log/nginx/error.log warn;
-pid /var/run/nginx.pid;
+user {{ nginx_user }};
+worker_processes auto;
+pid /run/nginx.pid;
+include /etc/nginx/modules-enabled/*.conf;
 events {
- worker_connections 1024;
+ worker_connections 768;
+ # multi_accept on;
 }
 http {
- include /etc/nginx/mime.types;
- default_type application/octet-stream;
-
- geo $is_ooni {
- # TODO: this is not implemented ATM
- default 0;
- }
-
- map $http_x_request_id $has_request_id { # check for `X-Request-ID`
- "" 0;
- default 1;
- }
-
- map "$is_ooni:$has_request_id" $ooni_request_id {
- "1:1" $http_x_request_id; # use `X-Request-ID` if it's okay
- default $request_id;
- }
-
- # IPv4 is anonymized to /24, IPv6 to /48 - according to OONI Data Policy.
- # https://ooni.torproject.org/about/data-policy/
- # IP is recorded to track possible abusers, not to distinguish users, so the
- # address is truncated down to ISP (min routable prefix) instead of hashing.
- map $remote_addr $ooni_remote_addr {
- default "0.0.0.0";
- # variables in map value require nginx/1.11.0+
- "~(?P<ip>\d+\.\d+\.\d+)\.\d+" "$ip.0";
- # :: means at least TWO zero 16bit fields, https://tools.ietf.org/html/rfc5952#section-4.2.2
- "~(?P<ip>[0-9a-f]+:[0-9a-f]+:[0-9a-f]+):[0-9a-f:]+" "$ip::";
- "~(?P<ip>[0-9a-f]+:[0-9a-f]+)::[0-9a-f:]+" "$ip::";
- "~(?P<ip>[0-9a-f]+)::[0-9a-f:]+" "$ip::";
- }
-
- # $server_name is important as mtail does not distinguish log lines from
- # different files, $host is required to log actual `Host` header.
- # $request is split into separate fields to ease awk and mtail parsing.
- # $scheme is used instead of $https to ease eye-reading.
- # TCP_INFO is logged for random fun.
- log_format mtail_pub - '$time_iso8601\t$msec\t$server_name\t' - '$ooni_remote_addr\t' # pub/int diff - '$request_completion\t$request_time\t$status\t$bytes_sent\t$body_bytes_sent\t' - '$upstream_cache_status\t$upstream_addr\t$upstream_status\t$upstream_connect_time\t$upstream_header_time\t$upstream_response_time\t' - '$scheme\t$server_protocol\t$request_length\t$request_method\t$host\t$request_uri\t' - '$tcpinfo_rtt\t$tcpinfo_rttvar\t' - '$http_referer\t$http_user_agent\t$ooni_request_id'; - - log_format mtail_int - '$time_iso8601\t$msec\t$server_name\t' - '$remote_addr\t' # pub/int diff - '$request_completion\t$request_time\t$status\t$bytes_sent\t$body_bytes_sent\t' - '$upstream_cache_status\t$upstream_addr\t$upstream_status\t$upstream_connect_time\t$upstream_header_time\t$upstream_response_time\t' - '$scheme\t$server_protocol\t$request_length\t$request_method\t$host\t$request_uri\t' - '$tcpinfo_rtt\t$tcpinfo_rttvar\t' - '$http_referer\t$http_user_agent\t$ooni_request_id'; - - log_format oolog '$ooni_remote_addr - $remote_user [$time_local] ' - '"$request" $status $body_bytes_sent ' - '"$http_referer" "$http_user_agent" "$host"'; - - log_format oolog_mtail '$time_iso8601\t$msec\t$server_name\t' - '$ooni_remote_addr\t' # pub/int diff - '$request_completion\t$request_time\t$status\t$bytes_sent\t$body_bytes_sent\t' - '$upstream_cache_status\t$upstream_addr\t$upstream_status\t$upstream_connect_time\t$upstream_header_time\t$upstream_response_time\t' - '$scheme\t$server_protocol\t$request_length\t$request_method\t$host\t$request_uri\t' - '$tcpinfo_rtt\t$tcpinfo_rttvar\t' - '$http_referer\t$http_user_agent\t$ooni_request_id'; - - access_log /var/log/nginx/access.log mtail_int; - - sendfile on; - tcp_nopush on; # TCP_CORK HTTP headers with sendfile() body into single packet - - keepalive_timeout 120 120; # Firefox has 115s, http://kb.mozillazine.org/Network.http.keep-alive.timeout - - server_tokens off; - - # SSL based on https://wiki.mozilla.org/Security/Server_Side_TLS (doc v4.1) - ssl_session_timeout 1d; - ssl_session_cache shared:GLOBAL:1m; # 1m of cache is ~4000 sessions - ssl_session_tickets off; # needs accurate key rotation - ssl_dhparam /etc/nginx/ffdhe2048_dhparam.pem; # https://tools.ietf.org/html/rfc7919 - ssl_prefer_server_ciphers on; - #TODO: ssl_stapling on; # needs `resolver` or `ssl_stapling_file` - #TODO: ssl_stapling_verify on; # needs `ssl_trusted_certificate` - #TODO: resolver ; - # Define in server{} - # - include /etc/nginx/ssl_modern.conf | /etc/nginx/ssl_intermediate.conf - # - ssl_certificate /etc/letsencrypt/live/example.org/fullchain.pem; - # - ssl_certificate_key /etc/letsencrypt/live/example.org/privkey.pem - # - ssl_trusted_certificate /etc/letsencrypt/live/example.org/chain.pem; # for ssl_stapling_verify - # - add_header Strict-Transport-Security max-age=15768000; # HSTS (15768000 seconds = 6 months) - ### - - gzip on; - gzip_types text/html text/plain text/css text/xml text/javascript application/x-javascript application/json application/xml; # default is only `text/html` - gzip_disable "msie6"; - #gzip_proxied any; - - # Host, X-Real-IP, X-Forwarded-For, X-Forwarded-Proto are from - # file /etc/nginx/proxy_params from nginx-common package - # NB: adding `proxy_set_header` in another location overwrites whole set! 
- proxy_set_header Host $http_host;
- proxy_set_header X-Real-IP $remote_addr;
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
- proxy_set_header X-Forwarded-Proto $scheme;
- proxy_set_header X-Request-ID $ooni_request_id;
-
- include /etc/nginx/conf.d/*.conf;
- include /etc/nginx/sites-enabled/*;
+
+ # Basic Settings
+
+ sendfile on;
+ tcp_nopush on; # TCP_CORK HTTP headers with sendfile() body into single packet
+ types_hash_max_size 2048;
+ # server_tokens off;
+
+ # server_names_hash_bucket_size 64;
+ # server_name_in_redirect off;
+
+ include /etc/nginx/mime.types;
+ default_type application/octet-stream;
+
+ # Logging Settings
+
+ # anonymize ipaddr
+ map $remote_addr $remote_addr_anon {
+ ~(?P<ip>\d+\.\d+\.\d+)\. $ip.0;
+ ~(?P<ip>[^:]+:[^:]+): $ip::;
+ default 0.0.0.0;
+ }
+
+ # log anonymized ipaddr and caching status
+ log_format ooni_nginx_fmt '$remote_addr_anon $upstream_cache_status [$time_local] '
+ '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"';
+
+ access_log syslog:server=unix:/dev/log ooni_nginx_fmt;
+ error_log syslog:server=unix:/dev/log;
+
+ # Gzip Settings
+
+ gzip on;
+
+ # gzip_vary on;
+ # gzip_proxied any;
+ # gzip_comp_level 6;
+ # gzip_buffers 16 8k;
+ # gzip_http_version 1.1;
+ # gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript;
+
+ # Virtual Host Configs
+
+ include /etc/nginx/conf.d/*.conf;
+ include /etc/nginx/sites-enabled/*;
 }
diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml
index e8f9cd30..bed0464e 100755
--- a/ansible/roles/prometheus/templates/prometheus.yml
+++ b/ansible/roles/prometheus/templates/prometheus.yml
@@ -151,6 +151,30 @@ scrape_configs:
 - targets:
 - backend-fsn.ooni.org:9363
+
+ - job_name: 'clickhouse cluster'
+ scrape_interval: 5s
+ scheme: http
+ metrics_path: "/metrics/clickhouse"
+ basic_auth:
+ username: 'prom'
+ password: '{{ prometheus_metrics_password_prod }}'
+ static_configs:
+ - targets:
+ - data1.htz-fsn.prod.ooni.nu:9100
+ - data3.htz-fsn.prod.ooni.nu:9100
+
+ - job_name: 'node new'
+ scrape_interval: 5s
+ scheme: http
+ metrics_path: "/metrics/node_exporter"
+ basic_auth:
+ username: 'prom'
+ password: '{{ prometheus_metrics_password_prod }}'
+ static_configs:
+ - targets:
+ - data1.htz-fsn.prod.ooni.nu:9100
+ - data3.htz-fsn.prod.ooni.nu:9100
+
 # See ansible/roles/ooni-backend/tasks/main.yml for the scraping targets
 - job_name: 'haproxy'
 scrape_interval: 5s
diff --git a/ansible/roles/prometheus_node_exporter/defaults/main.yml b/ansible/roles/prometheus_node_exporter/defaults/main.yml
new file mode 100644
index 00000000..3433498f
--- /dev/null
+++ b/ansible/roles/prometheus_node_exporter/defaults/main.yml
@@ -0,0 +1,16 @@
+prometheus_nginx_proxy_config:
+ - location: /metrics/node_exporter
+ proxy_pass: http://127.0.0.1:8100/metrics
+
+node_exporter_version: '1.8.2'
+node_exporter_arch: 'amd64'
+node_exporter_download_url: https://github.com/prometheus/node_exporter/releases/download/v{{ node_exporter_version }}/node_exporter-{{ node_exporter_version }}.linux-{{ node_exporter_arch }}.tar.gz
+
+node_exporter_bin_path: /usr/local/bin/node_exporter
+node_exporter_host: 'localhost'
+node_exporter_port: 8100
+node_exporter_options: ''
+
+node_exporter_state: started
+node_exporter_enabled: true
+node_exporter_restart: on-failure
diff --git a/ansible/roles/prometheus_node_exporter/handlers/main.yml b/ansible/roles/prometheus_node_exporter/handlers/main.yml
index 69a5b2fe..4ec66003 100644 --- a/ansible/roles/prometheus_node_exporter/handlers/main.yml +++ b/ansible/roles/prometheus_node_exporter/handlers/main.yml @@ -13,3 +13,8 @@ ansible.builtin.systemd_service: name: nginx state: restarted + +- name: restart node_exporter + service: + name: node_exporter + state: restarted diff --git a/ansible/roles/prometheus_node_exporter/tasks/install.yml b/ansible/roles/prometheus_node_exporter/tasks/install.yml new file mode 100644 index 00000000..2ad7ccd7 --- /dev/null +++ b/ansible/roles/prometheus_node_exporter/tasks/install.yml @@ -0,0 +1,60 @@ +--- +- name: Check current node_exporter version. + command: "{{ node_exporter_bin_path }} --version" + failed_when: false + changed_when: false + register: node_exporter_version_check + +- name: Download and unarchive node_exporter into temporary location. + unarchive: + src: "{{ node_exporter_download_url }}" + dest: /tmp + remote_src: true + mode: 0755 + when: > + node_exporter_version_check.stdout is not defined + or node_exporter_version not in node_exporter_version_check.stdout + register: node_exporter_download_check + +- name: Move node_exporter binary into place. + copy: + src: "/tmp/node_exporter-{{ node_exporter_version }}.linux-{{ node_exporter_arch }}/node_exporter" + dest: "{{ node_exporter_bin_path }}" + mode: 0755 + remote_src: true + notify: restart node_exporter + when: > + node_exporter_download_check is changed + or node_exporter_version_check.stdout | length == 0 + +- name: Create node_exporter user. + user: + name: node_exporter + shell: /sbin/nologin + state: present + +- name: Copy the node_exporter systemd unit file. + template: + src: node_exporter.service.j2 + dest: /etc/systemd/system/node_exporter.service + mode: 0644 + register: node_exporter_service + +- name: Reload systemd daemon if unit file is changed. + systemd: + daemon_reload: true + notify: restart node_exporter + when: node_exporter_service is changed + +- name: Ensure node_exporter is running and enabled at boot. + service: + name: node_exporter + state: "{{ node_exporter_state }}" + enabled: "{{ node_exporter_enabled }}" + +- name: Verify node_exporter is responding to requests. 
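+ # (plain GET against the exporter's root page, which links to /metrics;
+ # that is why the check below greps the body for the string "Metrics")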
+ uri: + url: "http://{% if node_exporter_host !='' %}{{ node_exporter_host }}{% else %}localhost{% endif %}:{{ node_exporter_port }}/" + return_content: true + register: metrics_output + failed_when: "'Metrics' not in metrics_output.content" diff --git a/ansible/roles/prometheus_node_exporter/tasks/main.yml b/ansible/roles/prometheus_node_exporter/tasks/main.yml index 0c4fc242..cf9f8229 100644 --- a/ansible/roles/prometheus_node_exporter/tasks/main.yml +++ b/ansible/roles/prometheus_node_exporter/tasks/main.yml @@ -4,15 +4,7 @@ - nginx - node_exporter -- ansible.builtin.include_role: - name: geerlingguy.node_exporter - vars: - node_exporter_host: "localhost" - node_exporter_port: 8100 - tags: - - monitoring - - node_exporter - - config +- include_tasks: install.yml - name: create ooni configuration directory ansible.builtin.file: @@ -30,7 +22,7 @@ name: prom password: "{{ prometheus_metrics_password }}" owner: root - group: www-data + group: nginx mode: 0640 tags: - monitoring @@ -55,7 +47,7 @@ nft_rules_tcp: - name: 9100 rules: - - add rule inet filter input tcp dport 9100 counter accept comment "Incoming prometheus monitoring" + - add rule inet filter input ip saddr 5.9.112.244 tcp dport 9100 counter accept comment "clickhouse prometheus from monitoring.ooni.org" tags: - monitoring - node_exporter diff --git a/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 b/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 index 7d9fbab1..7e68c45c 100644 --- a/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 +++ b/ansible/roles/prometheus_node_exporter/templates/nginx-prometheus.j2 @@ -7,14 +7,18 @@ server { access_log /var/log/nginx/{{ inventory_hostname }}.access.log; error_log /var/log/nginx/{{ inventory_hostname }}.log warn; - location /metrics { + {% for config in prometheus_nginx_proxy_config %} + + location {{ config['location'] }} { auth_basic "Administrator’s Area"; auth_basic_user_file /etc/ooni/prometheus_passwd; - proxy_pass http://127.0.0.1:8100; + proxy_pass {{ config['proxy_pass'] }}; proxy_set_header X-Real-IP $remote_addr; proxy_set_header Host $host; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } -} \ No newline at end of file + + {% endfor %} +} diff --git a/ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 b/ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 new file mode 100644 index 00000000..42cb98cc --- /dev/null +++ b/ansible/roles/prometheus_node_exporter/templates/node_exporter.service.j2 @@ -0,0 +1,11 @@ +[Unit] +Description=NodeExporter + +[Service] +TimeoutStartSec=0 +User=node_exporter +ExecStart={{ node_exporter_bin_path }} --web.listen-address={{ node_exporter_host }}:{{ node_exporter_port }} {{ node_exporter_options }} +Restart={{ node_exporter_restart }} + +[Install] +WantedBy=multi-user.target diff --git a/scripts/cluster-migration/benchmark.sql b/scripts/cluster-migration/benchmark.sql new file mode 100644 index 00000000..55e06781 --- /dev/null +++ b/scripts/cluster-migration/benchmark.sql @@ -0,0 +1,55 @@ +SELECT + countIf ( + anomaly = 't' + AND confirmed = 'f' + AND msm_failure = 'f' + ) AS anomaly_count, + countIf ( + confirmed = 't' + AND msm_failure = 'f' + ) AS confirmed_count, + countIf (msm_failure = 't') AS failure_count, + countIf ( + anomaly = 'f' + AND confirmed = 'f' + AND msm_failure = 'f' + ) AS ok_count, + COUNT(*) AS measurement_count, + domain +FROM + fastpath +WHERE + measurement_start_time >= '2024-11-01' + AND 
measurement_start_time < '2024-11-10' + AND probe_cc = 'IT' +GROUP BY + domain; + +SELECT + COUNT(*) AS measurement_count, + domain +FROM + analysis_web_measurement +WHERE + measurement_start_time >= '2024-11-01' + AND measurement_start_time < '2024-11-10' + AND probe_cc = 'IT' +GROUP BY + domain; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster MODIFY +ORDER BY + ( + measurement_start_time, + probe_cc, + probe_asn, + domain, + measurement_uid + ) +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster ADD INDEX IF NOT EXISTS measurement_start_time_idx measurement_start_time TYPE minmax GRANULARITY 2; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster MATERIALIZE INDEX measurement_start_time_idx; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster ADD INDEX IF NOT EXISTS probe_cc_idx probe_cc TYPE minmax GRANULARITY 1; + +ALTER TABLE ooni.analysis_web_measurement ON CLUSTER oonidata_cluster MATERIALIZE INDEX probe_cc_idx; \ No newline at end of file diff --git a/scripts/cluster-migration/db-sample.py b/scripts/cluster-migration/db-sample.py new file mode 100644 index 00000000..d4544135 --- /dev/null +++ b/scripts/cluster-migration/db-sample.py @@ -0,0 +1,33 @@ +from datetime import datetime, timedelta +import csv + +from tqdm import tqdm +from clickhouse_driver import Client as ClickhouseClient + + +START_TIME = datetime(2024, 11, 1, 0, 0, 0) +END_TIME = datetime(2024, 11, 10, 0, 0, 0) +SAMPLE_SIZE = 100 + + +def sample_to_file(table_name): + with ClickhouseClient.from_url("clickhouse://localhost/ooni") as click, open( + f"{table_name}-sample.csv", "w" + ) as out_file: + writer = csv.writer(out_file) + ts = START_TIME + while ts < END_TIME: + for row in click.execute_iter( + f""" + SELECT * FROM {table_name} + WHERE measurement_uid LIKE '{ts.strftime("%Y%m%d%H")}%' + ORDER BY measurement_uid LIMIT {SAMPLE_SIZE} + """ + ): + writer.writerow(row) + ts += timedelta(hours=1) + + +if __name__ == "__main__": + sample_to_file("obs_web") + sample_to_file("analysis_web_measurement") From 1b1a6b593afdb686309337bfc5941d1f392a457b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 4 Dec 2024 18:19:08 -0500 Subject: [PATCH 43/88] Align job_name --- ansible/roles/prometheus/templates/prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index bed0464e..f4111045 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -163,7 +163,7 @@ scrape_configs: - data1.htz-fsn.prod.ooni.nu:9100 - data3.htz-fsn.prod.ooni.nu:9100 - - job_name: 'node new' + - job_name: 'node' scrape_interval: 5s scheme: http metrics_path: "/metrics/node_exporter" From 461245718a0d7544f6c17bc1afee2fdafd6339bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 4 Dec 2024 18:27:13 -0500 Subject: [PATCH 44/88] rollback node job name --- ansible/roles/prometheus/templates/prometheus.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/prometheus/templates/prometheus.yml b/ansible/roles/prometheus/templates/prometheus.yml index f4111045..bed0464e 100755 --- a/ansible/roles/prometheus/templates/prometheus.yml +++ b/ansible/roles/prometheus/templates/prometheus.yml @@ -163,7 +163,7 @@ scrape_configs: - data1.htz-fsn.prod.ooni.nu:9100 - data3.htz-fsn.prod.ooni.nu:9100 - - job_name: 'node' + - 
job_name: 'node new' scrape_interval: 5s scheme: http metrics_path: "/metrics/node_exporter" From 797161b62e93fae1e6f26028099dbfc276c133db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 4 Dec 2024 18:28:59 -0500 Subject: [PATCH 45/88] Rename backend deploy role --- ansible/{playbook-backend.yml => deploy-ooni-backend.yml} | 0 ansible/deploy-tier0.yml | 3 +++ 2 files changed, 3 insertions(+) rename ansible/{playbook-backend.yml => deploy-ooni-backend.yml} (100%) diff --git a/ansible/playbook-backend.yml b/ansible/deploy-ooni-backend.yml similarity index 100% rename from ansible/playbook-backend.yml rename to ansible/deploy-ooni-backend.yml diff --git a/ansible/deploy-tier0.yml b/ansible/deploy-tier0.yml index ffe68c02..7c11a8c6 100644 --- a/ansible/deploy-tier0.yml +++ b/ansible/deploy-tier0.yml @@ -2,6 +2,9 @@ - name: Include monitoring playbook ansible.builtin.import_playbook: deploy-monitoring.yml +- name: Include ooni-backend playbook + ansible.builtin.import_playbook: deploy-ooni-backend.yml + - name: Include clickhouse playbook ansible.builtin.import_playbook: deploy-clickhouse.yml From 427b248480b2d5a38bcb886356f164bfb63733a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 4 Dec 2024 18:34:09 -0500 Subject: [PATCH 46/88] Reformat --- ansible/group_vars/all/vars.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index d18687cd..c0b94053 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -2,11 +2,17 @@ ssh_users: agrabeli: login: agrabeli comment: Maria Xynou - keys: ["ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD0JSwM+t3Uz9lS3Mjoz9oo4vOToWyzboZhYQbP8JY5HvFtAvWanWHnUBO91t6hkgKIMiUqhdCJn26fqkhSGe/bRBaFUocOmuyfcmZoRdi0qzAskmycJsj/w6vWR4x6MYkmJvSeI/MGxjEFt4s2MfOG1tP8CBLUYft9qUleeJa7Jln8c+xbnqB7YngaI190icQHE9NuIB2CXvzbmo3tLtHNMagEwI7VoBDj6mxzTxBd9JhuhF4w5uGxxm0Gp1hzk+15obNnaBS+Anr7jXz8FPwwxCH+XhBZxB1PPpcIayKrf9iLyGtwmhkdDoWCqYAr1mue3LxFso+TZF4bwE4Cjt1 agrabelh@agrabelh"] + keys: + [ + "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD0JSwM+t3Uz9lS3Mjoz9oo4vOToWyzboZhYQbP8JY5HvFtAvWanWHnUBO91t6hkgKIMiUqhdCJn26fqkhSGe/bRBaFUocOmuyfcmZoRdi0qzAskmycJsj/w6vWR4x6MYkmJvSeI/MGxjEFt4s2MfOG1tP8CBLUYft9qUleeJa7Jln8c+xbnqB7YngaI190icQHE9NuIB2CXvzbmo3tLtHNMagEwI7VoBDj6mxzTxBd9JhuhF4w5uGxxm0Gp1hzk+15obNnaBS+Anr7jXz8FPwwxCH+XhBZxB1PPpcIayKrf9iLyGtwmhkdDoWCqYAr1mue3LxFso+TZF4bwE4Cjt1 agrabelh@agrabelh", + ] art: login: art comment: Arturo Filasto - keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJsibU0nsQFFIdolD1POzXOws4VetV0ZNByINRzY8Hx0 arturo@ooni.org"] + keys: + [ + "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJsibU0nsQFFIdolD1POzXOws4VetV0ZNByINRzY8Hx0 arturo@ooni.org", + ] majakomel: login: majakomel comment: Maja Komel @@ -23,9 +29,9 @@ ssh_users: keys: - "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDBXprrutdT6AhrV9hWBKjyzq6RqGmCBWpWxi3qwJyRcBJfkiEYKV9QWl3H0g/Sg9JzLd9lWG2yfAai7cyBAT4Ih0+OhwQ0V7wkhBn4YkNjs7d4BGPHjuLIywS9VtmiyH7VafikMjmqPLL/uPBIbRrx9RuSfLkAuN9XFZpVmqzWY8ePpcRCvnG6ucPxEY8o+4j5nfTrgxSaIT31kH16/PFJe07tn1SZjxZE4sZTz/p9xKt6s8HXmlP3RdnXSpXWmH8ZwYDrNhkcH8m6mC3giiqSKThFdwvQVflRRvn9pAlUOhy6KIBtAt1KobVJtOCPrrkcLhQ1C+2P9wKhfYspCGrScFGnrUqumLxPpwlqILxJvmgqGAtkm8Ela9f2D9sEv8CUv5x9XptZKlyRhtOLixvLYoJlwfXXnmXa8T1pg8+4063BhHUOu/bg0InpSp3hdscOfk0R8FtDlXnn6COwbPXynIt4PxzIxD/WQhP0ymgH3ky6ClB5wRBVhOqYvxQw32n2QFS9A5ocga+nATiOE7BTOufgmDCA/OIXfJ/GukXRaMCBsvlx7tObHS1LOMt0I+WdoOEjI0ARUrFzwoiTrs9QYmd922e7S35EnheT3JjnCTjebJrCNtwritUy8vjsN/M27wJs7MAXleT7drwXXnm+3xYrH+4KQ+ru0dxMe1zfBw== aanorbel@gmail.com" -admin_usernames: [ art, mehul ] -root_usernames: [ art, mehul ] -non_admin_usernames: [ ] -deactivated_usernames: [ sbs, federico, sarath ] +admin_usernames: [art, mehul] +root_usernames: [art, mehul] +non_admin_usernames: [] +deactivated_usernames: [sbs, federico, sarath] prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}" From 04dbf7b11350c854b22fd54b6d1f4c8091896a3e Mon Sep 17 00:00:00 2001 From: decfox Date: Thu, 5 Dec 2024 11:30:27 -0500 Subject: [PATCH 47/88] cleanup extra roles --- ansible/deploy-ooni-backend.yml | 18 ++-- ansible/roles/base-backend/README.adoc | 1 + ansible/roles/base-backend/handlers/main.yml | 15 +++ .../meta/main.yml | 0 .../tasks/main.yml | 93 ++----------------- .../templates/internal-deb.gpg | 0 .../templates/journald.conf | 0 .../templates/netdata.conf | 0 .../templates/ooni_internal.sources | 0 .../templates/resolved.conf | 4 +- .../templates/sources.list | 0 ansible/roles/base-bookworm/README.adoc | 1 - ansible/roles/bootstrap/tasks/main.yml | 7 ++ ansible/roles/nftables-sysadmin/README.adoc | 25 ----- .../roles/nftables-sysadmin/tasks/main.yml | 49 ---------- .../nftables-sysadmin/templates/nftables.conf | 41 -------- .../nginx-buster/files/ffdhe2048_dhparam.pem | 8 -- .../nginx-buster/files/ssl_intermediate.conf | 3 - .../roles/nginx-buster/files/ssl_modern.conf | 4 - ansible/roles/nginx-buster/handlers/main.yml | 6 -- ansible/roles/nginx-buster/tasks/main.yml | 33 ------- .../roles/nginx-buster/templates/nginx.conf | 68 -------------- 22 files changed, 42 insertions(+), 334 deletions(-) create mode 100644 ansible/roles/base-backend/README.adoc create mode 100644 ansible/roles/base-backend/handlers/main.yml rename ansible/roles/{base-bookworm => base-backend}/meta/main.yml (100%) rename ansible/roles/{base-bookworm => base-backend}/tasks/main.yml (58%) rename ansible/roles/{base-bookworm => base-backend}/templates/internal-deb.gpg (100%) rename ansible/roles/{base-bookworm => base-backend}/templates/journald.conf (100%) rename ansible/roles/{base-bookworm => base-backend}/templates/netdata.conf (100%) rename ansible/roles/{base-bookworm => base-backend}/templates/ooni_internal.sources (100%) rename ansible/roles/{base-bookworm => base-backend}/templates/resolved.conf (67%) rename ansible/roles/{base-bookworm => base-backend}/templates/sources.list (100%) delete mode 100644 ansible/roles/base-bookworm/README.adoc delete mode 100644 ansible/roles/nftables-sysadmin/README.adoc delete mode 100644 ansible/roles/nftables-sysadmin/tasks/main.yml delete mode 100755 ansible/roles/nftables-sysadmin/templates/nftables.conf delete mode 100644 ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem delete mode 100644 
ansible/roles/nginx-buster/files/ssl_intermediate.conf delete mode 100644 ansible/roles/nginx-buster/files/ssl_modern.conf delete mode 100644 ansible/roles/nginx-buster/handlers/main.yml delete mode 100644 ansible/roles/nginx-buster/tasks/main.yml delete mode 100644 ansible/roles/nginx-buster/templates/nginx.conf diff --git a/ansible/deploy-ooni-backend.yml b/ansible/deploy-ooni-backend.yml index c6b36579..7bc2cf78 100644 --- a/ansible/deploy-ooni-backend.yml +++ b/ansible/deploy-ooni-backend.yml @@ -1,16 +1,20 @@ --- - hosts: backend-hel.ooni.org roles: - - role: base-bookworm + - role: bootstrap - role: nftables - - role: nginx-buster + - role: nginx tags: nginx + vars: + nginx_user: "www-data" - role: dehydrated tags: dehydrated expand: yes - ssl_domains: - # with dehydrated the first entry is the cert FQDN - # and the other ones are alternative names - - "backend-hel.ooni.org" + vars: + ssl_domains: + # with dehydrated the first entry is the cert FQDN + # and the other ones are alternative names + - "backend-hel.ooni.org" - role: ooni-backend - ssl_domain: backend-hel.ooni.org + vars: + ssl_domain: backend-hel.ooni.org diff --git a/ansible/roles/base-backend/README.adoc b/ansible/roles/base-backend/README.adoc new file mode 100644 index 00000000..ac3f7039 --- /dev/null +++ b/ansible/roles/base-backend/README.adoc @@ -0,0 +1 @@ +Configure base host based on backend hosts diff --git a/ansible/roles/base-backend/handlers/main.yml b/ansible/roles/base-backend/handlers/main.yml new file mode 100644 index 00000000..4a8d06e8 --- /dev/null +++ b/ansible/roles/base-backend/handlers/main.yml @@ -0,0 +1,15 @@ +- name: reload nftables + tags: nftables + ansible.builtin.systemd_service: + name: nftables + state: reloaded + +- name: restart chrony + ansible.builtin.systemd: + name: chrony.service + state: restarted + +- name: restart netdata + ansible.builtin.systemd: + name: netdata.service + state: restarted diff --git a/ansible/roles/base-bookworm/meta/main.yml b/ansible/roles/base-backend/meta/main.yml similarity index 100% rename from ansible/roles/base-bookworm/meta/main.yml rename to ansible/roles/base-backend/meta/main.yml diff --git a/ansible/roles/base-bookworm/tasks/main.yml b/ansible/roles/base-backend/tasks/main.yml similarity index 58% rename from ansible/roles/base-bookworm/tasks/main.yml rename to ansible/roles/base-backend/tasks/main.yml index c9c3b1da..00a7352a 100644 --- a/ansible/roles/base-bookworm/tasks/main.yml +++ b/ansible/roles/base-backend/tasks/main.yml @@ -2,10 +2,6 @@ - name: motd shell: echo "" > /etc/motd -- name: Set hostname - ansible.builtin.hostname: - name: "{{ inventory_hostname }}" - - name: Remove apt repo tags: apt file: @@ -81,22 +77,6 @@ - tmux - vim -- name: Configure journald - tags: journald - template: - src: templates/journald.conf - dest: /etc/systemd/journald.conf - mode: 0644 - owner: root - -- name: enable and restart journald - tags: journald - systemd: - name: systemd-journald.service - state: restarted - enabled: yes - daemon_reload: yes - - name: Autoremove tags: autoremove apt: @@ -114,16 +94,8 @@ create: yes block: | add rule inet filter input ip saddr {{ lookup('dig', 'prometheus.ooni.org/A') }} tcp dport 19999 counter accept comment "netdata.service" - -#- name: reload nftables service -# systemd: -# name: nftables.service -# state: reloaded -# enabled: yes -# daemon_reload: yes - -- name: reload nftables service - service: name=nftables state=restarted + notify: + - reload nftables - name: configure netdata.service tags: netdata @@ -144,12 
+116,8 @@ tags: timezone timezone: name: Etc/UTC - -- name: restart chrony service - tags: timezone - systemd: - name: chrony.service - state: restarted + notify: + - restart chrony - name: configure netdata chrony tags: netdata, timezone @@ -168,54 +136,5 @@ path: /usr/lib/netdata/conf.d/python.d.conf regexp: '^chrony:' line: 'chrony: yes' - -#- name: configure netdata nginx -# blockinfile: -# path: /etc/netdata/python.d/nginx.conf -# create: yes -# block: | -# # Managed by ansible, see roles/base-bookworm/tasks/main.yml -# update_every: 5 -# nginx_log: -# name : 'nginx_log' -# path : '/var/log/nginx/access.log' - -#- name: configure netdata haproxy -# blockinfile: -# path: /etc/netdata/python.d/haproxy.conf -# block: | -# # Managed by ansible, see roles/base-bookworm/tasks/main.yml -# update_every: 5 -# via_url: -# url: 'http://127.0.0.1:7000/haproxy_stats;csv;norefresh' - -- name: restart netdata service - tags: netdata, timezone - systemd: - name: netdata.service - state: restarted - - -- name: install systemd-resolved - tags: resolved - apt: - install_recommends: no - cache_valid_time: 86400 - name: - - systemd-resolved - -- name: configure systemd-resolved - tags: resolved - template: - src: resolved.conf - dest: /etc/systemd/resolved.conf - -- name: restart systemd-resolved - tags: resolved - systemd: - name: systemd-resolved.service - state: restarted - -- name: test systemd-resolved - tags: resolved - shell: resolvectl query go.dnscheck.tools --cache=no + notify: + - restart netdata diff --git a/ansible/roles/base-bookworm/templates/internal-deb.gpg b/ansible/roles/base-backend/templates/internal-deb.gpg similarity index 100% rename from ansible/roles/base-bookworm/templates/internal-deb.gpg rename to ansible/roles/base-backend/templates/internal-deb.gpg diff --git a/ansible/roles/base-bookworm/templates/journald.conf b/ansible/roles/base-backend/templates/journald.conf similarity index 100% rename from ansible/roles/base-bookworm/templates/journald.conf rename to ansible/roles/base-backend/templates/journald.conf diff --git a/ansible/roles/base-bookworm/templates/netdata.conf b/ansible/roles/base-backend/templates/netdata.conf similarity index 100% rename from ansible/roles/base-bookworm/templates/netdata.conf rename to ansible/roles/base-backend/templates/netdata.conf diff --git a/ansible/roles/base-bookworm/templates/ooni_internal.sources b/ansible/roles/base-backend/templates/ooni_internal.sources similarity index 100% rename from ansible/roles/base-bookworm/templates/ooni_internal.sources rename to ansible/roles/base-backend/templates/ooni_internal.sources diff --git a/ansible/roles/base-bookworm/templates/resolved.conf b/ansible/roles/base-backend/templates/resolved.conf similarity index 67% rename from ansible/roles/base-bookworm/templates/resolved.conf rename to ansible/roles/base-backend/templates/resolved.conf index dd937e3c..aa68eaf1 100644 --- a/ansible/roles/base-bookworm/templates/resolved.conf +++ b/ansible/roles/base-backend/templates/resolved.conf @@ -2,8 +2,8 @@ # See roles/base-bookworm/templates/resolved.conf [Resolve] -## https://meta.wikimedia.org/wiki/Wikimedia_DNS -DNS=185.71.138.138 +DNS=9.9.9.9 +FallbackDNS=1.1.1.1 8.8.8.8 DNSOverTLS=opportunistic DNSSEC=allow-downgrade Cache=yes diff --git a/ansible/roles/base-bookworm/templates/sources.list b/ansible/roles/base-backend/templates/sources.list similarity index 100% rename from ansible/roles/base-bookworm/templates/sources.list rename to ansible/roles/base-backend/templates/sources.list diff --git 
a/ansible/roles/base-bookworm/README.adoc b/ansible/roles/base-bookworm/README.adoc deleted file mode 100644 index be579005..00000000 --- a/ansible/roles/base-bookworm/README.adoc +++ /dev/null @@ -1 +0,0 @@ -Configure base host based on Bookworm diff --git a/ansible/roles/bootstrap/tasks/main.yml b/ansible/roles/bootstrap/tasks/main.yml index 500d58ff..de1c174f 100644 --- a/ansible/roles/bootstrap/tasks/main.yml +++ b/ansible/roles/bootstrap/tasks/main.yml @@ -55,6 +55,13 @@ tags: - nftables +- name: Set the backend host configuration if valid + when: inventory_hostname == 'backend-hel.ooni.org' + ansible.builtin.include_role: + name: base-backend + tags: + - base-backend + - name: Configure journald tags: - journald diff --git a/ansible/roles/nftables-sysadmin/README.adoc b/ansible/roles/nftables-sysadmin/README.adoc deleted file mode 100644 index e3bef58f..00000000 --- a/ansible/roles/nftables-sysadmin/README.adoc +++ /dev/null @@ -1,25 +0,0 @@ -Install nftables based firewall - -Set up /etc/ooni/nftables/ - -Rules for specific services are *not* configured by this role - -When creating rules to accept TCP traffic from any IPv4/6 address, -files are named with the port number to detect collisions. - -Example (also see roles/nftables/tasks/main.yml): - -/etc/ooni/nftables/tcp/8080.nft - -``` -add rule inet filter input tcp dport 8080 counter accept comment "MyService" -``` - - -Otherwise: - -/etc/ooni/nftables/tcp/5432_postgres_internal.nft - -``` -add rule inet filter input ip saddr { 10.0.0.0/8, 192.168.0.0/16 } tcp dport 5432 counter accept comment "Internal PostgreSQL" -``` diff --git a/ansible/roles/nftables-sysadmin/tasks/main.yml b/ansible/roles/nftables-sysadmin/tasks/main.yml deleted file mode 100644 index 886c7ab0..00000000 --- a/ansible/roles/nftables-sysadmin/tasks/main.yml +++ /dev/null @@ -1,49 +0,0 @@ ---- -- name: Install nftables - tags: nftables - apt: - cache_valid_time: 86400 - name: nftables - -- name: create config dir - tags: nftables - file: - path: /etc/ooni/nftables/tcp - state: directory - owner: root - group: root - mode: 0755 - -- name: allow SSH - tags: nftables - blockinfile: - path: /etc/ooni/nftables/tcp/22.nft - create: yes - block: | - add rule inet filter input tcp dport 22 counter accept comment "Incoming SSH" - -- name: Overwrite nftables.conf - tags: nftables - template: - src: templates/nftables.conf - dest: /etc/nftables.conf - mode: 0755 - owner: root - -- name: enable nftables service - tags: nftables - shell: systemctl enable nftables.service - -- name: enable nftables service - tags: nftables - shell: systemctl start nftables.service - -#- name: Enable and start nftables service -# systemd: -# name: nftables.service -# state: reloaded -# enabled: yes - -- name: reload nftables service - tags: nftables - shell: systemctl reload nftables.service diff --git a/ansible/roles/nftables-sysadmin/templates/nftables.conf b/ansible/roles/nftables-sysadmin/templates/nftables.conf deleted file mode 100755 index 5f7b50cc..00000000 --- a/ansible/roles/nftables-sysadmin/templates/nftables.conf +++ /dev/null @@ -1,41 +0,0 @@ -#!/usr/sbin/nft -f -# -# Nftables configuration script -# -# Managed by ansible -# roles/nftables/templates/nftables.conf -# -# The ruleset is applied atomically - -flush ruleset - -table inet filter { - chain input { - type filter hook input priority 0; - policy drop; - iif lo accept comment "Accept incoming traffic from localhost" - ct state invalid drop - ct state established,related accept comment "Accept traffic related to outgoing 
connections" - icmp type echo-request accept - icmpv6 type echo-request counter packets 0 bytes 0 accept - icmpv6 type { nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert } ip6 hoplimit 1 accept - icmpv6 type { nd-router-advert, nd-neighbor-solicit, nd-neighbor-advert } ip6 hoplimit 255 counter packets 1 bytes 72 accept - } - - chain forward { - type filter hook forward priority 0; - policy accept; - } - - chain output { - type filter hook output priority 0; - policy accept; - } -} - -# Configure TCP traffic rules -include "/etc/ooni/nftables/tcp/*.nft" - -# Configure any other rule -include "/etc/ooni/nftables/*.nft" - diff --git a/ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem b/ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem deleted file mode 100644 index 9b182b72..00000000 --- a/ansible/roles/nginx-buster/files/ffdhe2048_dhparam.pem +++ /dev/null @@ -1,8 +0,0 @@ ------BEGIN DH PARAMETERS----- -MIIBCAKCAQEA//////////+t+FRYortKmq/cViAnPTzx2LnFg84tNpWp4TZBFGQz -+8yTnc4kmz75fS/jY2MMddj2gbICrsRhetPfHtXV/WVhJDP1H18GbtCFY2VVPe0a -87VXE15/V8k1mE8McODmi3fipona8+/och3xWKE2rec1MKzKT0g6eXq8CrGCsyT7 -YdEIqUuyyOP7uWrat2DX9GgdT0Kj3jlN9K5W7edjcrsZCwenyO4KbXCeAvzhzffi -7MA0BM0oNC9hkXL+nOmFg/+OTxIy7vKBg8P+OxtMb61zO7X8vC7CIAXFjvGDfRaD -ssbzSibBsu/6iGtCOGEoXJf//////////wIBAg== ------END DH PARAMETERS----- diff --git a/ansible/roles/nginx-buster/files/ssl_intermediate.conf b/ansible/roles/nginx-buster/files/ssl_intermediate.conf deleted file mode 100644 index 96d2e6e2..00000000 --- a/ansible/roles/nginx-buster/files/ssl_intermediate.conf +++ /dev/null @@ -1,3 +0,0 @@ -# Oldest compatible clients: Firefox 1, Chrome 1, IE 7, Opera 5, Safari 1, Windows XP IE8, Android 2.3, Java 7 -ssl_protocols TLSv1 TLSv1.1 TLSv1.2; # Dropping SSLv3, ref: POODLE -ssl_ciphers 'ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256:ECDHE-ECDSA-AES128-SHA:ECDHE-RSA-AES256-SHA384:ECDHE-RSA-AES128-SHA:ECDHE-ECDSA-AES256-SHA384:ECDHE-ECDSA-AES256-SHA:ECDHE-RSA-AES256-SHA:DHE-RSA-AES128-SHA256:DHE-RSA-AES128-SHA:DHE-RSA-AES256-SHA256:DHE-RSA-AES256-SHA:ECDHE-ECDSA-DES-CBC3-SHA:ECDHE-RSA-DES-CBC3-SHA:EDH-RSA-DES-CBC3-SHA:AES128-GCM-SHA256:AES256-GCM-SHA384:AES128-SHA256:AES256-SHA256:AES128-SHA:AES256-SHA:DES-CBC3-SHA:!DSS'; diff --git a/ansible/roles/nginx-buster/files/ssl_modern.conf b/ansible/roles/nginx-buster/files/ssl_modern.conf deleted file mode 100644 index 9ad7c11d..00000000 --- a/ansible/roles/nginx-buster/files/ssl_modern.conf +++ /dev/null @@ -1,4 +0,0 @@ -# Oldest compatible clients: Firefox 27, Chrome 30, IE 11 on Windows 7, Edge, Opera 17, Safari 9, Android 5.0, and Java 8 -ssl_protocols TLSv1.2; -ssl_ciphers 'ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-SHA384:ECDHE-RSA-AES256-SHA384:ECDHE-ECDSA-AES128-SHA256:ECDHE-RSA-AES128-SHA256'; -# NB: technically, it does not require ssl_dhparam as it has no DHE, only ECDHE. 
diff --git a/ansible/roles/nginx-buster/handlers/main.yml b/ansible/roles/nginx-buster/handlers/main.yml deleted file mode 100644 index b180da14..00000000 --- a/ansible/roles/nginx-buster/handlers/main.yml +++ /dev/null @@ -1,6 +0,0 @@ -- name: restart nginx - service: name=nginx state=restarted -- name: start nginx - service: name=nginx state=started -- name: reload nginx - service: name=nginx state=reloaded diff --git a/ansible/roles/nginx-buster/tasks/main.yml b/ansible/roles/nginx-buster/tasks/main.yml deleted file mode 100644 index 30f37e57..00000000 --- a/ansible/roles/nginx-buster/tasks/main.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -- name: install stable nginx - apt: - name: nginx - cache_valid_time: 86400 - notify: start nginx - -# https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 -# -# Guide https://wiki.mozilla.org/Security/Server_Side_TLS#Pre-defined_DHE_groups -# suggests ffdhe2048 instead of `openssl dhparam` to avoid https://weakdh.org/ -- name: copy nginx configuration snippets - copy: src={{item}} dest=/etc/nginx/{{ item }} mode=0444 owner=root group=root - with_items: - - ffdhe2048_dhparam.pem # ffdhe2048 Diffie-Hellman parameters - - ssl_intermediate.conf - - ssl_modern.conf - -- name: remove `default` vhost - file: path={{item}} state=absent - #notify: reload nginx - with_items: - - /etc/nginx/conf.d/default.conf - - /etc/nginx/sites-available/default - - /etc/nginx/sites-enabled/default - -- name: set nginx.conf - template: - src=nginx.conf - dest=/etc/nginx/nginx.conf - mode=0444 - #notify: reload nginx -... diff --git a/ansible/roles/nginx-buster/templates/nginx.conf b/ansible/roles/nginx-buster/templates/nginx.conf deleted file mode 100644 index f9e742a1..00000000 --- a/ansible/roles/nginx-buster/templates/nginx.conf +++ /dev/null @@ -1,68 +0,0 @@ - -# Managed by ansible -# roles/nginx-buster/templates/nginx.conf -# -# Generated with: -# https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.4 -# - -user www-data; -worker_processes auto; -pid /run/nginx.pid; -include /etc/nginx/modules-enabled/*.conf; - -events { - worker_connections 768; - # multi_accept on; -} - -http { - - # Basic Settings - - sendfile on; - tcp_nopush on; # TCP_CORK HTTP headers with sendfile() body into single packet - types_hash_max_size 2048; - # server_tokens off; - - # server_names_hash_bucket_size 64; - # server_name_in_redirect off; - - include /etc/nginx/mime.types; - default_type application/octet-stream; - - # Logging Settings - - # anonymize ipaddr - map $remote_addr $remote_addr_anon { - ~(?P\d+\.\d+\.\d+)\. 
$ip.0; - ~(?P[^:]+:[^:]+): $ip::; - default 0.0.0.0; - } - - # log anonymized ipaddr and caching status - log_format ooni_nginx_fmt '$remote_addr_anon $upstream_cache_status [$time_local] ' - '"$request" $status $body_bytes_sent "$http_referer" "$http_user_agent"'; - - - access_log syslog:server=unix:/dev/log ooni_nginx_fmt; - error_log syslog:server=unix:/dev/log; - - # Gzip Settings - - gzip on; - - # gzip_vary on; - # gzip_proxied any; - # gzip_comp_level 6; - # gzip_buffers 16 8k; - # gzip_http_version 1.1; - # gzip_types text/plain text/css application/json application/javascript text/xml application/xml application/xml+rss text/javascript; - - # Virtual Host Configs - - include /etc/nginx/conf.d/*.conf; - include /etc/nginx/sites-enabled/*; - -} - From b4f558074ba290d4313617327b1fe770b54d2eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 6 Dec 2024 12:10:18 -0500 Subject: [PATCH 48/88] Drop discontinued hosts from monitoring --- ansible/roles/prometheus/vars/main.yml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index abf6d469..01ae359f 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -1,8 +1,6 @@ dom0_hosts: - ams-ps.ooni.nu - ams-slack-1.ooni.org - - ams-wcth2.ooni.nu - - ams-wcth3.ooni.nu - amsmatomo.ooni.nu - db-1.proteus.ooni.io - doams1-countly.ooni.nu @@ -15,8 +13,9 @@ blackbox_jobs: targets: # - "https://a.web-connectivity.th.ooni.io/status" - "https://wcth.ooni.io/status" - - "https://ams-wcth2.ooni.nu/status" - - "https://a.web-connectivity.th.ooni.io/status" # "https://ams-wcth3.ooni.nu/status" + # TODO add these records to the ALB config + #- "https://ams-wcth2.ooni.nu/status" + #- "https://a.web-connectivity.th.ooni.io/status" # "https://ams-wcth3.ooni.nu/status" # cloudfront - "https://d33d1gs9kpq1c5.cloudfront.net/status" From 7d33f551be4aea08de6561d0942b2f622a9ea5d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 6 Dec 2024 14:55:02 -0500 Subject: [PATCH 49/88] Add backend reverse proxy to prod tf --- tf/environments/prod/main.tf | 54 +++++++++++++++++++++++++++++++++++- 1 file changed, 53 insertions(+), 1 deletion(-) diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf index 477ffcdb..de10ddc9 100644 --- a/tf/environments/prod/main.tf +++ b/tf/environments/prod/main.tf @@ -315,6 +315,58 @@ module "ooni_backendproxy" { ) } +module "ooniapi_reverseproxy_deployer" { + source = "../../modules/ooniapi_service_deployer" + + service_name = "reverseproxy" + repo = "ooni/backend" + branch_name = "master" + buildspec_path = "ooniapi/services/reverseproxy/buildspec.yml" + codestar_connection_arn = aws_codestarconnections_connection.oonidevops.arn + + codepipeline_bucket = aws_s3_bucket.ooniapi_codepipeline_bucket.bucket + + ecs_service_name = module.ooniapi_reverseproxy.ecs_service_name + ecs_cluster_name = module.ooniapi_cluster.cluster_name +} + +module "ooniapi_reverseproxy" { + source = "../../modules/ooniapi_service" + + task_memory = 64 + + # First run should be set on first run to bootstrap the task definition + # first_run = true + + vpc_id = module.network.vpc_id + public_subnet_ids = module.network.vpc_subnet_public[*].id + private_subnet_ids = module.network.vpc_subnet_private[*].id + + service_name = "reverseproxy" + default_docker_image_url = "ooni/api-reverseproxy:latest" + stage = local.environment + dns_zone_ooni_io = 
local.dns_zone_ooni_io + key_name = module.adm_iam_roles.oonidevops_key_name + ecs_cluster_id = module.ooniapi_cluster.cluster_id + + task_secrets = { + PROMETHEUS_METRICS_PASSWORD = aws_secretsmanager_secret_version.prometheus_metrics_password.arn + } + + task_environment = { + TARGET_URL = "https://backend-hel.ooni.org/" + } + + ooniapi_service_security_groups = [ + module.ooniapi_cluster.web_security_group_id + ] + + tags = merge( + local.tags, + { Name = "ooni-tier0-reverseproxy" } + ) +} + ### OONI Services Clusters module "ooniapi_cluster" { @@ -569,7 +621,7 @@ module "ooniapi_frontend" { vpc_id = module.network.vpc_id subnet_ids = module.network.vpc_subnet_public[*].id - oonibackend_proxy_target_group_arn = module.ooni_backendproxy.alb_target_group_id + oonibackend_proxy_target_group_arn = module.ooniapi_reverseproxy.alb_target_group_id ooniapi_oonirun_target_group_arn = module.ooniapi_oonirun.alb_target_group_id ooniapi_ooniauth_target_group_arn = module.ooniapi_ooniauth.alb_target_group_id ooniapi_ooniprobe_target_group_arn = module.ooniapi_ooniprobe.alb_target_group_id From 6e99269fe693ee806ce326480639767d3c1c58bf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 6 Dec 2024 15:45:11 -0500 Subject: [PATCH 50/88] Deploy oonibackendproxy --- tf/environments/prod/main.tf | 5 +++-- tf/modules/ooni_backendproxy/main.tf | 7 +++++++ tf/modules/ooni_th_droplet/main.tf | 1 + 3 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf index de10ddc9..6bdc6a74 100644 --- a/tf/environments/prod/main.tf +++ b/tf/environments/prod/main.tf @@ -287,6 +287,7 @@ module "ooni_th_droplet" { "3d:81:99:17:b5:d1:20:a5:fe:2b:14:96:67:93:d6:34", "f6:4b:8b:e2:0e:d2:97:c5:45:5c:07:a6:fe:54:60:0e" ] + dns_zone_ooni_io = local.dns_zone_ooni_io } @@ -515,7 +516,7 @@ module "ooniapi_oonifindings_deployer" { module "ooniapi_oonifindings" { source = "../../modules/ooniapi_service" - first_run = true + # first_run = true vpc_id = module.network.vpc_id public_subnet_ids = module.network.vpc_subnet_public[*].id private_subnet_ids = module.network.vpc_subnet_private[*].id @@ -563,7 +564,7 @@ module "ooniapi_ooniauth_deployer" { module "ooniapi_ooniauth" { source = "../../modules/ooniapi_service" - #first_run = true + # first_run = true vpc_id = module.network.vpc_id private_subnet_ids = module.network.vpc_subnet_private[*].id diff --git a/tf/modules/ooni_backendproxy/main.tf b/tf/modules/ooni_backendproxy/main.tf index 81c98ee0..110461d3 100644 --- a/tf/modules/ooni_backendproxy/main.tf +++ b/tf/modules/ooni_backendproxy/main.tf @@ -17,6 +17,13 @@ resource "aws_security_group" "nginx_sg" { cidr_blocks = var.private_subnet_cidr } + ingress { + protocol = "tcp" + from_port = 80 + to_port = 80 + cidr_blocks = ["0.0.0.0/0"] + } + ingress { protocol = "tcp" from_port = 22 diff --git a/tf/modules/ooni_th_droplet/main.tf b/tf/modules/ooni_th_droplet/main.tf index b62b47e9..9836ac62 100644 --- a/tf/modules/ooni_th_droplet/main.tf +++ b/tf/modules/ooni_th_droplet/main.tf @@ -34,6 +34,7 @@ resource "digitalocean_droplet" "ooni_th_docker" { lifecycle { create_before_destroy = true + ignore_changes = all } } resource "aws_route53_record" "ooni_th" { From 2db24ad53259ae570cb4f82da9910e1ca6a80ab2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 6 Dec 2024 15:46:52 -0500 Subject: [PATCH 51/88] Point reverse proxy to backend-fsn --- tf/environments/prod/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff 
--git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf
index 6bdc6a74..48902a89 100644
--- a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -355,7 +355,7 @@ module "ooniapi_reverseproxy" {
   }
 
   task_environment = {
-    TARGET_URL = "https://backend-hel.ooni.org/"
+    TARGET_URL = "https://backend-fsn.ooni.org/"
   }
 
   ooniapi_service_security_groups = [
From 07b145e5dbe9423b4134fb3d1f61a6ea58eeeffb Mon Sep 17 00:00:00 2001
From: decfox
Date: Fri, 6 Dec 2024 15:55:03 -0500
Subject: [PATCH 52/88] refactor: add base-backend to deployer playbook

---
 ansible/deploy-ooni-backend.yml        | 1 +
 ansible/roles/bootstrap/tasks/main.yml | 7 -------
 2 files changed, 1 insertion(+), 7 deletions(-)

diff --git a/ansible/deploy-ooni-backend.yml b/ansible/deploy-ooni-backend.yml
index 7bc2cf78..24c70aac 100644
--- a/ansible/deploy-ooni-backend.yml
+++ b/ansible/deploy-ooni-backend.yml
@@ -2,6 +2,7 @@
 - hosts: backend-hel.ooni.org
   roles:
     - role: bootstrap
+    - role: base-backend
     - role: nftables
     - role: nginx
diff --git a/ansible/roles/bootstrap/tasks/main.yml b/ansible/roles/bootstrap/tasks/main.yml
index de1c174f..500d58ff 100644
--- a/ansible/roles/bootstrap/tasks/main.yml
+++ b/ansible/roles/bootstrap/tasks/main.yml
@@ -55,13 +55,6 @@
   tags:
     - nftables
 
-- name: Set the backend host configuration if valid
-  when: inventory_hostname == 'backend-hel.ooni.org'
-  ansible.builtin.include_role:
-    name: base-backend
-  tags:
-    - base-backend
-
 - name: Configure journald
   tags:
     - journald
From 49a6e2ba590edd076e0d0ec5757123d8bdc89e39 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Fri, 6 Dec 2024 16:14:36 -0500
Subject: [PATCH 53/88] Remove deleted hosts from DNS

---
 tf/environments/prod/dns_records.tf | 32 -----------------------------
 1 file changed, 32 deletions(-)

diff --git a/tf/environments/prod/dns_records.tf b/tf/environments/prod/dns_records.tf
index fd77fa54..a24b26e0 100644
--- a/tf/environments/prod/dns_records.tf
+++ b/tf/environments/prod/dns_records.tf
@@ -862,14 +862,6 @@ resource "aws_route53_record" "test-qemu-infra-ooni-io-_A_" {
   zone_id = local.dns_root_zone_ooni_io
 }
 
-resource "aws_route53_record" "wcth-ooni-io-_A_" {
-  name    = "wcth.ooni.io"
-  records = ["37.218.245.117"]
-  ttl     = "60"
-  type    = "A"
-  zone_id = local.dns_root_zone_ooni_io
-}
-
 resource "aws_route53_record" "www-ooni-io-_CNAME_" {
   name    = "www.ooni.io"
   records = ["ooni.netlify.com"]
@@ -886,30 +878,6 @@ resource "aws_route53_record" "ams-ps-ooni-nu-_A_" {
   name    = "ams-ps.ooni.nu"
   records = ["37.218.247.98"]
   ttl     = "300"
   type    = "A"
   zone_id = local.dns_root_zone_ooni_nu
 }
 
-resource "aws_route53_record" "ams-wcth-ooni-nu-_A_" {
-  name    = "ams-wcth.ooni.nu"
-  records = ["37.218.245.114"]
-  ttl     = "300"
-  type    = "A"
-  zone_id = local.dns_root_zone_ooni_nu
-}
-
-resource "aws_route53_record" "ams-wcth2-ooni-nu-_A_" {
-  name    = "ams-wcth2.ooni.nu"
-  records = ["37.218.247.47"]
-  ttl     = "300"
-  type    = "A"
-  zone_id = local.dns_root_zone_ooni_nu
-}
-
-resource "aws_route53_record" "ams-wcth3-ooni-nu-_A_" {
-  name    = "ams-wcth3.ooni.nu"
-  records = ["37.218.245.117"]
-  ttl     = "300"
-  type    = "A"
-  zone_id = local.dns_root_zone_ooni_nu
-}
-
 resource "aws_route53_record" "amsmatomo-ooni-nu-_A_" {
   name    = "amsmatomo.ooni.nu"
   records = ["37.218.242.173"]
From b3a170d7737bf73bd2ecc264d8d7fb972eb12e3e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Fri, 6 Dec 2024 18:59:52 -0500
Subject: [PATCH 54/88] Drop wcth.ooni.io from monitoring

---
 ansible/roles/prometheus/vars/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index 01ae359f..49aa0f18 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -12,8 +12,8 @@ blackbox_jobs: module: "ooni_web_connectivity_ok" targets: # - "https://a.web-connectivity.th.ooni.io/status" - - "https://wcth.ooni.io/status" # TODO add these records to the ALB config + #- "https://wcth.ooni.io/status" #- "https://ams-wcth2.ooni.nu/status" #- "https://a.web-connectivity.th.ooni.io/status" # "https://ams-wcth3.ooni.nu/status" # cloudfront From c819886f82d4ee6341bc28dfa069636d50e2daa4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Sat, 7 Dec 2024 13:37:13 -0500 Subject: [PATCH 55/88] Fix alertmanager and wcth monitoring of cloudfront --- ansible/roles/prometheus/vars/main.yml | 20 +++++++++---------- .../templates/alertmanager.yml | 4 ++-- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml index 49aa0f18..e77fe31f 100644 --- a/ansible/roles/prometheus/vars/main.yml +++ b/ansible/roles/prometheus/vars/main.yml @@ -8,16 +8,15 @@ dom0_hosts: - mia-httpth.ooni.nu blackbox_jobs: - - name: "ooni web_connectivity test helpers" - module: "ooni_web_connectivity_ok" - targets: - # - "https://a.web-connectivity.th.ooni.io/status" - # TODO add these records to the ALB config - #- "https://wcth.ooni.io/status" - #- "https://ams-wcth2.ooni.nu/status" - #- "https://a.web-connectivity.th.ooni.io/status" # "https://ams-wcth3.ooni.nu/status" - # cloudfront - - "https://d33d1gs9kpq1c5.cloudfront.net/status" + # TODO add these records to the ALB config + #- name: "ooni web_connectivity test helpers" + # module: "ooni_web_connectivity_ok" + # targets: + # # - "https://a.web-connectivity.th.ooni.io/status" + # #- "https://wcth.ooni.io/status" + # #- "https://ams-wcth2.ooni.nu/status" + # #- "https://a.web-connectivity.th.ooni.io/status" # "https://ams-wcth3.ooni.nu/status" + # # cloudfront - name: "new test helpers" module: "new_test_helper_health" @@ -29,6 +28,7 @@ blackbox_jobs: - "https://4.th.ooni.org/" - "https://5.th.ooni.org/" - "https://6.th.ooni.org/" + - "https://d33d1gs9kpq1c5.cloudfront.net/status" - name: "ooni collector" module: "ooni_collector_ok" diff --git a/ansible/roles/prometheus_alertmanager/templates/alertmanager.yml b/ansible/roles/prometheus_alertmanager/templates/alertmanager.yml index d9914808..eb36cf18 100755 --- a/ansible/roles/prometheus_alertmanager/templates/alertmanager.yml +++ b/ansible/roles/prometheus_alertmanager/templates/alertmanager.yml @@ -58,7 +58,7 @@ inhibit_rules: receivers: - name: 'team-all' email_configs: -{% for u in ['arturo', 'simone'] %} +{% for u in ['arturo', 'mehul'] %} - to: '{{ u }}@openobservatory.org' send_resolved: true smarthost: {{ am_mx_openobservatory }}:25 @@ -71,7 +71,7 @@ receivers: - name: 'team-email' # no slack email_configs: -{% for u in ['arturo', 'simone'] %} +{% for u in ['arturo', 'mehul'] %} - to: '{{ u }}@openobservatory.org' send_resolved: true smarthost: {{ am_mx_openobservatory }}:25 From 5d81f3ccd498537d818a19b83ea5e8ed36c16d19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 16 Dec 2024 18:38:30 +0100 Subject: [PATCH 56/88] Change storage type of postgresql --- tf/environments/prod/main.tf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf index 48902a89..b4601950 100644 --- 
a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -159,8 +159,8 @@ module "oonipg" {
   vpc_id     = module.network.vpc_id
   subnet_ids = module.network.vpc_subnet_public[*].id
   db_instance_class    = "db.t3.micro"
-  db_storage_type      = "standard"
-  db_allocated_storage = "5"
+  db_storage_type      = "gp3"
+  db_allocated_storage = "20"
   db_max_allocated_storage = null
   tags = merge(
     local.tags,
From 6d9d6e3559e4f80d57560b31f2c52423c92b91a5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Mon, 16 Dec 2024 18:41:58 +0100
Subject: [PATCH 57/88] Bump storage size to minimum value

---
 tf/environments/prod/main.tf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf
index b4601950..f3a01dde 100644
--- a/tf/environments/prod/main.tf
+++ b/tf/environments/prod/main.tf
@@ -160,7 +160,7 @@ module "oonipg" {
   subnet_ids = module.network.vpc_subnet_public[*].id
   db_instance_class    = "db.t3.micro"
   db_storage_type      = "gp3"
-  db_allocated_storage = "20"
+  db_allocated_storage = "50"
   db_max_allocated_storage = null
   tags = merge(
     local.tags,
From ab129d610f2070f3a9b9234682ddf8b90fc1bf95 Mon Sep 17 00:00:00 2001
From: DecFox <33030671+DecFox@users.noreply.github.com>
Date: Wed, 18 Dec 2024 22:08:50 +0530
Subject: [PATCH 58/88] refactor: drop monitoring for db1 and amsmatomo (#130)

We killed `db-1.proteus.ooni.io` and `amsmatomo.ooni.nu`. We can therefore
drop them from monitoring.
---
 ansible/roles/prometheus/vars/main.yml | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ansible/roles/prometheus/vars/main.yml b/ansible/roles/prometheus/vars/main.yml
index e77fe31f..d8774c47 100644
--- a/ansible/roles/prometheus/vars/main.yml
+++ b/ansible/roles/prometheus/vars/main.yml
@@ -1,8 +1,6 @@
 dom0_hosts:
   - ams-ps.ooni.nu
   - ams-slack-1.ooni.org
-  - amsmatomo.ooni.nu
-  - db-1.proteus.ooni.io
   - doams1-countly.ooni.nu
   - mia-echoth.ooni.nu
   - mia-httpth.ooni.nu
From cd8f0c62802a33df230d599be2b8aecbc76296b5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Arturo=20Filast=C3=B2?=
Date: Thu, 19 Dec 2024 12:03:48 +0100
Subject: [PATCH 59/88] Add monitoring (#118)

Migrate over the monitoring role from sysadmin to devops. 
Related to: https://github.com/ooni/devops/issues/27 --- ansible/ansible-playbook | 12 + ansible/deploy-bootstrap.yml | 7 + ansible/deploy-monitoring-config.yml | 10 + ansible/deploy-monitoring.yml | 10 +- ansible/inventory | 13 +- ansible/playbook.yml | 9 +- ansible/roles/monitoring/defaults/main.yml | 1 + .../monitoring/files/create_logs_table.sql | 56 ++++ .../monitoring/files/log-ingestion.service | 17 + .../roles/monitoring/tasks/log-ingestion.yml | 85 +++++ ansible/roles/monitoring/tasks/main.yml | 304 ++++++++++++++++++ ansible/roles/monitoring/templates/10514.nft | 2 + .../roles/monitoring/templates/clickhouse.gpg | 87 +++++ .../monitoring/templates/clickhouse.sources | 7 + .../templates/etc_default_prometheus | 4 + .../roles/monitoring/templates/grafana.gpg | 41 +++ .../roles/monitoring/templates/grafana.list | 1 + .../monitoring/templates/grafana.sources | 7 + ansible/roles/monitoring/templates/htpasswd | 5 + .../monitoring/templates/jupyter.service | 37 +++ ansible/roles/monitoring/templates/nginx.conf | 203 ++++++++++++ .../roles/monitoring/templates/vector.list | 2 + .../roles/monitoring/templates/vector.toml | 24 ++ ansible/roles/notify-slack/tasks/main.yml | 8 + .../notify-slack/templates/notify-slack.j2 | 10 + 25 files changed, 946 insertions(+), 16 deletions(-) create mode 100755 ansible/ansible-playbook create mode 100644 ansible/deploy-bootstrap.yml create mode 100644 ansible/deploy-monitoring-config.yml create mode 100644 ansible/roles/monitoring/defaults/main.yml create mode 100644 ansible/roles/monitoring/files/create_logs_table.sql create mode 100644 ansible/roles/monitoring/files/log-ingestion.service create mode 100644 ansible/roles/monitoring/tasks/log-ingestion.yml create mode 100644 ansible/roles/monitoring/tasks/main.yml create mode 100644 ansible/roles/monitoring/templates/10514.nft create mode 100644 ansible/roles/monitoring/templates/clickhouse.gpg create mode 100644 ansible/roles/monitoring/templates/clickhouse.sources create mode 100644 ansible/roles/monitoring/templates/etc_default_prometheus create mode 100644 ansible/roles/monitoring/templates/grafana.gpg create mode 100644 ansible/roles/monitoring/templates/grafana.list create mode 100644 ansible/roles/monitoring/templates/grafana.sources create mode 100644 ansible/roles/monitoring/templates/htpasswd create mode 100644 ansible/roles/monitoring/templates/jupyter.service create mode 100644 ansible/roles/monitoring/templates/nginx.conf create mode 100644 ansible/roles/monitoring/templates/vector.list create mode 100644 ansible/roles/monitoring/templates/vector.toml create mode 100644 ansible/roles/notify-slack/tasks/main.yml create mode 100644 ansible/roles/notify-slack/templates/notify-slack.j2 diff --git a/ansible/ansible-playbook b/ansible/ansible-playbook new file mode 100755 index 00000000..ffbc9904 --- /dev/null +++ b/ansible/ansible-playbook @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -ue + +## ansible-playbook is a wrapper script used to send a notification to slack +# whenever a new ansible deploy is triggered + +ANSIBLE_SLACK_CMD=`printf "%q " "$0" "$@"` +ANSIBLE_SLACK_CMD="${ANSIBLE_SLACK_CMD% }" # strip trailing whitespace +export ANSIBLE_SLACK_CMD + +ansible localhost --module-name include_role --args name=notify-slack +ansible-playbook "$@" diff --git a/ansible/deploy-bootstrap.yml b/ansible/deploy-bootstrap.yml new file mode 100644 index 00000000..81dccab1 --- /dev/null +++ b/ansible/deploy-bootstrap.yml @@ -0,0 +1,7 @@ +- name: Ensure all hosts are bootstrapped correctly + hosts: all + 
become: yes + roles: + - bootstrap + tags: + - bootstrap diff --git a/ansible/deploy-monitoring-config.yml b/ansible/deploy-monitoring-config.yml new file mode 100644 index 00000000..0c27527e --- /dev/null +++ b/ansible/deploy-monitoring-config.yml @@ -0,0 +1,10 @@ +--- +- name: Update monitoring config + hosts: monitoring.ooni.org + become: true + tags: + - monitoring + roles: + - prometheus + - prometheus_blackbox_exporter + - prometheus_alertmanager diff --git a/ansible/deploy-monitoring.yml b/ansible/deploy-monitoring.yml index a1eadee9..2fd77466 100644 --- a/ansible/deploy-monitoring.yml +++ b/ansible/deploy-monitoring.yml @@ -1,12 +1,12 @@ --- -- name: Update monitoring config +- name: Deploy monitoring host hosts: monitoring.ooni.org become: true tags: - monitoring roles: - - prometheus - - prometheus_blackbox_exporter - - prometheus_alertmanager - + - monitoring + vars: + monitoring_htpasswd: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/monitoring_htpasswd', profile='oonidevops_user_prod') }}" +- ansible.builtin.import_playbook: deploy-monitoring-config.yml diff --git a/ansible/inventory b/ansible/inventory index a44f8d45..bda9a486 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -1,6 +1,6 @@ [all:children] -htz-fsn -ghs-ams +htz_fsn +ghs_ams ## Role tags @@ -12,7 +12,7 @@ data3.htz-fsn.prod.ooni.nu ## Location tags -[htz-fsn] +[htz_fsn] data.ooni.org monitoring.ooni.org notebook.ooni.org @@ -20,5 +20,10 @@ data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu -[ghs-ams] +[ghs_ams] openvpn-server1.ooni.io +amsmatomo.ooni.nu +db-1.proteus.ooni.io +ams-slack-1.ooni.org +#mia-echoth.ooni.nu +#mia-httpth.ooni.nu diff --git a/ansible/playbook.yml b/ansible/playbook.yml index 17bcd402..7674acd9 100644 --- a/ansible/playbook.yml +++ b/ansible/playbook.yml @@ -1,11 +1,6 @@ --- -- name: Ensure all hosts are bootstrapped correctly - hosts: all - become: yes - roles: - - bootstrap - tags: - - bootstrap +- name: Include bootstrap playbook + ansible.builtin.import_playbook: deploy-bootstrap.yml - name: Include tier0 playbook ansible.builtin.import_playbook: deploy-tier0.yml diff --git a/ansible/roles/monitoring/defaults/main.yml b/ansible/roles/monitoring/defaults/main.yml new file mode 100644 index 00000000..47ef408f --- /dev/null +++ b/ansible/roles/monitoring/defaults/main.yml @@ -0,0 +1 @@ +enable_log_ingestion: false diff --git a/ansible/roles/monitoring/files/create_logs_table.sql b/ansible/roles/monitoring/files/create_logs_table.sql new file mode 100644 index 00000000..fe6a4cfa --- /dev/null +++ b/ansible/roles/monitoring/files/create_logs_table.sql @@ -0,0 +1,56 @@ +CREATE TABLE IF NOT EXISTS default.logs +( + `CODE_FILE` String, + `CODE_FUNC` String, + `CODE_LINE` String, + `INVOCATION_ID` String, + `LOGGER` LowCardinality(String), + `MESSAGE_ID` String, + `MESSAGE` String, + `PRIORITY` UInt8, + `PROCESS_NAME` String, + `SYSLOG_FACILITY` LowCardinality(String), + `SYSLOG_IDENTIFIER` LowCardinality(String), + `SYSLOG_PID` Nullable(UInt64), + `SYSLOG_TIMESTAMP` String, + `THREAD_NAME` String, + `TID` UInt64, + `UNIT` String, + `_AUDIT_LOGINUID` Nullable(UInt64), + `_AUDIT_SESSION` Nullable(UInt64), + `_BOOT_ID` String, + `_CAP_EFFECTIVE` String, + `_CMDLINE` String, + `_COMM` LowCardinality(String), + `_EXE` LowCardinality(String), + `_GID` LowCardinality(UInt32), + `_HOSTNAME` String, + `_KERNEL_DEVICE` String, + `_KERNEL_SUBSYSTEM` String, + `_MACHINE_ID` String, + `_PID` UInt32, + `_SELINUX_CONTEXT` String, + `_SOURCE_MONOTONIC_TIMESTAMP` 
Nullable(Int64), + `_SOURCE_REALTIME_TIMESTAMP` Int64, + `_STREAM_ID` String, + `_SYSTEMD_CGROUP` LowCardinality(String), + `_SYSTEMD_INVOCATION_ID` String, + `_SYSTEMD_SLICE` String, + `_SYSTEMD_UNIT` LowCardinality(String), + `_TRANSPORT` LowCardinality(String), + `_UDEV_SYSNAME` String, + `_UID` LowCardinality(UInt32), + `__CURSOR` String, + `__MONOTONIC_TIMESTAMP` Nullable(Int64), + `__REALTIME_TIMESTAMP` Int64, + `date` DateTime64(6) ALIAS fromUnixTimestamp64Micro(_SOURCE_REALTIME_TIMESTAMP), + `host` LowCardinality(String), + `inserted_at` DateTime DEFAULT now(), + `message` String, + `rtdate` DateTime64(6) ALIAS fromUnixTimestamp64Micro(__REALTIME_TIMESTAMP), + `timestamp` String, + INDEX timestamp_minmax_idx timestamp TYPE minmax GRANULARITY 1 +) +ENGINE = MergeTree +ORDER BY __REALTIME_TIMESTAMP +SETTINGS index_granularity = 8192 diff --git a/ansible/roles/monitoring/files/log-ingestion.service b/ansible/roles/monitoring/files/log-ingestion.service new file mode 100644 index 00000000..ac1e9483 --- /dev/null +++ b/ansible/roles/monitoring/files/log-ingestion.service @@ -0,0 +1,17 @@ +[Unit] +Description=log ingestion + +[Service] +ExecStart=/bin/sh -c 'journalctl -ojson -f | clickhouse-client --query="INSERT INTO logs FORMAT JSONEachRow" --input_format_skip_unknown_fields=1 --input_format_allow_errors_ratio=1' + +SystemCallFilter=~@clock @debug @cpu-emulation @keyring @module @mount @obsolete @raw-io @reboot @swap +NoNewPrivileges=yes +PrivateDevices=yes +PrivateTmp=yes +ProtectHome=yes +ProtectSystem=full +ProtectKernelModules=yes +ProtectKernelTunables=yes + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/monitoring/tasks/log-ingestion.yml b/ansible/roles/monitoring/tasks/log-ingestion.yml new file mode 100644 index 00000000..645f086f --- /dev/null +++ b/ansible/roles/monitoring/tasks/log-ingestion.yml @@ -0,0 +1,85 @@ +# # Vector + +- name: vector - enable repo + tags: vector + shell: extrepo enable vector && extrepo update vector + +- name: vector - install pkg + tags: vector + apt: + # refresh cache + cache_valid_time: 0 + name: + - vector + +- name: vector - deploy SQL file to create logs table + tags: vector + copy: + src: create_logs_table.sql + dest: /etc/clickhouse-server/create_logs_table.sql + +- name: vector - create vector_logs table + tags: vector + command: clickhouse-client --multiline --multiquery --queries-file /etc/clickhouse-server/create_logs_table.sql + +- name: vector - Generate syslog certificates + tags: vector + # runs locally + delegate_to: 127.0.0.1 + shell: | + ./vault view files/pusher_ca.key.vault | openssl req -x509 -new -nodes -key /dev/stdin -sha256 -days 3650 -subj '/O=OONI/OU=CA/CN=ooni.org' -out oonicacert.pem + openssl req -newkey rsa:2048 -nodes -days 3650 -keyout node.key -out node-req.pem -subj '/CN=ooni.org/O=OONI temp CA/C=US' -batch + ./vault view files/pusher_ca.key.vault | openssl x509 -req -days 3650 -set_serial 01 -in node-req.pem -out node-cert.pem -CA oonicacert.pem -CAkey /dev/stdin + register: certs_ready + +- name: vector - Copy TLS certs + tags: vector + ansible.builtin.copy: + src: "{{ item }}" + dest: /etc/vector/ + mode: '0440' + owner: vector + loop: + - oonicacert.pem + - node-cert.pem + - node.key + when: certs_ready.changed + +- name: vector - Delete files + tags: vector + # runs locally + delegate_to: 127.0.0.1 + ansible.builtin.file: + path: "{{ item }}" + state: absent + loop: + - node-cert.pem + - node-req.pem + - node.key + - oonicacert.pem + +- name: vector - configure + tags: vector + template: + 
src: templates/vector.toml
+    dest: /etc/vector/vector.toml
+
+- name: vector - open port
+  tags: vector
+  ansible.builtin.copy:
+    src: templates/10514.nft
+    dest: /etc/ooni/nftables/tcp/
+  register: nft_reload_needed
+
+- name: vector - reload nft
+  tags: vector
+  shell: systemctl reload nftables.service
+  when: nft_reload_needed.changed
+
+- name: vector - restart service
+  tags: vector
+  systemd:
+    daemon_reload: yes
+    enabled: yes
+    name: vector.service
+    state: restarted
diff --git a/ansible/roles/monitoring/tasks/main.yml b/ansible/roles/monitoring/tasks/main.yml
new file mode 100644
index 00000000..24a45cf1
--- /dev/null
+++ b/ansible/roles/monitoring/tasks/main.yml
@@ -0,0 +1,304 @@
+---
+# # monitoring host # #
+
+- name: Set grafana apt repo
+  tags: monitoring, grafana
+  template:
+    src: templates/grafana.list
+    dest: /etc/apt/sources.list.d/grafana.list
+    mode: 0644
+    owner: root
+
+- name: Installs packages
+  tags: monitoring, prometheus
+  apt:
+    install_recommends: no
+    cache_valid_time: 86400
+    name:
+      - apt-transport-https
+      - nginx
+      - prometheus
+      - prometheus-blackbox-exporter
+      - extrepo
+
+- name: Give cap_net_raw to prometheus-blackbox-exporter
+  tags: monitoring, prometheus, exporter_cap_net_raw
+  community.general.capabilities:
+    path: /usr/bin/prometheus-blackbox-exporter
+    capability: cap_net_raw+ep
+    state: present
+
+- name: Create Prometheus environment override
+  # Disable strict cert check https://pkg.go.dev/crypto/x509#Certificate.VerifyHostname
+  tags: monitoring, prometheus, override_cert_check
+  template:
+    src: templates/etc_default_prometheus
+    dest: /etc/default/prometheus
+    mode: 0644
+    owner: root
+
+- name: Create Grafana repo GPG pubkey
+  tags: apt
+  template:
+    src: templates/grafana.gpg
+    dest: /etc/apt/grafana.asc
+    mode: 0644
+    owner: root
+
+- name: Create Grafana sources list
+  tags: apt
+  template:
+    src: templates/grafana.sources
+    dest: /etc/apt/sources.list.d/grafana.sources
+    mode: 0644
+    owner: root
+
+- name: Installs grafana
+  tags: monitoring, grafana
+  apt:
+    install_recommends: no
+    cache_valid_time: 86400
+    name:
+      - grafana
+
+- name: Configure grafana
+  tags: monitoring, grafana
+  lineinfile:
+    path: /etc/grafana/grafana.ini
+    regexp: '^;?domain = '
+    line: domain = grafana.ooni.org
+
+- name: Autoremove
+  tags: monitoring
+  apt:
+    autoremove: yes
+
+- name: Clean cache
+  tags: monitoring
+  apt:
+    autoclean: yes
+
+- name: allow HTTPS
+  tags: monitoring
+  blockinfile:
+    path: /etc/ooni/nftables/tcp/443.nft
+    create: yes
+    block: |
+      add rule inet filter input tcp dport 443 counter accept comment "HTTPS"
+
+- name: reload nft
+  tags: monitoring
+  shell: systemctl reload nftables.service
+
+- name: enable grafana
+  tags: monitoring
+  shell: systemctl enable grafana-server
+
+- name: start grafana
+  tags: monitoring
+  shell: systemctl start grafana-server
+
+- name: Deploy nginx conf
+  tags: monitoring, grafana
+  template:
+    src: templates/nginx.conf
+    dest: /etc/nginx/sites-enabled/10-monitoring
+    mode: 0644
+    owner: root
+
+# TODO(art): this htpasswd file contains the password of federico which cannot
+# be removed, since removing it leads to a bunch of alerts being triggered. We
+# should figure out where his personal password was used and replace it with a
+# role-based password that is shared and stored in our team keychain.
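+#
+# A possible shape for that replacement, sketched here as a comment: the
+# shared account name below is an assumption, not something this role
+# defines, and the password would come from the monitoring_htpasswd SSM
+# lookup declared in deploy-monitoring.yml.
+#
+# - name: Create shared role-based monitoring credential
+#   community.general.htpasswd:
+#     path: /etc/nginx/monitoring.htpasswd
+#     name: oonimonitoring  # hypothetical shared account name
+#     password: "{{ monitoring_htpasswd }}"
+#     owner: www-data
+#     group: www-data
+#     mode: 0440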
+- name: copy monitoring.htpasswd + tags: monitoring, grafana, htpasswd + template: + src: templates/htpasswd + dest: /etc/nginx/monitoring.htpasswd + mode: 0440 + owner: www-data + group: www-data + +- name: reload nginx + tags: monitoring, grafana + shell: systemctl reload nginx + +- name: Installs packages + tags: jupyter + apt: + install_recommends: no + cache_valid_time: 86400 + name: + - jupyter-notebook + - jupyter-server + - python3-bottleneck + - python3-matplotlib + - python3-numpy + - python3-pandas + - python3-psycopg2 + - python3-scipy + - python3-seaborn + - python3-tables + - python3-tqdm + - python3-ujson + +- name: Install jupyter.service + tags: jupyter + template: + src: templates/jupyter.service + dest: /etc/systemd/system/jupyter.service + mode: 0755 + owner: root + +- name: create jupyter dir + tags: jupyter + file: + path: /var/lib/jupyter/conf + state: directory + owner: jupyter + group: jupyter + +- name: create jupyter conf + tags: jupyter + blockinfile: + path: /var/lib/jupyter/conf/jupyter_notebook_config.py + create: yes + owner: jupyter + group: jupyter + block: | + c.NotebookApp.allow_remote_access = True + c.NotebookApp.enable_mathjax = False + c.NotebookApp.open_browser = False + c.NotebookApp.password = u'' + c.NotebookApp.quit_button = False + c.NotebookApp.token = '' + +- name: reload systemd + tags: jupyter + shell: systemctl daemon-reload + +- name: Start jupyter + tags: jupyter + systemd: + name: jupyter.service + state: started + enabled: yes + +- name: Configure fail2ban + tags: fail2ban + lineinfile: + path: /etc/fail2ban/jail.conf + regexp: '^backend ' + line: backend = systemd + +- name: Configure fail2ban + tags: fail2ban + blockinfile: + create: yes + path: /etc/fail2ban/jail.d/ooni.conf + block: | + [nginx-http-auth] + enabled = true + filter = nginx-http-auth + port = http,https + journalmatch = _SYSTEMD_UNIT=nginx.service PRIORITY=3 + + [nginx-400] + enabled = true + port = http,https + filter = nginx-400 + maxretry = 2 + findtime = 300 + +- name: Configure fail2ban + tags: fail2ban + blockinfile: + create: yes + path: /etc/fail2ban/filter.d/nginx-400.conf + block: | + [Definition] + failregex = ^ -.*"(GET|POST|HEAD).*HTTP.*" 400 + ignoreregex = + +- name: reload fail2ban + tags: fail2ban + shell: systemctl reload fail2ban.service + +# jupycron + +- name: Install jupycron + tags: jupycron + apt: + cache_valid_time: 86400 + state: present + name: jupycron + +# # clickhouse # # + +- name: install clickhouse requirements + tags: clickhouse + apt: + cache_valid_time: 86400 + state: present + name: + - apt-transport-https + - ca-certificates + - dirmngr + +- name: install clickhouse keys + tags: clickhouse + template: + src: templates/clickhouse.gpg + dest: /etc/apt/clickhouse.asc + mode: 0644 + owner: root + +- name: set clickhouse repos + tags: clickhouse + template: + src: templates/clickhouse.sources + dest: /etc/apt/sources.list.d/clickhouse.sources + mode: 0644 + owner: root + +- name: pin clickhouse release train + tags: clickhouse + blockinfile: + path: /etc/apt/preferences.d/clickhouse-server + create: yes + block: | + Package: clickhouse-server + Pin: version 23.1.3.* + Pin-Priority: 999 + +- name: install clickhouse + tags: clickhouse + apt: + # refresh cache + cache_valid_time: 0 + name: + - clickhouse-server={{ clickhouse_pkg_ver }} + - clickhouse-client={{ clickhouse_pkg_ver }} + - clickhouse-common-static={{ clickhouse_pkg_ver }} + vars: + clickhouse_pkg_ver: 23.9.* + +#- name: install clickhouse conf override +# tags: clickhouse 
+# template: +# src: clickhouse_config.xml +# dest: /etc/clickhouse-server/config.d/ooni_conf.xml +# owner: clickhouse +# group: clickhouse +# mode: 0400 +# notify: restart clickhouse + +- name: Run clickhouse + tags: clickhouse + systemd: + name: clickhouse-server.service + state: started + enabled: yes + +- include_tasks: log-ingestion.yml + when: enable_log_ingestion diff --git a/ansible/roles/monitoring/templates/10514.nft b/ansible/roles/monitoring/templates/10514.nft new file mode 100644 index 00000000..1dd9dadc --- /dev/null +++ b/ansible/roles/monitoring/templates/10514.nft @@ -0,0 +1,2 @@ +# Manged by ansible roles/monitoring/templates/10514.nft +add rule inet filter input tcp dport 10514 counter accept comment "incoming logs" diff --git a/ansible/roles/monitoring/templates/clickhouse.gpg b/ansible/roles/monitoring/templates/clickhouse.gpg new file mode 100644 index 00000000..ae3a0dcf --- /dev/null +++ b/ansible/roles/monitoring/templates/clickhouse.gpg @@ -0,0 +1,87 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQINBGGbfsABEAC+01IxYaykEHIBW4dom/YipkmjPGbkU/oDibqsHE8VgDLC10Xt +Glto0H9q1fsTIi1nZ9S4lWUuuEAFgcPBt82qnT3E11ZCDC5khjFvPb7d0iK5Wg5T +a1hHNLxuqvRn4sOCyMp7LZJkgLFavvGnXWsHOmB3TzQRaQouc+Y21hqANXFCZXCD +be3Wbdy86ZHOezoWEdHaU1868ZlCPMq10lpBFcLzvdeEfMDAYhOE43c5X0tSbxNo +//kingPLPQ7nxbgsGDKNIsghazmKyMaJA+qpzMmu8qo1ioPZfW8GcYC06cBBAklQ +2dQmGqNvpdJeItBeis9kfJvfkWYCbQ5+ebmm/uJDGCXosIf3/OliMykJhiJzILp5 +1AJqCBD6muL5aP7UorGtVrCQeW2DKB6vtw73qoWmIAmMdW80Nr2lkFu8sDM8VeCI +YUULVa2mkuAet8B3TEtFs6dRZZZNlw6fNny6gmN1dbh+aDebilkHn7eOm8A3qUO3 +PgYNP8swIuGBDQ37lEKZaN6glL7+h0TY9Pz0UsBlCOOh0f3jrjVR2eseQfMtm5TU +/cqY7/+eTzycRpHRHl+xiSiUT1XwyzCf0sAEuMX6d1Um0KFXWWHYdpe4/P4yQmLw +Ex3JaYQHM7EBbLsj9vK7J9qAnrtH0kdyz1RBVE1SV0APS7qNec2GBUXh5QARAQAB +tDpDbGlja0hvdXNlIEluYy4gUmVwb3NpdG9yaWVzIEtleSA8cGFja2FnZXNAY2xp +Y2tob3VzZS5jb20+iQJOBBMBCAA4FiEEOp6hGTqXtUi+FFfUiRn2vStI11QFAmGb +fsACGwMFCwkIBwIGFQoJCAsCBBYCAwECHgECF4AACgkQiRn2vStI11R0OA/9GHe5 +Gfco4zWrPSX1cCM1sZZeLZUXeRu4iJlJPahi1sjP2J0rcuv5Xp7paW23el0ksXf4 +P+xknzzp8SlCndQKFWsb404vsNK5VtPB1RJ0Sje0dFM2nO1/Kt8pVX+r1WFJ1gur +BSugsAs3erTCy/ZIB6nhRcFBGsJfvZV24bVxFQKUW6ARigia3bqOZlM4ksdvjrLL +HZ84WiLY++W3wTMIh34KzLClVokl4au8L9Z1g+24EqJMRV7722FVm3GZDdVhmzNy +gKXQvopFvrfQY8Uk9ApZOVSsx9zH65UDP0u13ki7UTlyCi9ucGgT4w6cek1DUcXN +W0FU8498J80fADcWG4bsPjiJ7WiKI29saVked4gfUqx0G5kUCP+l5oo+okm/BFrt +66sO0cEbG9KZCmgdTa2KoNUtsampIBXDG0+9c2SWp3pa1j5sErngR2dyV6L8Muxx +dQJbVR4RxXkM8LkAUMN+YAatknMKEPa7gXalt73fiuvFXpY7NOxeUmlrRXucnCY8 +vSnTNPcuQYAeNyg7z2gyrpqp81Kd+XKUIdKYuydPpvRHzhFOBIf68NUc3MR1UeZx +eBF+bnVkkyL5nRhU+1eLZ0YyW2/PBCmegUahCHEWLpXGzeNBwdnt3HrJEQg54Fu8 +VPcKFa15kJ8QghTlMwYSQkXUPZeBOYaHtva+aSq5Ag0EYyw+bgEQAOECkzewc6Gd +lGA65PIqEPL4JORnsYr0fHNHYGJ+WCHC9HuQEcFJ8Q0R7nSNbMmi6KjnTqM4QdkL +myEQs9TKK5brFBa/ObbL4JVyBJTi4LdlCg95YfEQKTClb7roaLwKH+TykBJ617R+ +QUvFp65YCwzNiS2aKu6AC6zsD2rsrJulq5LIDKvWwLWfFtwRwOHMgviuGlj/97om +XUhuPIScJ5rB+QzICaiKriF/qVVO7obTV0OcUYrTOkNKo8H/q/fw0FSvX3eznz6f +K46gi468P34VO5ZLZs4UQih7EzDqdGz4Lo8GnIrtThWH5rLyFhm7PdB6yLwf8bQe +qDviI73Zb0w9ookYJQpBdglnDyLdhVjoAA0Hp3Q5XL6yvZVZz06YL3k1VWHI2H/A +WeceTFlrExmF2u1KousrwD8/583T042oRcGW97YV5Q9Uw7iffKvJiLp5VBZa319F +svF9hxM8bZVgVqBksBI/ScIvzMgG5FHeGflJ2xEIWWv2uDiAs498kmm5wI6Hdcfs +YuTT+uPsKSNd5G+Ts/39+hirXhVygtx6pFeCRVTdrfWcOfVIVilZm7I+qDNrap2w +6dT23zJT1DvZFCXymU9dxPEoLOm7cn9oxkvjs6c/YOIeyU5CSdI5ZdO99G/8TbFw +LKdSTajkRTVfIa99j/8P0jPMetnk7PoHABEBAAGJBGwEGAEIACAWIQQ6nqEZOpe1 +SL4UV9SJGfa9K0jXVAUCYyw+bgIbAgJACRCJGfa9K0jXVMF0IAQZAQgAHRYhBIhe +K9z5awtFq/BYRT5K1HGd3po4BQJjLD5uAAoJED5K1HGd3po4CjIQALM1EjN2zCtQ 
+ip9m5IuPnHA6d/YFIuNAXjAnZcg6HQSi+ERFu1w3mL2ya+ll3EYOhFMVMEnja0h0 +e+ezt+WMJt1R2eH1PMZEeJ2uMXgIOl0QsTUXWemhweTQEmsgqksQMqsvFuogxNP/ +Co+cKJCiBpMIU9FJfLrSoGGidnUqr48QjGRVx69FPPthlNOj6/KkhksmJFTT7YhM +G8V7K8yxiYIcQQvRfINLZfmXF2cvGlZU21Il43nCmNb4iFUnrUOouQ7k/Oe/OzZq +ggmcfPGp+6GoSJB3exr3NonjuEhmYR1VPe+WcgsWSl5RWVEaasN5C7dcekqWsqlA +UHpvIqDfzaL3s4HBp19uxxdiy42wXotDhT02P7DJVjVJf9XWgQbmaanzLdPnrUxw +UYcLEQM17VJ37j9DyW9zPBB/pKLRwDr89xpmAKMtRzeXImnBuUJHctp/c9dFvbhJ +GgxFgLHhBCUdTsEs7phLAB32n+oP2Mas6yRgG98k46ic+ZNdJIzplfXT96n/5u+a +eyQOkY3rZh6Jbhzwpb+r5Tj1rAXIJta6prkd8d3+1Rg37x9HkEAHn3ZPDk1RwbVO +VCU0UjMOPU/6Y3FsQq30jSuNhr70p5H/okwpkS7YX/7lY0068XEpWoIyJQIHcBM0 +HZCV904mwdqfRDujziIoQ/xEm3+zIVtzdgYP/3kX4kUup2DEUEY5ND3oKc5CAjyF +UGU7thzILHtt76WrrwXqqfxjJKl9+acmLlrBLj5FQwFEhThEeWunm2T5hhgr4FfF +4MXneQqVRhUslm5+sRIm/K1ye5q4NLfgiwZD/nTM5defyI4cynrby4fy1R9SCBLo +lzvPLpvEraegxIQ3Hk1kT5HmnXi79hZVbZxIkt6wNPG1rTS0TaJ2M37nmhm4GcNj +DtKnhy4Aq569DO8Sd0U6bYKIaFFoi7X5KYnKWY37pYFl0ERU0ffQp7Mg9ruHRvHh +kA41iQ5kF/O1fJHRUfXcYnfIfvN1C77TdXLQryrA/bRNS2dlulIu4CYH+lsfhlIj +BQrRoEOWGu6PzKBwPIXKcVFfB4BcG5si/3vZD3Q1xfUj3MV2sQcgVNbd/t7jJda3 +uhX9DiEPyqdgl382Rq+LoPV4az1igDLDUe7yPrCpD1rwJMyKJyStiP4i1jiIejVa +vPiCpXH1oxCQeR8KqjdrdNbSVgDOapPauZEcwJL464ZlWkdxKX09sN6GaXFTHYTE +XkxrzhlYZaysWEO86+iBUcHnRtc1D3dMJg7hi/Du1IouTwnBsYIwd9Fz/wHpDkiu +0Z3gCmYEg4JQj4iwAtNOQfhJzhEH4kHLom8j8XI/TlgJe/vtNUUUDp3i8qczTjua +VywRbE3+9xjjsSiIuQINBGGbfsABEADXZ0mAMCsf7Y2yJ2aSBLCVPUEZESeB+kDA +ciksAguHeMVp7owI2mfAfw4Z8TEQE6aOb+cqbDbqneQ6nuaRxMNvlDhhtrKztt+U +xWrvgPMvJ59/O1ujY7+VZ+3lXJ01r0u5Xdk1zUZ1uYYl8+Lt3/WmJmmlosX22EwQ +dsAvuRuOmuM2jPTMVN81TaZ2R0Om5vT2L8/SQY7csxc5OylBLskuDQmU1PNlkwTl +tCMqHl7g08LPADLGi/3C9I/0P92A+jZ9RgDCI8BIOS5Ny7CStT96dqrje6TLbWRx +IWBjn1xsVUTxYQY4FXYfw1p7Rm5kzZGImzI4Z7wNcVHCty+X9La+7yKwABW2BeTq +P0rCXpKmy4nNOgwTDVRaA5CL8mg2doK8HOoa03SgziPGEeHHFyS7I2DJAEw+h9K8 +jXVy9Tgd7ZSXmO4Z99ovdHksX4fmD7iOWU2WRp3b2yhpO8b/TgU2r9AQaNMAOsud +N+0Q9NGkEYhWRdWsGapJpa3Q+v1s88lodPhjcIkqGUCH82T/+MhTDdotssj8a6ue +g3mcwVD38690j8ngoBCPmLCNtgZDqhtfy+905uO3ksonF/X2lFc/YG7sqBiOHoDK +yN347pPyK8KbukiMoOasDIQUsryBUeAM0M0Xzftc66BaPgH75KTPZqkOO8aTBCRx +l/AHTejV5wARAQABiQI2BBgBCAAgFiEEOp6hGTqXtUi+FFfUiRn2vStI11QFAmGb +fsACGwwACgkQiRn2vStI11RimQ//a0dHoSekuMLDPEYpQYEEr5JYYWFdFPA/ixeM +HZ566ankWMqKHynhbqqRXVhqKd0pveV+ALhgrUU1nBB9Ma5P51VNEGC0rThiLBVk +CTrV0UrUXxxgA5cTJBVZe7yyysqdCHcuuNNrlQbuzhkJWBySgeWrbmd4VUbDdL0i +GMFSAtbrYjSWreBDBgWBYd8uRiPHtc6ACBIhzGGOulw0k4NnMXHDJyCwEv7EAxES +Y/V8kLK3p7DXc6tpmd1vZ38X52CnVRM6aLQl6YWKXhA+vp9cPJehfEbK+ZPErbCB +4jF6AC4BFwntE0YSod6TVM+wJBuPplnWr4nUUhsk/FeulWGF+AhUG2XJiIn90ZHI +cxlct8Nt1zBgoquxWYmqt/z+s7TOBXnvotOMJggjgsPozFlHjoy47Xc14VwzxnU+ +NknXYqDAsTfjgXiw7NRQdJ8BUd4TSf8iINf2uCNzp4QDJ37VfN7/BiaXBGjEEN+k +OKpxkse6qm06GFd+bXGSHh6H7z2d973k2QIfW03opWAwS+AdH8xmUUaAyyhEql5m +lXZVkQRS8rEO4IV5HQV8FD5iV9bdNihaydlKP09/D9ZDlie0dJZXOIHQDuwDt4Ab +3reSs1T5utClQE0FS8ZZjPuitq/l+TsT83TASVynvEmwvNiEbpOsNjvB1u++w4ni +qCHQv3Y= +=AKGw +-----END PGP PUBLIC KEY BLOCK----- diff --git a/ansible/roles/monitoring/templates/clickhouse.sources b/ansible/roles/monitoring/templates/clickhouse.sources new file mode 100644 index 00000000..db4eda17 --- /dev/null +++ b/ansible/roles/monitoring/templates/clickhouse.sources @@ -0,0 +1,7 @@ +Architectures: amd64 +Suites: stable +Uris: https://packages.clickhouse.com/deb +Types: deb +Components: main +Enabled: yes +Signed-By: /etc/apt/clickhouse.asc diff --git a/ansible/roles/monitoring/templates/etc_default_prometheus b/ansible/roles/monitoring/templates/etc_default_prometheus new file mode 100644 index 00000000..5ba9e6c2 
--- /dev/null +++ b/ansible/roles/monitoring/templates/etc_default_prometheus @@ -0,0 +1,4 @@ +# Managed by ansible, see +# monitoring/templates/etc_default_prometheus +ARGS="" +GODEBUG=x509ignoreCN=0 diff --git a/ansible/roles/monitoring/templates/grafana.gpg b/ansible/roles/monitoring/templates/grafana.gpg new file mode 100644 index 00000000..35771a23 --- /dev/null +++ b/ansible/roles/monitoring/templates/grafana.gpg @@ -0,0 +1,41 @@ +-----BEGIN PGP PUBLIC KEY BLOCK----- + +mQGNBGTnhmkBDADUE+SzjRRyitIm1siGxiHlIlnn6KO4C4GfEuV+PNzqxvwYO+1r +mcKlGDU0ugo8ohXruAOC77Kwc4keVGNU89BeHvrYbIftz/yxEneuPsCbGnbDMIyC +k44UOetRtV9/59Gj5YjNqnsZCr+e5D/JfrHUJTTwKLv88A9eHKxskrlZr7Un7j3i +Ef3NChlOh2Zk9Wfk8IhAqMMTferU4iTIhQk+5fanShtXIuzBaxU3lkzFSG7VuAH4 +CBLPWitKRMn5oqXUE0FZbRYL/6Qz0Gt6YCJsZbaQ3Am7FCwWCp9+ZHbR9yU+bkK0 +Dts4PNx4Wr9CktHIvbypT4Lk2oJEPWjcCJQHqpPQZXbnclXRlK5Ea0NVpaQdGK+v +JS4HGxFFjSkvTKAZYgwOk93qlpFeDML3TuSgWxuw4NIDitvewudnaWzfl9tDIoVS +Bb16nwJ8bMDzovC/RBE14rRKYtMLmBsRzGYHWd0NnX+FitAS9uURHuFxghv9GFPh +eTaXvc4glM94HBUAEQEAAbQmR3JhZmFuYSBMYWJzIDxlbmdpbmVlcmluZ0BncmFm +YW5hLmNvbT6JAdQEEwEKAD4WIQS1Oud7rbYwpoMEYAWWP6J3EEWFRQUCZOeGaQIb +AwUJA8JnAAULCQgHAgYVCgkICwIEFgIDAQIeAQIXgAAKCRCWP6J3EEWFRUiADACa +i+xytv2keEFJWjXNnFAx6/obnHRcXOI3w6nH/zL8gNI7YN5jcdQT2NYvKVYTb3fW +GuMsjHWgat5Gq3AtJrOKABpZ6qeYNPk0Axn/dKtOTwXjZ4pKX3bbUYvVfs0fCEZv +B0HHIj2wI9kgMpoTrkj22LE8layZTPOoQ+3/FbLzS8hN3CYZj25mHN7bpZq8EbV3 +8FW9EU0HM0tg6CvoxkRiVqAuAC0KnVIZAdhD4dlYKuncq64nMvT1A5wxSYbnE+uf +mnWQQhhS6BOwRqN054yw1FrWNDFsvnOSHmr8dIiriv+aZYvx5JQFJ7oZP3LwdYyg +ocQcAJA8HFTIk3P6uJiIF/zdDzocgdKs+IYDoId0hxX7sGCvqdrsveq8n3m7uQiN +7FvSiV0eXIdV4F7340kc8EKiYwpuYSaZX0UWKLenzlUvD+W4pZCWtoXzPsW7PKUt +q1xdW0+NY+AGLCvSJCc5F4S5kFCObfBAYBbldjwwJFocdq/YOvvWYTPyV7kJeJS5 +AY0EZOeGaQEMALNIFUricEIwtZiX7vSDjwxobbqPKqzdek8x3ud0CyYlrbGHy0k+ +FDEXstjJQQ1s9rjJSu3sv5wyg9GDAUH3nzO976n/ZZvKPti3p2XU2UFx5gYkaaFV +D56yYxqGY0YU5ft6BG+RUz3iEPg3UBUzt0sCIYnG9+CsDqGOnRYIIa46fu2/H9Vu +8JvvSq9xbsK9CfoQDkIcoQOixPuI4P7eHtswCeYR/1LUTWEnYQWsBCf57cEpzR6t +7mlQnzQo9z4i/kp4S0ybDB77wnn+isMADOS+/VpXO+M7Zj5tpfJ6PkKch3SGXdUy +3zht8luFOYpJr2lVzp7n3NwB4zW08RptTzTgFAaW/NH2JjYI+rDvQm4jNs08Dtsp +nm4OQvBA9Df/6qwMEOZ9i10ixqk+55UpQFJ3nf4uKlSUM7bKXXVcD/odq804Y/K4 +y3csE059YVIyaPexEvYSYlHE2odJWRg2Q1VehmrOSC8Qps3xpU7dTHXD74ZpaYbr +haViRS5v/lCsiwARAQABiQG8BBgBCgAmFiEEtTrne622MKaDBGAFlj+idxBFhUUF +AmTnhmkCGwwFCQPCZwAACgkQlj+idxBFhUUNbQv8DCcfi3GbWfvp9pfY0EJuoFJX +LNgci7z7smXq7aqDp2huYQ+MulnPAydjRCVW2fkHItF2Ks6l+2/8t5Xz0eesGxST +xTyR31ARENMXaq78Lq+itZ+usOSDNuwJcEmJM6CceNMLs4uFkX2GRYhchkry7P0C +lkLxUTiB43ooi+CqILtlNxH7kM1O4Ncs6UGZMXf2IiG9s3JDCsYVPkC5QDMOPkTy +2ZriF56uPerlJveF0dC61RZ6RlM3iSJ9Fwvea0Oy4rwkCcs5SHuwoDTFyxiyz0QC +9iqi3fG3iSbLvY9UtJ6X+BtDqdXLAT9Pq527mukPP3LwpEqFVyNQKnGLdLOu2YXc +TWWWseSQkHRzBmjD18KTD74mg4aXxEabyT4snrXpi5+UGLT4KXGV5syQO6Lc0OGw +9O/0qAIU+YW7ojbKv8fr+NB31TGhGYWASjYlN1NvPotRAK6339O0/Rqr9xGgy3AY +SR+ic2Y610IM7xccKuTVAW9UofKQwJZChqae9VVZ +=J9CI +-----END PGP PUBLIC KEY BLOCK----- diff --git a/ansible/roles/monitoring/templates/grafana.list b/ansible/roles/monitoring/templates/grafana.list new file mode 100644 index 00000000..adbad20b --- /dev/null +++ b/ansible/roles/monitoring/templates/grafana.list @@ -0,0 +1 @@ +deb https://packages.grafana.com/oss/deb stable main diff --git a/ansible/roles/monitoring/templates/grafana.sources b/ansible/roles/monitoring/templates/grafana.sources new file mode 100644 index 00000000..dd17e11d --- /dev/null +++ b/ansible/roles/monitoring/templates/grafana.sources @@ -0,0 +1,7 @@ +Architectures: amd64 +Suites: stable +Uris: https://apt.grafana.com 
+Types: deb +Components: main +Enabled: yes +Signed-By: /etc/apt/grafana.asc diff --git a/ansible/roles/monitoring/templates/htpasswd b/ansible/roles/monitoring/templates/htpasswd new file mode 100644 index 00000000..3c18c804 --- /dev/null +++ b/ansible/roles/monitoring/templates/htpasswd @@ -0,0 +1,5 @@ +# ansible-managed in ooni/sysadmin.git +# Username should be taken from @openobservatory.org domain +# Password should be generated with scripts/ngx-mkpasswd +# don't remove federico, because it will break monitoring +{{ monitoring_htpasswd }} diff --git a/ansible/roles/monitoring/templates/jupyter.service b/ansible/roles/monitoring/templates/jupyter.service new file mode 100644 index 00000000..d8c1dc97 --- /dev/null +++ b/ansible/roles/monitoring/templates/jupyter.service @@ -0,0 +1,37 @@ +[Unit] +Description=Jupyter +After=network.target + +[Service] +Type=simple +ExecStart=/usr/bin/jupyter-notebook --no-browser +Environment=JUPYTER_CONFIG_DIR=/var/lib/jupyter/conf +Environment=JUPYTER_DATA_DIR=/var/lib/jupyter/data +Environment=JUPYTER_RUNTIME_DIR=/var/lib/jupyter/run +Restart=on-failure +# DynamicUser=yes implies ProtectSystem=strict ProtectHome=read-only PrivateTmp, RemoveIPC, NoNewPrivileges, RestrictSUIDSGID +DynamicUser=yes +PrivateDevices=yes +PrivateMounts=yes +PrivateUsers=yes +DevicePolicy=closed +ProtectHostname=yes +ProtectHome=yes +ProtectControlGroups=yes +ProtectKernelModules=yes +ProtectKernelTunables=yes +RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK +RestrictNamespaces=yes +RestrictRealtime=yes +MemoryDenyWriteExecute=yes +LockPersonality=yes + +CacheDirectory=jupyter +ReadWriteDirectories=/var/lib/jupyter +RuntimeDirectory=jupyter +StateDirectory=jupyter +WorkingDirectory=/var/lib/jupyter +WorkingDirectory=/var/lib/jupyter + +[Install] +WantedBy=multi-user.target diff --git a/ansible/roles/monitoring/templates/nginx.conf b/ansible/roles/monitoring/templates/nginx.conf new file mode 100644 index 00000000..c383297d --- /dev/null +++ b/ansible/roles/monitoring/templates/nginx.conf @@ -0,0 +1,203 @@ +# Managed by ansible +# roles/ooni-backend/monitoring/nginx.conf + +# Grafana +map $http_upgrade $connection_upgrade { + default upgrade; + '' close; +} +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name grafana.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + + ssl_certificate /var/lib/dehydrated/certs/grafana.ooni.org/fullchain.pem; + ssl_certificate_key /var/lib/dehydrated/certs/grafana.ooni.org/privkey.pem; + ssl_trusted_certificate /var/lib/dehydrated/certs/grafana.ooni.org/chain.pem; + + # Use the intermediate configuration to support legacy probes + # https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.6 + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + add_header Strict-Transport-Security "max-age=63072000" always; + ssl_stapling on; + ssl_stapling_verify on; + + resolver 127.0.0.1; + + # Grafana uses its own authentication + + location / { + proxy_pass http://localhost:3000; + # do not forward the basic auth header to grafana + proxy_set_header 
Authorization ""; + proxy_set_header Host $http_host; + } + + # Proxy Grafana Live WebSocket connections. + location /api/live { + rewrite ^/(.*) /$1 break; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection "Upgrade"; + proxy_set_header Host $http_host; + proxy_pass http://localhost:3000/; + } +} + +# Netdata +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name netdata.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + + ssl_certificate /var/lib/dehydrated/certs/netdata.ooni.org/fullchain.pem; + ssl_certificate_key /var/lib/dehydrated/certs/netdata.ooni.org/privkey.pem; + ssl_trusted_certificate /var/lib/dehydrated/certs/netdata.ooni.org/chain.pem; + + # Use the intermediate configuration to support legacy probes + # https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.6 + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; + + resolver 127.0.0.1; + + location /{ + proxy_set_header Host $host; + proxy_set_header X-Forwarded-Host $host; + proxy_set_header X-Forwarded-Server $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_http_version 1.1; + proxy_pass_request_headers on; + proxy_set_header Connection "keep-alive"; + proxy_store off; + proxy_pass http://127.0.0.1:19999/; + auth_basic "OONI Monitoring"; + auth_basic_user_file /etc/nginx/monitoring.htpasswd; + gzip on; + gzip_proxied any; + gzip_types *; + } +} + +# Prometheus +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name prometheus.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + + ssl_certificate /var/lib/dehydrated/certs/prometheus.ooni.org/fullchain.pem; + ssl_certificate_key /var/lib/dehydrated/certs/prometheus.ooni.org/privkey.pem; + ssl_trusted_certificate /var/lib/dehydrated/certs/prometheus.ooni.org/chain.pem; + + # Use the intermediate configuration to support legacy probes + # https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.6 + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + + # verify chain of trust 
of OCSP response using Root CA and Intermediate certs + #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; + + resolver 127.0.0.1; + location / { + proxy_pass http://127.0.0.1:9090; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + auth_basic "OONI Monitoring"; + auth_basic_user_file /etc/nginx/monitoring.htpasswd; + } +} + +# Jupyter +server { + listen 443 ssl http2; + listen [::]:443 ssl http2; + server_name jupyter.ooni.org; + access_log syslog:server=unix:/dev/log,severity=info; + error_log syslog:server=unix:/dev/log,severity=info; + gzip on; + + ssl_certificate /var/lib/dehydrated/certs/jupyter.ooni.org/fullchain.pem; + ssl_certificate_key /var/lib/dehydrated/certs/jupyter.ooni.org/privkey.pem; + ssl_trusted_certificate /var/lib/dehydrated/certs/jupyter.ooni.org/chain.pem; + + # Use the intermediate configuration to support legacy probes + # https://ssl-config.mozilla.org/#server=nginx&version=1.14.2&config=intermediate&openssl=1.1.1d&guideline=5.6 + ssl_session_timeout 5m; + ssl_session_cache shared:MozSSL:30m; + ssl_session_tickets off; + + ssl_protocols TLSv1.2 TLSv1.3; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384; + ssl_prefer_server_ciphers off; + + # HSTS (ngx_http_headers_module is required) (63072000 seconds) + add_header Strict-Transport-Security "max-age=63072000" always; + + # OCSP stapling + ssl_stapling on; + ssl_stapling_verify on; + + # verify chain of trust of OCSP response using Root CA and Intermediate certs + #ssl_trusted_certificate /path/to/root_CA_cert_plus_intermediates; + + resolver 127.0.0.1; + + location / { + proxy_pass http://127.0.0.1:8888; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Host $http_host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + auth_basic "OONI Monitoring"; + auth_basic_user_file /etc/nginx/monitoring.htpasswd; + + # websocket headers + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header X-Scheme $scheme; + + proxy_buffering off; + } +} diff --git a/ansible/roles/monitoring/templates/vector.list b/ansible/roles/monitoring/templates/vector.list new file mode 100644 index 00000000..1b56727a --- /dev/null +++ b/ansible/roles/monitoring/templates/vector.list @@ -0,0 +1,2 @@ +# See ansible/roles/monitoring/tasks/main.yml +deb https://repositories.timber.io/public/vector/deb/debian bullseye main diff --git a/ansible/roles/monitoring/templates/vector.toml b/ansible/roles/monitoring/templates/vector.toml new file mode 100644 index 00000000..1b236589 --- /dev/null +++ b/ansible/roles/monitoring/templates/vector.toml @@ -0,0 +1,24 @@ +# Managed by ansible + +[sources.local_journald] +type = "journald" +exclude_units = [ "clickhouse" ] + +[sources.remote_vector] +type = "vector" +address = "0.0.0.0:10514" + +tls.enabled = true +tls.verify_certificate = true +tls.verify_hostname = false +tls.ca_file = "/etc/vector/oonicacert.pem" +tls.crt_file = "/etc/vector/node-cert.pem" +tls.key_file = "/etc/vector/node.key" + +[sinks.clickhouse_sink] +type = "clickhouse" +inputs = [ "local_journald", "remote_vector" ] +database = "default" +endpoint = "http://localhost:8123" +table = "logs" +date_time_best_effort = true diff --git a/ansible/roles/notify-slack/tasks/main.yml 
b/ansible/roles/notify-slack/tasks/main.yml new file mode 100644 index 00000000..83290d00 --- /dev/null +++ b/ansible/roles/notify-slack/tasks/main.yml @@ -0,0 +1,8 @@ +--- +- name: "notify #ooni-bots at openobservatory.slack.com" + slack: + token: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/ansible_slack_token', profile='oonidevops_user_prod') }}" + channel: "#ooni-bots" + msg: "{{ lookup('template', 'notify-slack.j2') }}" + delegate_to: localhost + run_once: true diff --git a/ansible/roles/notify-slack/templates/notify-slack.j2 b/ansible/roles/notify-slack/templates/notify-slack.j2 new file mode 100644 index 00000000..6f43d7dc --- /dev/null +++ b/ansible/roles/notify-slack/templates/notify-slack.j2 @@ -0,0 +1,10 @@ +{% set cleanness = 'dirty' if lookup('pipe', 'git status -s') else 'clean' %} +{% set head = lookup('pipe', 'git rev-parse HEAD') %} +{% set head7 = lookup('pipe', 'git rev-parse --short HEAD') %} +{% set branch = lookup('pipe', 'git describe --always --exact-match --all HEAD') | replace('heads/', '') %} +{% set user_slug = lookup('pipe', 'whoami') + '@' + lookup('pipe', 'hostname') %} +{% if lookup('file', '~/.ooni-sysadmin/user_slug', errors='ignore') %} +{% set user_slug = lookup('file', '~/.ooni-sysadmin/user_slug') %} +{% endif %} + +{{ user_slug }} runs `{{ lookup('env', 'ANSIBLE_SLACK_CMD') }}` on top of {{ cleanness }} `` (`{{ branch }}`) From f87f1cefe0914d5e6db32e302579091b5cd19b8f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 20 Dec 2024 17:59:12 +0100 Subject: [PATCH 60/88] Add support for deploying airflow (#132) It's using our own fork of this idealista role available here: https://github.com/ooni/airflow-role. We should probably at some point upstream the changes. --- ansible/deploy-airflow.yml | 9 ++ ansible/deploy-tier0.yml | 14 +-- ansible/group_vars/airflow/vars.yml | 13 +++ ansible/inventory | 3 + ansible/requirements.yml | 7 ++ ansible/roles/dehydrated/meta/main.yml | 2 +- ansible/roles/dehydrated/tasks/main.yml | 8 -- ansible/roles/nginx/tasks/main.yml | 10 --- ansible/roles/oonidata_airflow/Readme.md | 25 ++++++ .../roles/oonidata_airflow/defaults/main.yml | 2 + .../roles/oonidata_airflow/handlers/main.yml | 4 + ansible/roles/oonidata_airflow/tasks/main.yml | 89 +++++++++++++++++++ .../templates/nginx-airflow.j2 | 40 +++++++++ tf/environments/prod/dns_records.tf | 20 +++-- 14 files changed, 209 insertions(+), 37 deletions(-) create mode 100644 ansible/deploy-airflow.yml create mode 100644 ansible/group_vars/airflow/vars.yml create mode 100644 ansible/roles/oonidata_airflow/Readme.md create mode 100644 ansible/roles/oonidata_airflow/defaults/main.yml create mode 100644 ansible/roles/oonidata_airflow/handlers/main.yml create mode 100644 ansible/roles/oonidata_airflow/tasks/main.yml create mode 100644 ansible/roles/oonidata_airflow/templates/nginx-airflow.j2 diff --git a/ansible/deploy-airflow.yml b/ansible/deploy-airflow.yml new file mode 100644 index 00000000..ebf34e4a --- /dev/null +++ b/ansible/deploy-airflow.yml @@ -0,0 +1,9 @@ +--- +- name: Deploy airflow frontend host + hosts: + - data1.htz-fsn.prod.ooni.nu + become: true + roles: + - oonidata_airflow + vars: + airflow_public_fqdn: "airflow.prod.ooni.io" diff --git a/ansible/deploy-tier0.yml b/ansible/deploy-tier0.yml index 7c11a8c6..3657d544 100644 --- a/ansible/deploy-tier0.yml +++ b/ansible/deploy-tier0.yml @@ -8,15 +8,5 @@ - name: Include clickhouse playbook ansible.builtin.import_playbook: deploy-clickhouse.yml -- name: Deploy oonidata worker nodes - 
hosts: - - data1.htz-fsn.prod.ooni.nu - become: true - tags: - - oonidata_worker - roles: - - oonidata - vars: - enable_jupyterhub: false - enable_oonipipeline_worker: true - clickhouse_url: "clickhouse://write:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/clickhouse_write_password', profile='oonidevops_user_prod') | hash('sha256') }}@clickhouse1.prod.ooni.io/ooni" +- name: Include airflow playbook + ansible.builtin.import_playbook: deploy-airflow.yml diff --git a/ansible/group_vars/airflow/vars.yml b/ansible/group_vars/airflow/vars.yml new file mode 100644 index 00000000..e4946716 --- /dev/null +++ b/ansible/group_vars/airflow/vars.yml @@ -0,0 +1,13 @@ +airflow_admin_users: + - name: OONI Admin + username: admin + password: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_admin_password', profile='oonidevops_user_prod') }}" + role: Admin + firstname: Open + lastname: Observatory + email: admin@ooni.org +airflow_fernet_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_fernet_key', profile='oonidevops_user_prod') }}" +airflow_webserver_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_webserver_secret_key', profile='oonidevops_user_prod') }}" +airflow_executor: "LocalExecutor" +airflow_webserver_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_webserver_secret_key', profile='oonidevops_user_prod') }}" +airflow_database_conn: "postgresql+psycopg2://airflow:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_postgresql_password', profile='oonidevops_user_prod') }}@ooni-tier0-postgres.c7mgscca82no.eu-central-1.rds.amazonaws.com/airflow" diff --git a/ansible/inventory b/ansible/inventory index bda9a486..1d3c41e9 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -10,6 +10,9 @@ data1.htz-fsn.prod.ooni.nu data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu +[airflow] +data1.htz-fsn.prod.ooni.nu + ## Location tags [htz_fsn] diff --git a/ansible/requirements.yml b/ansible/requirements.yml index 52ae85ea..e78d86bb 100644 --- a/ansible/requirements.yml +++ b/ansible/requirements.yml @@ -1,8 +1,15 @@ - src: willshersystems.sshd + version: v0.25.0 - src: nginxinc.nginx + version: 0.24.3 - src: geerlingguy.certbot + version: 5.2.0 - src: artis3n.tailscale + version: v4.5.0 - src: https://github.com/idealista/clickhouse_role scm: git version: 3.5.1 name: idealista.clickhouse_role +- src: https://github.com/ooni/airflow-role.git + scm: git + name: ooni.airflow_role diff --git a/ansible/roles/dehydrated/meta/main.yml b/ansible/roles/dehydrated/meta/main.yml index e7e996b0..0e72e865 100644 --- a/ansible/roles/dehydrated/meta/main.yml +++ b/ansible/roles/dehydrated/meta/main.yml @@ -1,5 +1,5 @@ --- dependencies: - - nginx-buster + - nginx ... 
diff --git a/ansible/roles/dehydrated/tasks/main.yml b/ansible/roles/dehydrated/tasks/main.yml index 0bfaf7c3..0a84f1a0 100644 --- a/ansible/roles/dehydrated/tasks/main.yml +++ b/ansible/roles/dehydrated/tasks/main.yml @@ -47,14 +47,6 @@ tags: dehydrated shell: systemctl reload nginx.service -- name: allow incoming TCP connections to Nginx on port 80 - tags: dehydrated - blockinfile: - path: /etc/ooni/nftables/tcp/80.nft - create: yes - block: | - add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP" - - name: reload nftables service tags: dehydrated shell: systemctl reload nftables.service diff --git a/ansible/roles/nginx/tasks/main.yml b/ansible/roles/nginx/tasks/main.yml index 757b42e0..9af2a9b4 100644 --- a/ansible/roles/nginx/tasks/main.yml +++ b/ansible/roles/nginx/tasks/main.yml @@ -51,13 +51,3 @@ notify: reload nginx tags: - nginx - -- name: create config dir - ansible.builtin.file: - path: /etc/ooni/nftables/tcp - state: directory - owner: root - group: root - mode: 0755 - tags: - - nftables diff --git a/ansible/roles/oonidata_airflow/Readme.md b/ansible/roles/oonidata_airflow/Readme.md new file mode 100644 index 00000000..c43a8f20 --- /dev/null +++ b/ansible/roles/oonidata_airflow/Readme.md @@ -0,0 +1,25 @@ +## Airflow role deployment notes + +There are a few pieces that are dependencies to this role running properly that +you will have to do manually: + +* Setup the postgresql database and create the relevant DB and account. + +Be sure to give correct permissions to the airflow user. Here is a relevant snippet: +``` +CREATE DATABASE airflow +CREATE ROLE airflow WITH PASSWORD '' LOGIN; +GRANT ALL PRIVILEGES ON DATABASE airflow TO airflow; +GRANT ALL ON SCHEMA public TO airflow; +``` + +* For some reason the admin account creation is failing. This is likely a bug + in the upstream role. During the last deploy this was addressed by logging +into the host and running the create task manually: +``` +AIRFLOW_CONFIG=/etc/airflow/airflow.cfg AIRFLOW_HOME=/opt/airflow/ /opt/airflow/bin/airflow users create --username admin --password XXX --firstname Open --lastname Observatory --role Admin --email admin@ooni.org +``` + +* Once the setup is complete, you will then have to login to the host using the + admin user and go into Admin->Configuration and add the `clickhouse_url` +variable diff --git a/ansible/roles/oonidata_airflow/defaults/main.yml b/ansible/roles/oonidata_airflow/defaults/main.yml new file mode 100644 index 00000000..c422ed2b --- /dev/null +++ b/ansible/roles/oonidata_airflow/defaults/main.yml @@ -0,0 +1,2 @@ +tls_cert_dir: /var/lib/dehydrated/certs +certbot_domains_extra: [] diff --git a/ansible/roles/oonidata_airflow/handlers/main.yml b/ansible/roles/oonidata_airflow/handlers/main.yml new file mode 100644 index 00000000..f6dda47d --- /dev/null +++ b/ansible/roles/oonidata_airflow/handlers/main.yml @@ -0,0 +1,4 @@ +- name: Reload nginx + ansible.builtin.systemd_service: + name: nginx + state: reloaded diff --git a/ansible/roles/oonidata_airflow/tasks/main.yml b/ansible/roles/oonidata_airflow/tasks/main.yml new file mode 100644 index 00000000..cec780a7 --- /dev/null +++ b/ansible/roles/oonidata_airflow/tasks/main.yml @@ -0,0 +1,89 @@ +- name: Ensure Airflow group + group: + name: "airflow" + become: true + +# TODO: uncomment this section if you want to redeploy it +# this was added after the user had already been created by the airflow_role +# and so it's failing because it's trying to modify the user. 
+#- name: Ensure Airflow user +# user: +# name: "airflow" +# group: "airflow" +# system: true +# shell: "/usr/sbin/nologin" +# createhome: "yes" +# home: "/opt/airflow" +# become: true + +- name: Checkout oonidata repo + become_user: airflow + ansible.builtin.git: + repo: 'https://github.com/ooni/data.git' + dest: /opt/airflow/oonidata + version: airflow + +- ansible.builtin.include_role: + name: ooni.airflow_role + tags: + - oonidata + - airflow + vars: + airflow_app_home: /opt/airflow + airflow_dags_folder: /opt/airflow/oonidata/dags/ + airflow_webserver_host: "127.0.0.1" + airflow_webserver_port: 8080 + airflow_webserver_base_url: "https://{{ airflow_public_fqdn }}" + airflow_environment_extra_vars: + - name: AIRFLOW_VAR_DATA_DIR + value: "{{ airflow_app_home }}/data_dir" + airflow_extra_packages: + - postgres + - virtualenv + airflow_services: + airflow_webserver: + service_name: airflow-webserver + enabled: true + running: true + state: started + path: airflow-webserver.service.j2 + airflow_scheduler: + service_name: airflow-scheduler + enabled: true + running: true + state: started + path: airflow-scheduler.service.j2 + +- name: Set correct permissions on oonidata repo dir + ansible.builtin.file: + path: /opt/oonidata + state: directory + mode: '0755' + owner: airflow + recurse: yes + +- ansible.builtin.include_role: + name: nginx + tags: + - oonidata + - nginx + +- ansible.builtin.include_role: + name: dehydrated + tags: + - oonidata + - dehydrated + vars: + ssl_domains: "{{ [ inventory_hostname ] + [ airflow_public_fqdn ] }}" + +- name: Setup airflow nginx config + ansible.builtin.template: + src: nginx-airflow.j2 + dest: /etc/nginx/sites-enabled/02-airflow + owner: root + mode: "0655" + notify: + - Reload nginx + tags: + - oonidata + - config diff --git a/ansible/roles/oonidata_airflow/templates/nginx-airflow.j2 b/ansible/roles/oonidata_airflow/templates/nginx-airflow.j2 new file mode 100644 index 00000000..6c3b3fec --- /dev/null +++ b/ansible/roles/oonidata_airflow/templates/nginx-airflow.j2 @@ -0,0 +1,40 @@ +# ansible-managed in ooni/devops.git + +map $http_upgrade $connection_upgrade { + default upgrade; + '' close; +} + +server { + listen 443 ssl http2; + + include /etc/nginx/ssl_intermediate.conf; + + ssl_certificate {{ tls_cert_dir }}/{{ inventory_hostname }}/fullchain.pem; + ssl_certificate_key {{ tls_cert_dir }}/{{ inventory_hostname }}/privkey.pem; + ssl_trusted_certificate {{ tls_cert_dir }}/{{ inventory_hostname }}/chain.pem; + + server_name {{ airflow_public_fqdn }}; + access_log /var/log/nginx/{{ airflow_public_fqdn }}.access.log; + error_log /var/log/nginx/{{ airflow_public_fqdn }}.log warn; + + add_header Access-Control-Allow-Origin *; + + ## Airflow reverse proxy + location / { + proxy_pass http://127.0.0.1:8080; + + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header Host $host; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + + client_max_body_size 100M; + + # WebSocket support + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection $connection_upgrade; + proxy_set_header X-Scheme $scheme; + proxy_buffering off; + } +} diff --git a/tf/environments/prod/dns_records.tf b/tf/environments/prod/dns_records.tf index a24b26e0..88fb03d4 100644 --- a/tf/environments/prod/dns_records.tf +++ b/tf/environments/prod/dns_records.tf @@ -979,7 +979,7 @@ resource "aws_route53_record" "data1-htz-fsn-prod-ooni-nu-_a_" { records = ["142.132.254.225"] ttl = "60" type = "A" - zone_id = local.dns_root_zone_ooni_nu + 
zone_id = local.dns_zone_ooni_nu } resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" { @@ -987,7 +987,7 @@ resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" { records = ["88.198.54.12"] ttl = "60" type = "A" - zone_id = local.dns_root_zone_ooni_nu + zone_id = local.dns_zone_ooni_nu } resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" { @@ -995,7 +995,7 @@ resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" { records = ["168.119.7.188"] ttl = "60" type = "A" - zone_id = local.dns_root_zone_ooni_nu + zone_id = local.dns_zone_ooni_nu } resource "aws_route53_record" "clickhouse1-prod-ooni-io-_a_" { @@ -1003,7 +1003,7 @@ resource "aws_route53_record" "clickhouse1-prod-ooni-io-_a_" { records = ["142.132.254.225"] ttl = "60" type = "A" - zone_id = local.dns_root_zone_ooni_io + zone_id = local.dns_zone_ooni_io } resource "aws_route53_record" "clickhouse2-prod-ooni-io-_A_" { @@ -1011,7 +1011,7 @@ resource "aws_route53_record" "clickhouse2-prod-ooni-io-_A_" { records = ["88.198.54.12"] ttl = "60" type = "A" - zone_id = local.dns_root_zone_ooni_io + zone_id = local.dns_zone_ooni_io } resource "aws_route53_record" "clickhouse3-prod-ooni-io-_A_" { @@ -1019,5 +1019,13 @@ resource "aws_route53_record" "clickhouse3-prod-ooni-io-_A_" { records = ["168.119.7.188"] ttl = "60" type = "A" - zone_id = local.dns_root_zone_ooni_io + zone_id = local.dns_zone_ooni_io +} + +resource "aws_route53_record" "airflow-prod-ooni-io-_a_" { + name = "airflow.prod.ooni.io" + records = ["142.132.254.225"] + ttl = "60" + type = "A" + zone_id = local.dns_zone_ooni_io } From b6210874f10c779464124248ff8eb653969aed5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Fri, 20 Dec 2024 18:12:13 +0100 Subject: [PATCH 61/88] Point airflow deployment to main branch --- ansible/roles/oonidata_airflow/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/oonidata_airflow/tasks/main.yml b/ansible/roles/oonidata_airflow/tasks/main.yml index cec780a7..625ed6b2 100644 --- a/ansible/roles/oonidata_airflow/tasks/main.yml +++ b/ansible/roles/oonidata_airflow/tasks/main.yml @@ -21,7 +21,7 @@ ansible.builtin.git: repo: 'https://github.com/ooni/data.git' dest: /opt/airflow/oonidata - version: airflow + version: main - ansible.builtin.include_role: name: ooni.airflow_role From b757ecd5dcb3a435bbb5a8b00548a3dce1fdfaec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 6 Jan 2025 13:42:16 +0200 Subject: [PATCH 62/88] Integrate docs from ooni/docs into devops repo (#133) Integrate legacy docs into main docs to work on updating it --- README.md | 2 - docs/DebianPackages.md | 30 + docs/DeprecatedDocs.md | 141 ++++ docs/IncidentResponse.md | 362 ++++++++++ docs/Infrastructure.md | 360 ++++++++++ docs/LegacyDocs.md | 182 +++++ docs/MonitoringAlerts.md | 612 +++++++++++++++++ docs/Runbooks.md | 1155 ++++++++++++++++++++++++++++++++ docs/Tools.md | 211 ++++++ docs/disk-increase.md | 17 - docs/merge-tree-replication.md | 127 ---- scripts/build-docs.sh | 55 +- 12 files changed, 3079 insertions(+), 175 deletions(-) create mode 100644 docs/DebianPackages.md create mode 100644 docs/DeprecatedDocs.md create mode 100644 docs/IncidentResponse.md create mode 100644 docs/Infrastructure.md create mode 100644 docs/LegacyDocs.md create mode 100644 docs/MonitoringAlerts.md create mode 100644 docs/Runbooks.md create mode 100644 docs/Tools.md delete mode 100644 docs/disk-increase.md delete mode 100644 docs/merge-tree-replication.md diff 
--git a/README.md b/README.md index a5069ca4..2b9ebe76 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,5 @@ # OONI Devops -This document outlines some of the best practices we follow when developing and -deploying OONI services. ## Infrastructure Tiers diff --git a/docs/DebianPackages.md b/docs/DebianPackages.md new file mode 100644 index 00000000..53be5af3 --- /dev/null +++ b/docs/DebianPackages.md @@ -0,0 +1,30 @@ +# Debian packages + +**NOTE** The direction we are going with the new backend is that of dropping debian packaging of all backend API components and move to a dockerized deployment approach. + +This section lists the Debian packages used to deploy backend +components. They are built by [GitHub CI workflows](#github-ci-workflows) πŸ’‘ +and deployed using [The deployer tool](#the-deployer-tool) πŸ”§. See +[Debian package build and publish](#debian-package-build-and-publish) πŸ’‘. + + +#### ooni-api package +Debian package for the [API](#api) βš™ + + +#### fastpath package +Debian package for the [Fastpath](#fastpath) βš™ + + +#### detector package +Debian package for the +[Social media blocking event detector](#social-media-blocking-event-detector) βš™ + + +#### analysis package +The `analysis` Debian package contains various tools and runs various of +systemd timers, see [Systemd timers](#systemd-timers) πŸ’‘. + + +#### Analysis deployment +See [Backend component deployment](#backend-component-deployment) πŸ“’ diff --git a/docs/DeprecatedDocs.md b/docs/DeprecatedDocs.md new file mode 100644 index 00000000..113d91dc --- /dev/null +++ b/docs/DeprecatedDocs.md @@ -0,0 +1,141 @@ +## Test helper rotation runbook +This runbook provides hints to troubleshoot the rotation of test +helpers. In this scenario test helpers are not being rotated as expected +and their TLS certificates might be at risk of expiring. + +Steps: + +1. Review [Test helpers](#comp:test_helpers), [Test helper rotation](#comp:test_helper_rotation) and [Test helpers notebook](#test-helpers-notebook) πŸ“” + +2. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š. + Look at different timespans: + + a. The uptime of the test helpers should be staggered by a week + depending on [Test helper rotation](#test-helper-rotation) βš™. + +3. A summary of the live and last rotated test helper can be obtained + with: + +```sql +SELECT rdn, dns_zone, name, region, draining_at FROM test_helper_instances ORDER BY name DESC LIMIT 8 +``` + +4. The rotation tool can be started manually. It will always pick the + oldest host for rotation. ⚠️ Due to the propagation time of changes + in the DNS rotating many test helpers too quickly can impact the + probes. + + a. Log on [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯ + + b. Check the last run using + `sudo systemctl status ooni-rotation.timer` + + c. Review the logs using `sudo journalctl -u ooni-rotation` + + d. Run `sudo systemctl restart ooni-rotation` and monitor the logs. + +5. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š + during and after the rotation. + + +### Test helpers failure runbook +This runbook presents a scenario where a test helper is causing probes +to fail their tests sporadically. It describes how to identify the +affected host and mitigate the issue but can also be used to investigate +other issues affecting the test helpers. + +It has been chosen because such kind of incidents can impact the quality +of measurements and can be relatively difficult to troubleshoot. 
+ +For investigating glitches in the +[test helper rotation](#test-helper-rotation) βš™ see +[test helper rotation runbook](#test-helper-rotation-runbook) πŸ“’. + +In this scenario either an alert has been sent to the +[#ooni-bots](#topic:oonibots) [Slack](#slack) πŸ”§ channel by +the [test helper failure rate notebook](#test-helper-failure-rate-notebook) πŸ“” or something +else caused the investigation. +See [Alerting](#alerting) πŸ’‘ for details. + +Steps: + +1. Review [Test helpers](#test-helpers) βš™ + +2. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š. + Look at different timespans: + + a. The uptime of the test helpers should be staggered by a week + depending on [Test helper rotation](#test-helper-rotation) βš™. + + b. The in-flight requests and requests per second should be + consistent across hosts, except for `0.th.ooni.org`. See + [Test helpers list](#test-helpers-list) 🐝 for details. + + c. Review CPU load, memory usage and run duration percentiles. + +3. Review [Test helper failure rate notebook](#test-helper-failure-rate-notebook) πŸ“” + +4. For more detailed investigation there is also a [test helper notebook](https://jupyter.ooni.org/notebooks/notebooks/2023%20%5Bfederico%5D%20test%20helper%20metadata%20in%20fastpath.ipynb) + +5. Log on the hosts using + `ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -Snone root@0.th.ooni.org` + +6. Run `journalctl --since '1 hour ago'` or review logs using the query + below. + +7. Run `top`, `strace`, `tcpdump` as needed. + +8. The rotation tool can be started at any time to rotate away failing + test helpers. The rotation script will always pick the oldest host + for rotation. ⚠️ Due to the propagation time of changes in the DNS + rotating many test helpers too quickly can impact the probes. + + a. Log on [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯ + + b. Check the last run using + `sudo systemctl status ooni-rotation.timer` + + c. Review the logs using `sudo journalctl -u ooni-rotation` + + d. Run `sudo systemctl restart ooni-rotation` and monitor the logs. + +9. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š + during and after the rotation. + +10. Summarize traffic hitting a test helper using the following commands: + + Top 10 miniooni probe IP addresses (Warning: this is sensitive data) + + `tail -n 100000 /var/log/nginx/access.log | grep miniooni | cut -d' ' -f1|sort|uniq -c|sort -nr|head` + + Similar, with anonimized IP addresses: + + `grep POST /var/log/nginx/access.log | grep miniooni | cut -d'.' -f1-3 | head -n 10000 |sort|uniq -c|sort -nr|head` + + Number of requests from miniooni probe in 10-minutes buckets: + + `grep POST /var/log/nginx/access.log | grep miniooni | cut -d' ' -f4 | cut -c1-17 | uniq -c` + + Number of requests from miniooni probe in 1-minute buckets: + + `grep POST /var/log/nginx/access.log | grep miniooni | cut -d' ' -f4 | cut -c1-18 | uniq -c` + + Number of requests grouped by hour, cache HIT/MISS/etc, software name and version + + `head -n 100000 /var/log/nginx/access.log | awk '{print $4, $6, $13}' | cut -c1-15,22- | sort | uniq -c | sort -n` + +To extract data from the centralized log database +on [monitoring.ooni.org](#monitoring.ooni.org) πŸ–₯ you can use: + +``` sql +SELECT message FROM logs +WHERE SYSLOG_IDENTIFIER = 'oohelperd' +ORDER BY __REALTIME_TIMESTAMP DESC +LIMIT 10 +``` + +> **note** +> The table is indexed by `__REALTIME_TIMESTAMP`. Limiting the range by time can significantly increase query performance. 
+ + +See [Selecting test helper for rotation](#selecting-test-helper-for-rotation) 🐞 diff --git a/docs/IncidentResponse.md b/docs/IncidentResponse.md new file mode 100644 index 00000000..6e98465c --- /dev/null +++ b/docs/IncidentResponse.md @@ -0,0 +1,362 @@ +# Incident response + +## On-call preparation +Review [Alerting](#alerting) πŸ’‘ and check +[Grafana dashboards](#grafana-dashboards) πŸ’‘ + +On Android devices the following apps can be used: + + * [Slack](#slack) πŸ”§ app with audible notifications from the + #ooni-bots channel + + * [Grafana](#grafana) πŸ”§ viewer + + +## Tiers and severities + +**TODO** Consolidate the tiers outlined here with the other tiers listed in the top level readme. + +When designing architecture of backend components or handling incidents it can be useful to have +defined severities and tiers. + +A set of guidelines are described at +This section presets a simplified approach to prioritizing incident response. + +In this case there is no distinction between severity and priority. Impact and response time are connected. + +Incidents and alarms from monitoring can be classified by severity levels based on their impact: + + - 1: Serious security breach or data loss; serious loss of privacy impacting users or team members; legal risks. + - 2: Downtime impacting service usability for a significant fraction of users; Serious security vulnerability. + Examples: probes being unable to submit measurements + - 3: Downtime or poor performance impacting secondary services; anything that can cause a level 2 event if not addressed within 24h; outages of monitoring infrastructure + - 4: Every other event that requires attention within 7 days + +Based on the set of severities, components can be classified in tier as follows: + + - tier 1: Anything that can cause a severity 1 (or less severe) event. + - tier 2: Anything that can cause a severity 2 (or less severe) event but not a severity 1. + - tier 3: Anything that can cause a severity 3 (or less severe) event but not a severity 1 or 2. + - ...and so on + +### Relations and dependencies between services + +Tiers are useful during design and deployment as a way to minimize risk of outages and avoid unexpected cascading failures. + +Having a low tier value should not be treated as a sign of "importance" for a component, but a liability. + +Pre-production deployment stages (e.g. testbed) have tier level >= 5 + +In this context a component can be a service as a whole, or a running process (daemon), a host, a hardware device, etc. +A component can contain other components. + +A component "A" is said to "hard depend" on another component "B" if an outage of B triggers an outage of A. + +It can also "soft depend" on another component if an outage of the latter triggers only a failure of a subsystem, or an ancillary feature or a reasonably short downtime. + +Regardless of tiers, components at a higher stage, (e.g. production) cannot depend and/or receive data from lower stages. The opposite is acceptable. + +Components can only hard-depend on other components at the same tier or with lower values. +E.g. a Tier 2 component can depend on a Tier 1 but not the other way around. +If it happens, the Tier 2 component should be immediatly re-classified as Tier 1 and treated accordingly (see below). + +E.g. anything that handles real-time failover for a service should be treated at the same tier (or lower value) as the service. + +Redundant components follow a special rule. 
For example, the "test helper" service provided to the probes, as a whole, should be considered tier 2 at least, +as it can impact all probes preventing them from running tests succesfully. +Yet, test helper processes and VMs can be considered tier 3 or even 4 if they sit behind a load balancer that can move traffic away from a failing host reliably +and with no significant downtime. + +Example: An active/standby database pair provides a tier 2 service. An automatic failover tool is triggered by a simple monitoring script. +Both have to be labeled tier 2. + + +### Handling incidents + +Depending on the severity of an event a different workflow can be followed. + +An example of incident management workflow can be: + +| Severity | Response time | Requires conference call | Requires call leader | Requires postmortem | Sterile | +| -------- | ------- | ------ | -------- | ------- | ------ | +| 1 | 2h | Yes | Yes | Yes | Yes | +| 2 | 8h | Yes | No | Yes | Yes | +| 3 | 24h | No | No | No | Yes | +| 4 | 7d | No | No | No | No | + +The term "sterile" is named after - during the investigation the only priority should be to solve the issue at hand. +Other investigations, discussions, meetings should be postponed. + +When in doubt around the severity of an event, always err on the safe side. + +### Regular operations + +Based on the tier of a component, development and operation can follow different rules. + +An example of incident management workflow can be: + +| Tier | Require architecture review | Require code review | Require 3rd party security review | Require Change Management | +| -------- | ------- | ------ | -------- | ------- | +| 1 | Yes | Yes | Yes | Yes | +| 2 | Yes | Yes | No | No | +| 3 | No | Yes | No | No | +| 4 | No | No | No | No | + +"Change Management" refers to planning operational changes in advance and having team members review the change to be deployed in advance. + +E.g. scheduling a meeting to perform a probe release, have 2 people reviewing the metrics before and after the change. + + +## Redundant notifications +If needed, a secondary channel for alert notification can be set up +using + +Ntfy can host a push notification topic for free. + +For example is currently being used to +notify the outcome of CI runs from + + +An Android app is available: + + +[Grafana](#grafana) πŸ”§ can be configured to send alerts to ntfy.sh +using a webhook. + +### Measurement drop tutorial + +This tutorial provides examples on how to investigate a drop in measurements. +It is based on an incident where a drop in measurement was detected and the cause was not immediately clear. + +It is not meant to be a step-by-step runbook but rather give hints on what data to look for, how to generate charts and identify the root cause of an incident. + +A dedicated issue can be used to track the incident and the investigation effort and provide visibility: +https://github.com/ooni/sysadmin/blob/master/.github/ISSUE_TEMPLATE/incident.md +The issue can be filed during or after the incident depending on urgency. + +Some of the examples below come from +https://jupyter.ooni.org/notebooks/notebooks/android_probe_release_msm_drop_investigation.ipynb +During an investigation it can be good to create a dedicated Jupyter notebook. + +We started with reviewing: + + * + No issues detected as the charts show a short timespan. + * The charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š. + No issues detected here. + * The [API and fastpath](#api-and-fastpath) πŸ“Š dashboard. + No issues detected here. 
+ * The [Long term measurements prediction notebook](#long-term-measurements-prediction-notebook) πŸ“” + The decrease was clearly showing. + +Everything looked OK in terms of backend health. We then generated the following charts. + +The chunks of Python code below are meant to be run in +[Jupyter Notebook](#jupyter-notebook) πŸ”§ and are mostly "self-contained". +To be used you only need to import the +[Ooniutils microlibrary](#ooniutils-microlibrary) πŸ’‘: + +``` python +%run ooniutils.ipynb +``` + +The "t" label is commonly used on existing notebooks to refer to hour/day/week time slices. + +We want to plot how many measurements we are receiving from Ooniprobe Android in unattended runs, grouped by day and by `software_version`. + +The last line generates an area chart using Altair. Notice that the `x` and `y` and `color` parameters match the 3 columns extracted by the `SELECT`. + +The `GROUP BY` is performed on 2 of those 3 columns, while `COUNT(*)` is counting how many measurements exist in each t/software_version "bucket". + +The output of the SQL query is just a dataframe with 3 columns. There is no need to pivot or reindex it as Altair does the data transformation required. + +> **note** +> Altair refuses to process dataframes with more than 5000 rows. + +``` python +x = click_query(""" + SELECT + toStartOfDay(toStartOfWeek(measurement_start_time)) AS t, + software_version, + COUNT(*) AS msm_cnt + FROM fastpath + WHERE measurement_start_time > today() - interval 3 month + AND measurement_start_time < today() + AND software_name = 'ooniprobe-android-unattended' + GROUP BY t, software_version +""") +alt.Chart(x).mark_area().encode(x='t', y='msm_cnt', color='software_version').properties(width=1000, height=200, title="Android unattended msm cnt") +``` + +The generated chart was: + +![chart](../../../assets/images-backend/msm_drop_investigation_1.png) + +From the chart we concluded that the overall number of measurements have been decreasing since the release of a new version. +We also re-ran the plot by filtering on other `software_name` values and saw no other type of probe was affected. + +> **note** +> Due to a limitation in Altair, when grouping time by week use +> `toStartOfDay(toStartOfWeek(measurement_start_time)) AS t` + +Then we wanted to measure how many measurements are being collected during each `web_connectivity` test run. +This is to understand if probes are testing less measurements in each run. + +The following Python snippet uses nested SQL queries. The inner query groups measurements by time, `software_version` and `report_id`, +and counts how many measurements are related to each `report_id`. +The outer query "ignores" the `report_id` value and `quantile()` is used to extract the 50 percentile of `msm_cnt`. + +> **note** +> The use of double `%%` in `LIKE` is required to escape the `%` wildcard. The wildcard is used to match any amount of characters. 
+ +``` python +x = click_query(""" + SELECT + t, + quantile(0.5)(msm_cnt) AS msm_cnt_p50, + software_version + FROM ( + SELECT + toStartOfDay(toStartOfWeek(measurement_start_time)) AS t, + software_version, + report_id, + COUNT(*) AS msm_cnt + FROM fastpath + WHERE measurement_start_time > today() - interval 3 month + AND test_name = 'web_connectivity' + AND measurement_start_time < today() + AND software_name = 'ooniprobe-android-unattended' + AND software_version LIKE '3.8%%' + GROUP BY t, software_version, report_id + ) GROUP BY t, software_version +""") +alt.Chart(x).mark_line().encode(x='t', y='msm_cnt_p50', color='software_version').properties(width=1000, height=200, title="Android unattended msmt count per report") +``` + +We also compared different version groups and different `software_name`. +The output shows that indeed the number of measurements for each run is significantly lower for the newly released versions. + +![chart](../../../assets/images-backend/msm_drop_investigation_4.png) + +To update the previous Python snippet to group measurements by a different field, change `software_version` into the new column name. +For example use `probe_cc` to show a chart with a breakdown by probe country name. You should change `software_version` once in each SELECT part, +then in the last two `GROUP BY`, and finally in the `color` line at the bottom. + +We did such change to confirm that all countries were impacted in the same way. (The output is not included here as not remarkable) + +Also, `mark_line` on the bottom line is used to create line charts. Switch it to `mark_area` to generate *stacked* area charts. +See the previous two charts as examples. + +We implemented a change to the API to improve logging the list of tests returned at check-in: +and reviewed monitored the logs using `sudo journalctl -f -u ooni-api`. + +The output showed that the API is very often returning 100 URLs to probes. + +We then ran a similar query to extract the test duration time by calculating +`MAX(measurement_start_time) - MIN(measurement_start_time) AS delta` for each `report_id` value: + +``` python +x = click_query(""" + SELECT t, quantile(0.5)(delta) AS deltaq, software_version + FROM ( + SELECT + toStartOfDay(toStartOfWeek(measurement_start_time)) AS t, + software_version, + report_id, + MAX(measurement_start_time) - MIN(measurement_start_time) AS delta + FROM fastpath + WHERE measurement_start_time > today() - interval 3 month + AND test_name = 'web_connectivity' + AND measurement_start_time < today() + AND software_name = 'ooniprobe-android-unattended' + AND software_version LIKE '3.8%%' + GROUP BY t, software_version, report_id + ) GROUP BY t, software_version +""") +alt.Chart(x).mark_line().encode(x='t', y='deltaq', color='software_version').properties(width=1000, height=200, title="Android unattended test run time") +``` + +![chart](../../../assets/images-backend/msm_drop_investigation_2.png) + +The chart showed that the tests are indeed running for a shorter amount of time. + +> **note** +> Percentiles can be more meaningful then averages. +> To calculate quantiles in ClickHouse use `quantile()()`. + +Example: + +``` sql +quantile(0.1)(delta) AS deltaq10 +``` + +Wondering if the slowdown was due to slower measurement execution or other issues, we also generated a table as follows. + +> **note** +> Showing color bars allows to visually inspect tables more quickly. 
Setting the axis value to `0`, `1` or `None` helps readability: +> `y.style.bar(axis=None)` + +Notice the `delta / msmcnt AS seconds_per_msm` calculation: + +``` python +y = click_query(""" + SELECT + quantile(0.1)(delta) AS deltaq10, + quantile(0.3)(delta) AS deltaq30, + quantile(0.5)(delta) AS deltaq50, + quantile(0.7)(delta) AS deltaq70, + quantile(0.9)(delta) AS deltaq90, + + quantile(0.5)(seconds_per_msm) AS seconds_per_msm_q50, + quantile(0.5)(msmcnt) AS msmcnt_q50, + + software_version, software_name + FROM ( + SELECT + software_version, software_name, + report_id, + MAX(measurement_start_time) - MIN(measurement_start_time) AS delta, + count(*) AS msmcnt, + delta / msmcnt AS seconds_per_msm + FROM fastpath + WHERE measurement_start_time > today() - interval 3 month + AND test_name = 'web_connectivity' + AND measurement_start_time < today() + AND software_name IN ['ooniprobe-android-unattended', 'ooniprobe-android'] + AND software_version LIKE '3.8%%' + GROUP BY software_version, report_id, software_name + ) GROUP BY software_version, software_name + ORDER by software_version, software_name ASC +""") +y.style.bar(axis=None) +``` + +![chart](../../../assets/images-backend/msm_drop_investigation_3.png) + +In the table we looked at the `seconds_per_msm_q50` column: the median time for running each test did not change significantly. + +To summarize: + * The backend appears to deliver the same amount of URLs to the Probes as usual. + * The time required to run each test is rougly the same. + * Both the number of measurements per run and the run time decreased in the new releases. + +## Github issues + +### Selecting test helper for rotation +See + + +### Document Tor targets +See + + +### Disable unnecessary ClickHouse system tables +See + + +### Feed fastpath from JSONL +See + + +### Implement Grafana dashboard and alarms backup +See diff --git a/docs/Infrastructure.md b/docs/Infrastructure.md new file mode 100644 index 00000000..8507c789 --- /dev/null +++ b/docs/Infrastructure.md @@ -0,0 +1,360 @@ +# Infrastructure + +Our infrastructure is primarily spread across the following providers: + +* Hetzner, for dedicated hosts +* DigitalOcean, for VPSs which require IPv6 support +* AWS, for most cloud based infrastrucutre hosting + +We manage the deployment and configuration of hosts through a combination of ansible and terraform. + +### Hosts + +This section provides a summary of the backend hosts described in the +rest of the document. + +A full list is available at + - +also see [Ansible](#ansible) πŸ”§ + +#### backend-fsn.ooni.org + +Public-facing production backend host, receiving the deployment of the +packages: + +- [ooni-api](legacybackend/operations/#ooni-api-package) πŸ“¦ + +- [fastpath](legacybackend/operations/#fastpath-package) πŸ“¦ + +- [analysis](legacybackend/operations/#analysis-package) πŸ“¦ + +- [detector](legacybackend/operations/#detector-package) πŸ“¦ + +#### backend-hel.ooni.org + +Standby / pre-production backend host. Runs the same software stack as +[backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯, plus the +[OONI bridges](#ooni-bridges) βš™ + +#### ams-pg-test.ooni.org + +Testbed backend host. Runs the same software stack as +[backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯. Database tables are not backed up and +incoming measurements are not uploaded to S3. All data is considered +ephemeral. 
+
+#### monitoring.ooni.org
+
+Runs the internal monitoring stack, including
+[Jupyter Notebook](#tool:jupyter), [Prometheus](#prometheus) πŸ”§,
+[Vector](#vector) πŸ”§ and
+[ClickHouse instance for logs](#clickhouse-instance-for-logs) βš™
+
+### The Sysadmin repository
+
+This is a git repository living at
+for internal use. It primarily contains:
+
+- Playbooks for [Ansible](#ansible) πŸ”§
+
+- The [debops-ci tool](#debops-ci-tool) πŸ”§
+
+- Scripts and tools including diagrams for
+  [DNS and Domains](#dns-and-domains) πŸ’‘
+
+### Ansible
+
+Ansible is used to configure the OSes on the backend hosts and manage
+the configuration of backend components. The playbooks are kept at
+
+
+This manual supersedes
+
+
+#### Installation and setup
+
+Install Ansible using OS packages or a Python virtualenv. Ensure the
+same major+minor version is used across the team.
+
+Secrets are stored in vaults using the `ansible/vault` script as a
+wrapper for `ansible-vault`. Store encrypted variables with a `vault_`
+prefix so they can be located using grep,
+and reference each variable under the same name without the prefix in the
+corresponding `vars.yml`.
+
+In order to access secrets stored inside of the vault, you will need a
+copy of the vault password encrypted with your PGP key. This file should
+be stored inside of `~/.ssh/ooni-sysadmin.vaultpw.gpg`.
+
+The file should be provided by other teammates and GPG-encrypted for your own GPG key.
+
+#### SSH Configuration
+
+You should configure your `~/.ssh/config` with the following:
+
+```
+ IdentitiesOnly yes
+ ServerAliveInterval 120
+ UserKnownHostsFile ~/.ssh/known_hosts ~/REPLACE_ME/sysadmin/ext/known_hosts
+
+ host *.ooni.io
+     user YOUR_USERNAME
+
+ host *.ooni.nu
+     user YOUR_USERNAME
+
+ host *.ooni.org
+     user YOUR_USERNAME
+```
+
+Replace `~/REPLACE_ME/sysadmin/ext/known_hosts` with the path where you have cloned
+the `ooni/sysadmin` repo. This will ensure you use the host key
+fingerprints from this repo instead of just relying on TOFU.
+
+You should replace `YOUR_USERNAME` with your username from `adm_login`.
+
+On macOS you may want to also add:
+
+    host *
+        UseKeychain yes
+
+to use the Keychain to store passwords.
+
+### Ansible playbooks summary
+
+Usage:
+
+    ./play deploy-<component>.yml -l <host> --diff -C
+    ./play deploy-<component>.yml -l <host> --diff
+
+> **warning**
+> Any minor error in configuration files or ansible's playbooks can be
+> destructive for the backend infrastructure. Always test-run playbooks
+> with `--diff` and `-C` at first and carefully verify configuration
+> changes. After verification run the playbook without `-C` and verify
+> again the applied changes.
+
+> **note**
+> [Etckeeper](#etckeeper) πŸ”§ can be useful to verify configuration
+> changes from a different point of view.
+
+Some notable parts of the repository:
+
+A list of the backend hosts lives at
+
+
+The backend deployment playbook lives at
+
+
+Many playbooks depend on roles that configure the OS, named
+`base-<codename>`, for example:
+
+for Debian Bookworm and
+
+for Debian Bullseye
+
+The nftables firewall is configured to read every `.nft` file under
+`/etc/ooni/nftables/`. This allows roles to
+create small files to open a port each and keep the configuration as
+close as possible to the ansible step that deploys a service. For
+example:
+
+
+> **note**
+> Ansible announces its runs on [ooni-bots](##ooni-bots) πŸ’‘ unless running with `-C`.
+
+#### The root account
+
+Runbooks use ssh to log on the hosts using your own account and leveraging `sudo` to act as root.
+
+The only exception is when a new host is being deployed - in that case ansible will log in as root to create
+individual accounts and lock out the root user.
+
+When running the entire playbook ansible might try to log in as root.
+This can be avoided by selecting only the required tags using `-t <tags>`.
+
+Ideally the root user should be disabled after successfully creating user accounts.
+
+#### Roles layout
+
+Ansible playbooks use multiple roles (see
+[example](https://github.com/ooni/sysadmin/blob/master/ansible/deploy-backend.yml#L46))
+to deploy various components.
+
+A few roles use the `meta/main.yml` file to depend on other roles. See
+[example](https://github.com/ooni/sysadmin/blob/master/ansible/roles/ooni-backend/meta/main.yml)
+
+> **note**
+> The latter method should be used sparingly because ansible does not
+> indicate where each task in a playbook is coming from.
+
+A diagram of the role dependencies for the deploy-backend.yml playbook:
+
+```mermaid
+
+flowchart LR
+    A(deploy-backend.yml) --> B(base-bullseye)
+    B -- meta --> G(adm)
+    A --> F(nftables)
+    A --> C(nginx-buster)
+    A --> D(dehydrated)
+    D -- meta --> C
+    E -- meta --> F
+    A --> E(ooni-backend)
+    style B fill:#eeffee
+    style C fill:#eeffee
+    style D fill:#eeffee
+    style E fill:#eeffee
+    style F fill:#eeffee
+    style G fill:#eeffee
+```
+
+A similar diagram for deploy-monitoring.yml:
+
+```mermaid
+
+flowchart LR
+    B -- meta --> G(adm)
+    M(deploy-monitoring.yml) --> B(base-bookworm)
+    M --> O(ooca-cert)
+    M --> F(nftables)
+    M --> D(dehydrated) -- meta --> N(nginx-buster)
+    M --> P(prometheus)
+    M --> X(blackbox-exporter)
+    M --> T(alertmanager)
+    style B fill:#eeffee
+    style D fill:#eeffee
+    style F fill:#eeffee
+    style G fill:#eeffee
+    style N fill:#eeffee
+    style O fill:#eeffee
+    style P fill:#eeffee
+    style T fill:#eeffee
+    style X fill:#eeffee
+```
+
+> **note**
+> When deploying files or updating files already existing on the hosts it can be useful to add a note e.g. "Deployed by ansible, see ".
+> This helps track down how files on the host were modified and why.
+
+### Etckeeper
+
+Etckeeper is deployed on backend
+hosts and keeps the `/etc` directory under git version control. It
+commits automatically on package deployment and on timed runs. It also
+allows doing commits manually.
+
+To view the history of the /etc directory:
+
+```bash
+sudo -i
+cd /etc
+git log --raw
+```
+
+And `git diff` for uncommitted changes.
+
+Use `etckeeper commit <message>` to commit changes.
+
+:::tip
+Etckeeper commits changes automatically when APT is used or on a daily basis, whichever comes first.
+:::
+
+### Team credential repository
+
+A private repository contains team
+credentials, including username/password tuples, GPG keys and more.
+
+> **warning**
+> The credential file is GPG-encrypted as `credentials.json.gpg`. Do not
+> commit the cleartext `credentials.json` file.
+
+> **note**
+> The credentials are stored in a JSON file to allow a flexible,
+> hierarchical layout. This allows storing metadata like descriptions of
+> account usage, dates of account creation, expiry, and credential
+> rotation time.
+
+The tool checks JSON syntax and sorts keys automatically.
+
+#### Listing file contents
+
+    git pull
+    make show
+
+#### Editing contents
+
+    git pull
+    make edit
+    git commit credentials.json.gpg -m "<message>"
+    git push
+
+#### Extracting a credential programmatically:
+
+    git pull
+    ./extract 'grafana.username'
+
+> **note**
+> this can be used to automate credential retrieval from other tools, e.g.
+> [Ansible](#ansible) πŸ”§
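+
+For example, a minimal Python wrapper around `./extract`, assuming it is
+run from a checkout of the credentials repository:
+
+```python
+import subprocess
+
+def get_credential(path: str) -> str:
+    # Call the repository's ./extract helper and return the cleartext value
+    return subprocess.check_output(["./extract", path], text=True).strip()
+
+grafana_user = get_credential("grafana.username")
+```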
+
+#### Updating users allowed to decrypt the credentials file
+
+Edit `makefile` to add or remove recipients (see `--recipient`)
+
+Then run:
+
+    git pull
+    make decrypt encrypt
+    git commit makefile credentials.json.gpg
+    git push
+
+### DNS diagrams
+
+#### A:
+
+See
+
+
+The image is not included here due to space constraints.
+
+#### CNAME:
+
+![CNAME](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.CNAME.svg)
+
+#### MX:
+
+![MX](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.MX.svg)
+
+#### NS:
+
+![NS](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.NS.svg)
+
+#### TXT:
+
+![TXT](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.TXT.svg)
+
+#### HTTP Moved Permanently (HTTP code 301):
+
+![URL301](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.URL301.svg)
+
+#### HTTP Redirects:
+
+![URL](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.URL.svg)
+
+#### Updating DNS diagrams
+
+To update the diagrams use the sysadmin repository:
+
+Update the `./ext/dns.json` file:
+
+    cd ansible
+    ./play ext-inventory.yml -t namecheap
+    cd ..
+
+Then run the following to generate the charts:
+
+    ./scripts/dnsgraph
+
+It will generate SVG files under the `./ext/` directory. Finally, commit
+and push the dns.json and SVG files.
diff --git a/docs/LegacyDocs.md b/docs/LegacyDocs.md
new file mode 100644
index 00000000..785ae2f9
--- /dev/null
+++ b/docs/LegacyDocs.md
@@ -0,0 +1,182 @@
+# Legacy Docs
+
+**ATTENTION** this documentation speaks about topics that are still relevant, yet it may not be up to date with the currently defined best-practices or infrastructure status.
+
+### Creating new playbooks runbook
+
+**TODO** this needs to be rewritten to conform to the new policies
+
+
+This runbook describes how to add new runbooks or modify existing runbooks to support new hosts.
+
+When adding a new host to an existing group, if no customization is required it is enough to modify `inventory`
+and insert the hostname in the same locations as its peers.
+
+If the host requires a small customization e.g. a different configuration file for the <>:
+
+1. add the hostname to `inventory` as described above
+2. create "custom" blocks in `tasks/main.yml` to adapt the deployment steps to the new host using the `when:` syntax.
+
+For an example see:
+
+NOTE: Complex `when:` rules can lower the readability of `main.yml`
+
+When adding a new type of backend component that is different from anything already existing a new dedicated role can be created:
+
+1. add the hostname to `inventory` as described above
+2. create a new playbook e.g. `ansible/deploy-newcomponent.yml`
+3. copy files from an existing role into a new `ansible/roles/newcomponent` directory:
+
+- `ansible/roles/newcomponent/meta/main.yml`
+- `ansible/roles/newcomponent/tasks/main.yml`
+- `ansible/roles/newcomponent/templates/example_config_file`
+
+4. run `./play deploy-newcomponent.yml -l newhost.ooni.org --diff -C` and review the output
+5. run `./play deploy-newcomponent.yml -l newhost.ooni.org --diff` and review the output
+
+Example:
+
+TIP: To ensure playbooks are robust and idempotent it can be beneficial to develop and test tasks incrementally by running the deployment commands often.
+
+
+## Test helper rotation runbook
+This runbook provides hints to troubleshoot the rotation of test
+helpers.
+In this scenario test helpers are not being rotated as expected
+and their TLS certificates might be at risk of expiring.
+
+Steps:
+
+1. Review [Test helpers](#comp:test_helpers), [Test helper rotation](#comp:test_helper_rotation) and [Test helpers notebook](#test-helpers-notebook) πŸ“”
+
+2. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š.
+   Look at different timespans:
+
+   a. The uptime of the test helpers should be staggered by a week
+      depending on [Test helper rotation](#test-helper-rotation) βš™.
+
+3. A summary of the live and last rotated test helper can be obtained
+   with:
+
+```sql
+SELECT rdn, dns_zone, name, region, draining_at FROM test_helper_instances ORDER BY name DESC LIMIT 8
+```
+
+4. The rotation tool can be started manually. It will always pick the
+   oldest host for rotation. ⚠️ Due to the propagation time of changes
+   in the DNS, rotating many test helpers too quickly can impact the
+   probes.
+
+   a. Log on [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯
+
+   b. Check the last run using
+      `sudo systemctl status ooni-rotation.timer`
+
+   c. Review the logs using `sudo journalctl -u ooni-rotation`
+
+   d. Run `sudo systemctl restart ooni-rotation` and monitor the logs.
+
+5. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š
+   during and after the rotation.
+
+
+### Test helpers failure runbook
+This runbook presents a scenario where a test helper is causing probes
+to fail their tests sporadically. It describes how to identify the
+affected host and mitigate the issue but can also be used to investigate
+other issues affecting the test helpers.
+
+It has been chosen because such incidents can impact the quality
+of measurements and can be relatively difficult to troubleshoot.
+
+For investigating glitches in the
+[test helper rotation](#test-helper-rotation) βš™ see
+[test helper rotation runbook](#test-helper-rotation-runbook) πŸ“’.
+
+In this scenario either an alert has been sent to the
+[#ooni-bots](#topic:oonibots) [Slack](#slack) πŸ”§ channel by
+the [test helper failure rate notebook](#test-helper-failure-rate-notebook) πŸ“” or something
+else prompted the investigation.
+See [Alerting](#alerting) πŸ’‘ for details.
+
+Steps:
+
+1. Review [Test helpers](#test-helpers) βš™
+
+2. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š.
+   Look at different timespans:
+
+   a. The uptime of the test helpers should be staggered by a week
+      depending on [Test helper rotation](#test-helper-rotation) βš™.
+
+   b. The in-flight requests and requests per second should be
+      consistent across hosts, except for `0.th.ooni.org`. See
+      [Test helpers list](#test-helpers-list) 🐝 for details.
+
+   c. Review CPU load, memory usage and run duration percentiles.
+
+3. Review [Test helper failure rate notebook](#test-helper-failure-rate-notebook) πŸ“”
+
+4. For more detailed investigation there is also a [test helper notebook](https://jupyter.ooni.org/notebooks/notebooks/2023%20%5Bfederico%5D%20test%20helper%20metadata%20in%20fastpath.ipynb)
+
+5. Log on the hosts using
+   `ssh -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null -Snone root@0.th.ooni.org`
+
+6. Run `journalctl --since '1 hour ago'` or review logs using the query
+   below.
+
+7. Run `top`, `strace`, `tcpdump` as needed.
+
+8. The rotation tool can be started at any time to rotate away failing
+   test helpers. The rotation script will always pick the oldest host
+   for rotation.
+   ⚠️ Due to the propagation time of changes in the DNS,
+   rotating many test helpers too quickly can impact the probes.
+
+   a. Log on [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯
+
+   b. Check the last run using
+      `sudo systemctl status ooni-rotation.timer`
+
+   c. Review the logs using `sudo journalctl -u ooni-rotation`
+
+   d. Run `sudo systemctl restart ooni-rotation` and monitor the logs.
+
+9. Review the charts on [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š
+   during and after the rotation.
+
+10. Summarize traffic hitting a test helper using the following commands:
+
+    Top 10 miniooni probe IP addresses (Warning: this is sensitive data)
+
+    `tail -n 100000 /var/log/nginx/access.log | grep miniooni | cut -d' ' -f1|sort|uniq -c|sort -nr|head`
+
+    Similar, with anonymized IP addresses:
+
+    `grep POST /var/log/nginx/access.log | grep miniooni | cut -d'.' -f1-3 | head -n 10000 |sort|uniq -c|sort -nr|head`
+
+    Number of requests from miniooni probe in 10-minute buckets:
+
+    `grep POST /var/log/nginx/access.log | grep miniooni | cut -d' ' -f4 | cut -c1-17 | uniq -c`
+
+    Number of requests from miniooni probe in 1-minute buckets:
+
+    `grep POST /var/log/nginx/access.log | grep miniooni | cut -d' ' -f4 | cut -c1-18 | uniq -c`
+
+    Number of requests grouped by hour, cache HIT/MISS/etc, software name and version
+
+    `head -n 100000 /var/log/nginx/access.log | awk '{print $4, $6, $13}' | cut -c1-15,22- | sort | uniq -c | sort -n`
+
+To extract data from the centralized log database
+on [monitoring.ooni.org](#monitoring.ooni.org) πŸ–₯ you can use:
+
+``` sql
+SELECT message FROM logs
+WHERE SYSLOG_IDENTIFIER = 'oohelperd'
+ORDER BY __REALTIME_TIMESTAMP DESC
+LIMIT 10
+```
+
+> **note**
+> The table is indexed by `__REALTIME_TIMESTAMP`. Limiting the range by time can significantly increase query performance.
+
+
+See [Selecting test helper for rotation](#selecting-test-helper-for-rotation) 🐞
diff --git a/docs/MonitoringAlerts.md b/docs/MonitoringAlerts.md
new file mode 100644
index 00000000..c4fb3b0b
--- /dev/null
+++ b/docs/MonitoringAlerts.md
@@ -0,0 +1,612 @@
+# Monitoring and Alerts
+
+## Application metrics
+ All components of the backend are designed to output application
+ metrics.
+
+ Metrics are prefixed with the name of each application. The metrics are
+ used in [Grafana](#grafana) πŸ”§ for charts, monitoring and alarming.
+
+ They use the [StatsD](#statsd) πŸ’‘ protocol.
+
+ Application metrics data flow:
+
+ ![Diagram](https://kroki.io/blockdiag/svg/eNq9kc1qAyEUhffzFDLZNnGf0EBX7SoEkl0p4arXUaJe8QcKpe9eZ9Imkz5AXHo-OcdzhCN5VhYG9tUxhRqqK6dsICJ7ZolqUKgEfW469hKjsxKKpcDeJTlKjegXWmM7_UcjdlgUFJiro6Z1_8RMQj3emFJiXnM-2GKqWEnynChYLkCeMailIlk9hjL5cOFIcA82_OmnO33l1SJcTKcA-0Qei8GaH5shXn2nGK8JNIQH9zBcTKcA86mW29suDgS60T23d1ndjda4eX1X9O143B_-t9vg309uuu8fUvvJ0Q==)
+
+
+
+ Ellipses represent data; rectangles represent processes. Purple
+ components belong to the backend. Click on the image and then click on
+ each shape to see related documentation.
+
+ [Prometheus](#tool:prometheus) and [Grafana](#grafana) πŸ”§ provide
+ historical charts for more than 90 days and are useful to investigate
+ long-term trends.
+
+ [Netdata](#netdata) πŸ”§ provides a web UI with real-time metrics. See
+ the dedicated subchapter for details.
+
+
+### StatsD
+ All backend components send StatsD metrics over UDP using localhost as destination.
+
+ This guarantees that applications never block on metric generation in
+ case the receiver slows down. The StatsD messages are received by
+ [Netdata](#netdata) πŸ”§. It automatically tracks any new metric,
+ generates averages and summaries as needed and exposes it to
+ [Prometheus](#prometheus) πŸ”§ for scraping.
+ In the codebase the statsd library is often used as:
+
+ ```python
+ from .metrics import setup_metrics
+ setup_metrics(name="<component>")
+ metrics.gauge("<metric>", <value>)
+ ```
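+
+ Timers are commonly applied as decorators following the same pattern.
+ A minimal sketch, where the component name is a placeholder and
+ `metrics` is the client initialized by `setup_metrics` as above:
+
+ ```python
+ from .metrics import setup_metrics
+ setup_metrics(name="mycomponent")
+
+ @metrics.timer("generate_test_list")
+ def generate_test_list():
+     ...  # elapsed time is reported under the "generate_test_list" timer
+ ```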
+
+ Because of this, a quick way to identify where metrics are being generated
+ in the backend codebase is to search e.g.:
+
+ *
+ *
+
+ Where possible, timers have the same name as the function being timed e.g.
+
+
+ See [Conventions](#conventions) πŸ’‘ for patterns around component naming.
+
+
+#### Metrics list
+ This subsection provides a list of the most important application metrics as they
+ are shown in Grafana. The names are autogenerated by Netdata based on the
+ metric name used in StatsD.
+
+ For example a `@metrics.timer("generate_test_list")` Python decorator is used at:
+ .
+ Such a timer will be processed by Netdata and appear in Grafana as:
+ ```
+ netdata_statsd_timer_ooni_api_generate_test_list_milliseconds_average
+ ```
+
+ The metrics always start with `netdata_statsd` and end with:
+
+ * `_milliseconds_average`
+ * `_events_persec_average`
+ * `_value_average`
+
+ Also see
+
+ TIP: StatsD collectors (like Netdata or others) preprocess datapoints by calculating average/min/max values etc.
+
+ Run this to locate where in the backend codebase application metrics
+ are being generated:
+
+ ```bash
+ find ~ -name '*.py' -exec grep 'metrics\.' -H "{}" \;
+ ```
+
+ Metrics for the [ASN metadata updater](#asn-metadata-updater) βš™.
+ See the [ASN metadata updater dashboard](#asn-metadata-updater-dashboard) πŸ“Š:
+
+```
+netdata_statsd_asnmeta_updater_asnmeta_tmp_len_gauge_value_average
+netdata_statsd_asnmeta_updater_asnmeta_update_progress_gauge_value_average
+netdata_statsd_asnmeta_updater_fetch_data_timer_milliseconds_average
+netdata_statsd_gauge_asnmeta_updater_asnmeta_tmp_len_value_average
+netdata_statsd_gauge_asnmeta_updater_asnmeta_update_progress_value_average
+netdata_statsd_timer_asnmeta_updater_fetch_data_milliseconds_average
+```
+
+
+Metrics for the [CitizenLab test list updater](#citizenlab-test-list-updater) βš™
+
+```
+netdata_statsd_citizenlab_test_lists_updater_citizenlab_test_list_len_gauge_value_average
+netdata_statsd_citizenlab_test_lists_updater_fetch_citizen_lab_lists_timer_milliseconds_average
+netdata_statsd_citizenlab_test_lists_updater_update_citizenlab_table_timer_milliseconds_average
+netdata_statsd_gauge_citizenlab_test_lists_updater_citizenlab_test_list_len_value_average
+netdata_statsd_gauge_citizenlab_test_lists_updater_rowcount_value_average
+netdata_statsd_timer_citizenlab_test_lists_updater_fetch_citizen_lab_lists_milliseconds_average
+netdata_statsd_timer_citizenlab_test_lists_updater_rebuild_citizenlab_table_from_citizen_lab_lists_milliseconds_average
+netdata_statsd_timer_citizenlab_test_lists_updater_update_citizenlab_table_milliseconds_average
+```
+
+Metrics for the [Database backup tool](#database-backup-tool) βš™.
+See the [Database backup dashboard](#database-backup-dashboard) πŸ“Š on Grafana: + +``` +netdata_statsd_db_backup_run_export_timer_milliseconds_average +netdata_statsd_db_backup_status_gauge_value_average +netdata_statsd_db_backup_table_fastpath_backup_time_ms_gauge_value_average +netdata_statsd_db_backup_table_jsonl_backup_time_ms_gauge_value_average +netdata_statsd_db_backup_uploaded_bytes_tot_gauge_value_average +netdata_statsd_db_backup_upload_to_s3_timer_milliseconds_average +netdata_statsd_gauge_db_backup_status_value_average +netdata_statsd_gauge_db_backup_table_fastpath_backup_time_ms_value_average +netdata_statsd_gauge_db_backup_table_jsonl_backup_time_ms_value_average +netdata_statsd_gauge_db_backup_uploaded_bytes_tot_value_average +netdata_statsd_timer_db_backup_run_backup_milliseconds_average +netdata_statsd_timer_db_backup_run_export_milliseconds_average +netdata_statsd_timer_db_backup_upload_to_s3_milliseconds_average +netdata_statsd_gauge_db_backup_status_value_average +netdata_statsd_gauge_db_backup_table_citizenlab_byte_count_value_average +netdata_statsd_gauge_db_backup_table_fastpath_backup_time_ms_value_average +netdata_statsd_gauge_db_backup_table_fastpath_byte_count_value_average +netdata_statsd_gauge_db_backup_table_jsonl_backup_time_ms_value_average +netdata_statsd_gauge_db_backup_table_jsonl_byte_count_value_average +netdata_statsd_gauge_db_backup_uploaded_bytes_tot_value_average +netdata_statsd_timer_db_backup_backup_table_citizenlab_milliseconds_average +netdata_statsd_timer_db_backup_backup_table_fastpath_milliseconds_average +netdata_statsd_timer_db_backup_backup_table_jsonl_milliseconds_average +``` + + +Metrics for the [social media blocking event detector](#social-media-blocking-event-detector) βš™: + +``` +netdata_statsd_gauge_detector_blocking_events_tblsize_value_average +netdata_statsd_gauge_detector_blocking_status_tblsize_value_average +netdata_statsd_timer_detector_run_detection_milliseconds_average +``` + + +Metrics for the [Fastpath](#fastpath) βš™. Used in various dashboards, +primarily [API and fastpath](#api-and-fastpath) πŸ“Š dashboard. + +``` +netdata_statsd_timer_fastpath_db_clickhouse_upsert_summary_milliseconds_average +netdata_statsd_timer_fastpath_db_fetch_fingerprints_milliseconds_average +netdata_statsd_timer_fastpath_full_run_milliseconds_average +netdata_statsd_gauge_fastpath_recent_measurement_count_value_average +``` + + +Metrics [Fingerprint updater](#fingerprint-updater) βš™ +See the [Fingerprint updater dashboard](#fingerprint-updater-dashboard) πŸ“Š on Grafana. + +``` +netdata_statsd_timer_fingerprints_updater_fetch_csv_milliseconds_average +netdata_statsd_gauge_fingerprints_updater_fingerprints_dns_tmp_len_value_average +netdata_statsd_gauge_fingerprints_updater_fingerprints_http_tmp_len_value_average +netdata_statsd_gauge_fingerprints_updater_fingerprints_update_progress_value_average +``` + +Metrics from Nginx caching of the aggregation API. +See [Aggregation cache monitoring](#aggregation-cache-monitoring) 🐍 + +``` +netdata_statsd_gauge_nginx_aggregation_cache_EXPIRED_value_average +netdata_statsd_gauge_nginx_aggregation_cache_HIT_value_average +netdata_statsd_gauge_nginx_aggregation_cache_MISS_value_average +netdata_statsd_gauge_nginx_aggregation_cache_UPDATING_value_average +``` + +Metrics for the [API](#api) βš™. 
+ +``` +netdata_statsd_counter_ooni_api_geoip_asn_differs_events_persec_average +netdata_statsd_counter_ooni_api_geoip_cc_differs_events_persec_average +netdata_statsd_counter_ooni_api_geoip_ipaddr_found_events_persec_average +netdata_statsd_counter_ooni_api_geoip_ipaddr_not_found_events_persec_average +netdata_statsd_counter_ooni_api_gunicorn_request_status_ +netdata_statsd_counter_ooni_api_probe_cc_asn_match_events_persec_average +netdata_statsd_counter_ooni_api_probe_cc_asn_nomatch_events_persec_average +netdata_statsd_counter_ooni_api_probe_legacy_login_successful_events_persec_average +netdata_statsd_counter_ooni_api_probe_login_successful_events_persec_average +netdata_statsd_counter_ooni_api_receive_measurement_count_events_persec_average +netdata_statsd_counter_ooni_api_receive_measurement_discard_asn_ +netdata_statsd_counter_ooni_api_receive_measurement_discard_cc_zz_events_persec_average +netdata_statsd_counter_ooni_api_uploader_msmt_count_events_persec_average +netdata_statsd_counter_ooni_api_uploader_postcan_count_events_persec_average +netdata_statsd_gauge_ooni_api_check_in_test_list_count_value_average +netdata_statsd_gauge_ooni_api_spool_post_count_value_average +netdata_statsd_gauge_ooni_api_test_list_urls_count_value_average +netdata_statsd_timer_ooni_api_apicall___api__v +netdata_statsd_timer_ooni_api_citizenlab_lock_time_milliseconds_average +netdata_statsd_timer_ooni_api_citizenlab_repo_init_milliseconds_average +netdata_statsd_timer_ooni_api_citizenlab_repo_pull_milliseconds_average +netdata_statsd_timer_ooni_api_fetch_citizenlab_data_milliseconds_average +netdata_statsd_timer_ooni_api_fetch_reactive_url_list_milliseconds_average +netdata_statsd_timer_ooni_api_generate_test_list_milliseconds_average +netdata_statsd_timer_ooni_api_get_aggregated_milliseconds_average +netdata_statsd_timer_ooni_api_get_measurement_meta_clickhouse_milliseconds_average +netdata_statsd_timer_ooni_api_get_measurement_meta_milliseconds_average +netdata_statsd_timer_ooni_api_get_raw_measurement_milliseconds_average +netdata_statsd_timer_ooni_api_get_torsf_stats_milliseconds_average +netdata_statsd_timer_ooni_api_gunicorn_request_duration_milliseconds_average +netdata_statsd_timer_ooni_api_open_report_milliseconds_average +netdata_statsd_timer_ooni_api_open_report_milliseconds_averageopen_report +netdata_statsd_timer_ooni_api_receive_measurement_milliseconds_average +netdata_statsd_timer_ooni_api_uploader_fill_jsonl_milliseconds_average +netdata_statsd_timer_ooni_api_uploader_fill_postcan_milliseconds_average +netdata_statsd_timer_ooni_api_uploader_total_run_time_milliseconds_average +netdata_statsd_timer_ooni_api_uploader_update_db_table_milliseconds_average +netdata_statsd_timer_ooni_api_uploader_upload_measurement_milliseconds_average +``` + +Metrics for the [GeoIP downloader](#geoip-downloader) βš™. + +``` +netdata_statsd_gauge_ooni_download_geoip_geoip_asn_epoch_value_average +netdata_statsd_gauge_ooni_download_geoip_geoip_asn_node_cnt_value_average +netdata_statsd_gauge_ooni_download_geoip_geoip_cc_epoch_value_average +netdata_statsd_gauge_ooni_download_geoip_geoip_cc_node_cnt_value_average +netdata_statsd_timer_ooni_download_geoip_download_geoip_milliseconds_average +``` + +Metrics for the [test helper rotation](#test-helper-rotation) βš™. 
+
+```
+netdata_statsd_timer_rotation_create_le_do_ssl_cert_milliseconds_average
+netdata_statsd_timer_rotation_deploy_ssl_cert_milliseconds_average
+netdata_statsd_timer_rotation_destroy_drained_droplets_milliseconds_average
+netdata_statsd_timer_rotation_end_to_end_test_milliseconds_average
+netdata_statsd_timer_rotation_run_time_milliseconds_average
+netdata_statsd_timer_rotation_scp_file_milliseconds_average
+netdata_statsd_timer_rotation_setup_nginx_milliseconds_average
+netdata_statsd_timer_rotation_setup_vector_milliseconds_average
+netdata_statsd_timer_rotation_spawn_new_droplet_milliseconds_average
+netdata_statsd_timer_rotation_ssh_reload_nginx_milliseconds_average
+netdata_statsd_timer_rotation_ssh_restart_netdata_milliseconds_average
+netdata_statsd_timer_rotation_ssh_restart_nginx_milliseconds_average
+netdata_statsd_timer_rotation_ssh_restart_vector_milliseconds_average
+netdata_statsd_timer_rotation_ssh_wait_droplet_warmup_milliseconds_average
+netdata_statsd_timer_rotation_update_dns_records_milliseconds_average
+```
+
+
+### Prometheus
+Prometheus is a popular monitoring system and
+runs on [monitoring.ooni.org](#monitoring.ooni.org) πŸ–₯
+
+It is deployed and configured by [Ansible](#ansible) πŸ”§ using the
+following playbook:
+
+
+Most of the metrics are collected by scraping Prometheus endpoints,
+Netdata and the node exporter. The web UI is accessible at
+
+
+#### Blackbox exporter
+Blackbox exporter is part of Prometheus. It's a daemon that performs HTTP
+probing against other hosts without relying on local agents (hence the name Blackbox)
+and feeds the generated datapoints into Prometheus.
+
+See
+
+It is deployed by
+[Ansible](#tool:ansible) on the [monitoring.ooni.org](#monitoring.ooni.org) πŸ–₯
+
+See
+[Updating Blackbox Exporter runbook](#updating-blackbox-exporter-runbook) πŸ“’
+
+
+### Grafana dashboards
+There are a number of dashboards on [Grafana](#grafana) πŸ”§ at
+
+
+[Grafana](#grafana) πŸ”§ is deployed on the
+[monitoring.ooni.org](#monitoring.ooni.org) πŸ–₯ host. See
+[Monitoring deployment runbook](#monitoring-deployment-runbook) πŸ“’ for deployment.
+
+The dashboards are used for:
+
+ * Routinely reviewing the general health of the backend infrastructure
+
+ * Predicting long-term scaling requirements, i.e.
+
+   * increasing disk space for the database
+
+   * increasing CPU and memory requirements
+
+ * Investigating alerts and troubleshooting incidents
+
+
+#### Alerting
+Alerts from [Grafana](#tool:grafana) and [Prometheus](#prometheus) πŸ”§
+are sent to the [#ooni-bots](#topic:oonibots) [Slack](#slack) πŸ”§
+channel by a bot.
+
+[Slack](#slack) πŸ”§ can be configured to provide desktop notifications
+from browsers and audible notifications on smartphones.
+
+Alert flow:
+
+![Diagram](https://kroki.io/blockdiag/svg/eNp1jUEKwjAQRfc9xTBd9wSioBtxV3ApIpNmYktjJiQpCuLdTbvQIDirP7zH_8pKN-qBrvCsQLOhyaZL7MkzrCHI5DRrJY9VBW2QG6eepwinTqyELGDN-YzBcxb2gQw5-kOxFnFDoyRFLBVjZmlRioVm86nLEY-WuhG27QGXt6z6YvIef4dmugtyjxwye70BaPFK1w==)
+
+
+
+The diagram does not explicitly include alertmanager. It is part of Prometheus and receives alerts and routes them to Slack.
+
+More detailed diagram:
+
+```mermaid
+flowchart LR
+    P(Prometheus) -- datapoints --> G(Grafana)
+    G --> A(Alertmanager)
+    A --> S(Slack API) --> O(#ooni-bots)
+    P --> A
+    O --> R(Browser / apps)
+    J(Jupyter notebook) --> A
+    classDef box fill:#eeffee,stroke:#777,stroke-width:2px;
+    class P,G,A,S,O,R,J box;
+```
+
+In the diagram Prometheus receives, stores and serves datapoints and has some alert rules to trigger alerts.
+Grafana acts as a UI for Prometheus and also triggers alerts based on alert rules configured in Grafana itself.
+
+Alertmanager is pretty simple: it receives alerts and sends notifications to Slack.
+
+The alert rules are listed at
+The list also shows which alerts are firing at the moment, if any. There
+is also a handful of alerts configured in [Prometheus](#prometheus) πŸ”§
+using [Ansible](#ansible) πŸ”§.
+
+The silences list shows if any alert has been temporarily silenced:
+
+
+See [Grafana editing](#grafana-editing) πŸ“’ and
+[Managing Grafana alert rules](#managing-grafana-alert-rules) πŸ“’ for details.
+
+There are also many dashboards and alerts configured in
+[Jupyter Notebook](#jupyter-notebook) πŸ”§. These are meant for metrics that require more
+complex algorithms, predictions and SQL queries that cannot be
+implemented using [Grafana](#grafana) πŸ”§ e.g. when using machine learning or Pandas.
+See [Ooniutils microlibrary](#ooniutils-microlibrary) πŸ’‘ for details.
+
+On many dashboards you can set the averaging timespan and the target
+hostname using fields on the top left.
+
+Here is an overview of the most useful dashboards:
+
+
+#### API and fastpath
+
+
+This is the most important dashboard showing metrics of the
+[API](#comp:api) and the [Fastpath](#fastpath) βš™.
+
+
+#### Test-list repository in the API
+
+
+This dashboard shows timings around the git repository checked out by the
+[API](#api) βš™ that contains the test lists.
+
+
+#### Measurement uploader dashboard
+
+
+This dashboard shows metrics, timing and amounts of data transferred by the
+[Measurement uploader](#measurement-uploader) βš™
+
+
+#### Fingerprint updater dashboard
+
+
+This dashboard shows metrics and timing from the
+[Fingerprint updater](#fingerprint-updater) βš™
+
+
+#### ClickHouse dashboard
+
+
+This dashboard shows ClickHouse-specific performance metrics.
+It can be used for optimizations.
+
+For investigating slow queries also see the [ClickHouse queries notebook](#clickhouse-queries-notebook) πŸ“”.
+
+
+#### HaProxy dashboard
+
+
+Basic metrics from [HaProxy](#haproxy) βš™ load balancers. Used for
+[OONI bridges](#ooni-bridges) βš™.
+
+
+#### TLS certificate dashboard
+
+
+Certificate expiration times. There are alerts configured in
+[Grafana](#grafana) πŸ”§ to alert on expiring certificates.
+
+
+#### Test helpers dashboard
+
+
+Status, uptime and load metrics from the
+[Test helpers](#test-helpers) βš™.
+
+
+#### Database backup dashboard
+
+
+Metrics, timing and data transferred by the
+[Database backup tool](#database-backup-tool) βš™
+
+By looking at the last 24 hours you should be able to see the backup
+being run.
+
+The "Status" chart shows the running status.
+"Uploaded bytes in total" and "Backup time" should be self-explanatory.
+
+TIP: If the backup time or size grows too much it could be worth alerting on and considering implementing incremental backups.
+
+
+#### Event detector dashboard
+
+
+Basic metrics from the
+[social media blocking event detector](#social-media-blocking-event-detector) βš™
+
+
+#### GeoIP MMDB database dashboard
+
+
+Age and size of the GeoIP MMDB database. Also, a chart showing
+discrepancies between the lookup performed by the probes VS the one in
+the API, used to gauge the benefits of using a centralized solution.
+
+Also see [Geolocation script](#geolocation-script) 🐍
+
+See [GeoIP downloader](#geoip-downloader) βš™
+
+
+#### Host clock offset dashboard
+
+
+Measures NTP clock sync and alarms on big offsets.
+
+
+#### Netdata-specific dashboard
+
+
+Shows all the metrics captured by [Netdata](#netdata) πŸ”§ - useful for
+in-depth performance investigation.
+
+
+#### ASN metadata updater dashboard
+
+
+Progress, runtime and table size of the [ASN metadata updater](#asn-metadata-updater) βš™
+
+See [Metrics list](#metrics-list) πŸ’‘
+
+
+### Netdata
+Netdata is a monitoring agent that runs
+locally on the backend servers. It exports host metrics and
+[Application metrics](#topic:appmetrics) to [Prometheus](#prometheus) πŸ”§.
+
+It also provides a web UI that can be accessed on port 19999. It can be
+useful during development, performance optimization and debugging as it
+provides metrics with higher time granularity (1 second) and almost no
+delay.
+
+Netdata is not exposed on the Internet for security reasons and can be
+accessed only when needed by setting up port forwarding using SSH. For
+example:
+
+```bash
+ssh ams-pg-test.ooni.org -L 19998:127.0.0.1:19999
+```
+
+Netdata can also be run on a development desktop and be accessed locally
+in order to explore application metrics without having to deploy
+[Prometheus](#tool:prometheus) and [Grafana](#grafana) πŸ”§.
+
+See the [Netdata-specific dashboard](#netdata-specific-dashboard) πŸ“Š for an example of native
+Netdata metrics.
+
+
+## Log management
+All components of the backend are designed to output logs to Systemd's
+journald. They usually log using the component name as Systemd unit
+name.
+
+Sometimes you might have to use `--identifier <name>` instead for
+scripts that are not run as Systemd units.
+
+Journald automatically indexes logs by time, unit name and other items.
+This allows quickly filtering logs during troubleshooting, for example:
+
+```bash
+sudo journalctl -u ooni-api --since '10 m ago'
+```
+
+Or follow live logs using e.g.:
+
+```bash
+sudo journalctl -u nginx -f
+```
+
+Sometimes it is useful to show milliseconds in the timestamps:
+
+```bash
+sudo journalctl -f -u ooni-api -o short-precise
+```
+
+The logger used in Python components also sets additional fields,
+notably `CODE_FUNC` and `CODE_LINE`.
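+
+A sketch of how such a logger can be set up using the `python-systemd`
+package, whose `JournalHandler` records `CODE_FUNC` and `CODE_LINE`
+automatically (the identifier below is an example):
+
+```python
+import logging
+from systemd.journal import JournalHandler
+
+log = logging.getLogger("ooni-api")
+log.addHandler(JournalHandler(SYSLOG_IDENTIFIER="ooni-api"))
+log.setLevel(logging.DEBUG)
+log.info("example message")  # query with: journalctl SYSLOG_IDENTIFIER=ooni-api
+```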
+
+Available fields can be listed using:
+
+```bash
+sudo journalctl -f -u ooni-api -N | sort
+```
+
+It is possible to filter by those fields. It comes in very handy for
+debugging e.g.:
+
+```bash
+sudo journalctl -f -u ooni-api CODE_FUNC=open_report
+```
+
+Every host running backend services also sends host logs to
+monitoring.ooni.org using [Vector](#vector) πŸ”§.
+
+![Diagram](https://kroki.io/blockdiag/svg/eNrFks9qwzAMxu95CpNel_gYWOlgDEqfYJdRiv_IiYltBccphdF3n5yyNellt01H6ZO_nyxJh6rXVrTss2AajJhcOo2dGIDtWMQpaNASL9uCvQ6Ds0oki4FVL-wdVMJYjUCKuEhEUGDPt9QbNfQHnEZ46P9Q6DCSQ7kxBijKIynWTy40WWFM-cS6CGaXU11Kw_jMeWtTN8laoeeIwXIpVE_tlUY1eQhptuPSoeRe2PBdP63qtdeb8-y9xPgZ5N9A7t_3xwwqG3fZOHMUrKVDGPKBUCzWuF1vjIivD-LfboLCCQkuT-EJmcQ2tHWmrzG25U1yn71p9vumKWen6xdypu8x)
+
+
+There is a dedicated ClickHouse instance on monitoring.ooni.org used to
+collect logs. See the [ClickHouse instance for logs](#clickhouse-instance-for-logs) βš™.
+This is done to avoid adding unnecessary load to the production database
+on FSN that contains measurements and also keep a copy of FSN's logs on
+a different host.
+
+The receiving [Vector](#vector) πŸ”§ instance and ClickHouse are
+deployed and configured by [Ansible](#ansible) πŸ”§ using the following
+playbook:
+
+
+See [Logs from FSN notebook](#logs-from-fsn-notebook) πŸ“” and
+[Logs investigation notebook](#logs-investigation-notebook) πŸ“”
+
+
+### Slack
+[Slack](https://slack.com/) is used for team messaging and automated
+alerts at the following instance:
+
+
+#### #ooni-bots
+`#ooni-bots` is a [Slack](#slack) πŸ”§ channel used for automated
+alerts:
diff --git a/docs/Runbooks.md b/docs/Runbooks.md
new file mode 100644
index 00000000..550d6978
--- /dev/null
+++ b/docs/Runbooks.md
@@ -0,0 +1,1155 @@
+# Runbooks
+
+Below you will find runbooks for common tasks and operations to manage our infra.
+
+## Monitoring deployment runbook
+
+The monitoring stack is deployed and configured by
+[Ansible](#tool:ansible) on the [monitoring.ooni.org](#monitoring.ooni.org) πŸ–₯
+host using the following playbook:
+
+
+It includes:
+
+- [Grafana](#grafana) πŸ”§ at
+
+- [Jupyter Notebook](#jupyter-notebook) πŸ”§ at
+
+- [Vector](#tool:vector) (see [Log management](#log-management) πŸ’‘)
+
+- local [Netdata](#tool:netdata), [Blackbox exporter](#blackbox-exporter) πŸ”§, etc.
+
+- [Prometheus](#prometheus) πŸ”§ at
+
+It also configures the FQDNs:
+
+- loghost.ooni.org
+
+- monitoring.ooni.org
+
+- netdata.ooni.org
+
+This also includes the credentials to access the Web UIs. They are
+deployed as `/etc/nginx/monitoring.htpasswd` from
+`ansible/roles/monitoring/files/htpasswd`
+
+**Warning** the following steps are dangerously broken. Applying the changes
+will either not work or, worse, break production.
+
+If you must do something of this sort, you will unfortunately have to resort to
+specifying the particular substeps you want to run using the `-t` tag filter
+(e.g. `-t prometheus-conf` to update the Prometheus configuration).
+
+Steps:
+
+1. Review [Ansible playbooks summary](#ansible-playbooks-summary) πŸ“’,
+   [Deploying a new host](#run:newhost) and [Grafana dashboards](#grafana-dashboards) πŸ’‘.
+
+2. Run `./play deploy-monitoring.yml -l monitoring.ooni.org --diff -C`
+   and review the output
+
+3. Run `./play deploy-monitoring.yml -l monitoring.ooni.org --diff` and
+   review the output
+
+## Updating Blackbox Exporter runbook
+
+This runbook describes updating [Blackbox exporter](#blackbox-exporter) πŸ”§.
+
+The `blackbox_exporter` role in ansible is pulled in by the `deploy-monitoring.yml`
+runbook.
+
+The configuration file is at `roles/blackbox_exporter/templates/blackbox.yml.j2`
+together with `host_vars/monitoring.ooni.org/vars.yml`.
+
+To add a simple HTTP[S] check, for example, you can copy the "ooni website" block.
+
+Edit it and run the deployment of the monitoring stack as described in the previous subchapter.
+
+## Deploying a new host
+
+To deploy a new host:
+
+1. Choose a FQDN like $name.ooni.org based on the
+   [DNS naming policy](#dns-naming-policy) πŸ’‘
+
+2. Deploy the physical host or VM using Debian Stable
+
+3. Create `A` and `AAAA` records for the FQDN in the Namecheap web UI
+
+4. Follow [Updating DNS diagrams](#updating-dns-diagrams) πŸ“’
+
+5. Review the `inventory` file and git-commit it
+
+6. Deploy the required stack. Run ansible in test mode first.
+   For example, this would deploy a backend host:
+
+       ./play deploy-backend.yml --diff -l <host>.ooni.org -C
+       ./play deploy-backend.yml --diff -l <host>.ooni.org
+
+7. Update [Prometheus](#prometheus) πŸ”§ by following
+   [Monitoring deployment runbook](#monitoring-deployment-runbook) πŸ“’
+
+8. git-push the commits
+
+Also see [Monitoring deployment runbook](#monitoring-deployment-runbook) πŸ“’ for an
+example of deployment.
+
+## Deleting a host
+
+1. Remove it from `inventory`
+
+2. Update the monitoring deployment using:
+
+```
+./play deploy-monitoring.yml -t prometheus-conf -l monitoring.ooni.org --diff
+```
+
+## Weekly measurements review runbook
+
+On a daily or weekly basis the following dashboards and Jupyter notebooks can be reviewed to detect unexpected patterns in measurements, focusing on measurement drops, slowdowns or any potential issue affecting the backend infrastructure.
+
+When browsing the dashboards expand the time range to one year in order to spot long term trends.
+Also zoom in to the last month to spot small glitches that could otherwise go unnoticed.
+
+Review the [API and fastpath](#api-and-fastpath) πŸ“Š dashboard for the production backend host[s] for measurement flow, CPU and memory load,
+timings of various API calls, disk usage.
+
+Review the [Incoming measurements notebook](#incoming-measurements-notebook) πŸ“” for unexpected trends.
+
+Quickly review the following dashboards for unexpected changes:
+
+ * [Long term measurements prediction notebook](#long-term-measurements-prediction-notebook) πŸ“”
+ * [Test helpers dashboard](#test-helpers-dashboard) πŸ“Š
+ * [Test helper failure rate notebook](#test-helper-failure-rate-notebook) πŸ“”
+ * [Database backup dashboard](#database-backup-dashboard) πŸ“Š
+ * [GeoIP MMDB database dashboard](#geoip-mmdb-database-dashboard) πŸ“Š
+ * [Fingerprint updater dashboard](#fingerprint-updater-dashboard) πŸ“Š
+ * [ASN metadata updater dashboard](#asn-metadata-updater-dashboard) πŸ“Š
+
+Also check for glitches like notebooks not being run etc.
+
+
+## Grafana backup runbook
+This runbook describes how to back up dashboards and alarms in Grafana.
+It does not include backing up datapoints stored in
+[Prometheus](#prometheus) πŸ”§.
+
+The Grafana SQLite database can be dumped by running:
+
+```bash
+sqlite3 -line /var/lib/grafana/grafana.db '.dump' > grafana_dump.sql
+```
+
+Future implementation is tracked in:
+[Implement Grafana dashboard and alarms backup](#implement-grafana-dashboard-and-alarms-backup) 🐞
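+
+In the meantime, dashboards can also be exported as JSON through the
+standard Grafana HTTP API. A rough sketch, assuming an API token with
+read access (the token and output paths are placeholders):
+
+```python
+import json, pathlib, requests
+
+BASE = "https://grafana.ooni.org"
+HDRS = {"Authorization": "Bearer <API_TOKEN>"}
+
+# List all dashboards, then fetch each one and store it as a JSON file
+for item in requests.get(f"{BASE}/api/search?type=dash-db", headers=HDRS).json():
+    dash = requests.get(f"{BASE}/api/dashboards/uid/{item['uid']}", headers=HDRS).json()
+    pathlib.Path(f"{item['uid']}.json").write_text(json.dumps(dash, indent=2))
+```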
+
+
+## Grafana editing
+This runbook describes adding new dashboards, panels and alerts in
+[Grafana](#grafana) πŸ”§
+
+To add a new dashboard use this
+
+
+To add a new panel to an existing dashboard load the dashboard and then
+click the "Add" button on the top.
+
+Many dashboards use variables. For example, on
+
+the variables `$host` and `$avgspan` are set on the top left and used in
+metrics like:
+
+    avg_over_time(netdata_disk_backlog_milliseconds_average{instance="$host:19999"}[$avgspan])
+
+
+### Managing Grafana alert rules
+Alert rules can be listed at
+
+> **note**
+> The list also shows which alerts are currently alarming, if any.
+
+Click the arrow on the left to expand each alerting rule.
+
+The list shows:
+
+![editing_alerts](../../../assets/images-backend/grafana_alerts_editing.png)
+
+> **note**
+> When creating alerts it can be useful to add full URLs linking to
+> dashboards, runbooks etc.
+
+To stop notifications create a "silence" either:
+
+1. by further expanding an alert rule (see below) and clicking the
+   "Silence" button
+
+2. by inputting it in
+
+Screenshot:
+
+![adding_silence](../../../assets/images-backend/grafana_alerts_silence.png)
+
+Additionally, the "Show state history" button is useful especially
+with flapping alerts.
+
+
+### Adding new fingerprints
+This is performed on
+
+Updates are fetched automatically by
+[Fingerprint updater](#fingerprint-updater) βš™
+
+Also see [Fingerprint updater dashboard](#fingerprint-updater-dashboard) πŸ“Š.
+
+
+### Backend code changes
+This runbook describes making changes to backend components and
+deploying them.
+
+Summary of the steps:
+
+1. Check out the backend repository.
+
+2. Create a dedicated branch.
+
+3. Update `debian/changelog` in the component you want to modify. See
+   [Package versioning](#package-versioning) πŸ’‘ for details.
+
+4. Run unit/functional/integration tests as needed.
+
+5. Create a pull request.
+
+6. Ensure the CI workflows are successful.
+
+7. Deploy the package on the testbed [ams-pg-test.ooni.org](#ams-pg-test.ooni.org) πŸ–₯
+   and verify the change works as intended.
+
+8. Add a comment to the PR with the deployed version and stage.
+
+9. Wait for the PR to be approved.
+
+10. Deploy the package to production on
+    [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯. Ensure it is the same version
+    that has been used on the testbed. See [API runbook](#api-runbook) πŸ“’ for
+    deployment steps.
+
+11. Add a comment to the PR with the deployed version and stage, then merge
+    the PR.
+
+When introducing new metrics:
+
+1. Create [Grafana](#grafana) πŸ”§ dashboards, alerts and
+   [Jupyter Notebook](#jupyter-notebook) πŸ”§ and link them in the PR.
+
+2. Collect and analyze metrics and logs from the testbed stages before
+   deploying to production.
+
+3. Test alarming by simulating incidents.
+
+### Backend component deployment
+This runbook provides general steps to deploy backend components on
+production hosts.
+
+Review the package changelog and the related pull request.
+
+The amount of testing and monitoring required depends on:
+
+1. the impact of possible bugs in terms of number of users affected and
+   consequences
+
+2. the level of risk involved in rolling back the change, if needed
+
+3. the complexity of the change and the risk of unforeseen impact
+
+Monitor the [API and fastpath](#api-and-fastpath) πŸ“Š and dedicated dashboards. Review past
+weeks for any anomaly before starting a deployment.
+
+Ensure the database schema is consistent with the new deployment, either
+by creating tables and columns manually or by letting the new codebase
+update the database automatically.
+
+Quickly check past logs.
+
+Follow logs with:
+
+``` bash
+sudo journalctl -f --no-hostname
+```
+
+While monitoring the logs, deploy the package using
+[The deployer tool](#the-deployer-tool) πŸ”§. (Details in the tool's subchapter.)
+
+
+### API runbook
+This runbook describes making changes to the [API](#api) βš™ and
+deploying it.
+
+Follow [Backend code changes](#backend-code-changes) πŸ“’ and
+[Backend component deployment](#backend-component-deployment) πŸ“’.
+
+In addition, monitor logs from Nginx and API focusing on HTTP errors and
+failing SQL queries.
+
+Manually check [Explorer](#explorer) πŸ–± and other
+[Public and private web UIs](#public-and-private-web-uis) πŸ’‘ as needed.
+
+
+#### Managing feature flags
+To change feature flags in the API a simple pull request like
+ is enough.
+
+Follow [Backend code changes](#backend-code-changes) πŸ“’ and deploy it after
+basic testing on [ams-pg-test.ooni.org](#ams-pg-test.ooni.org) πŸ–₯.
+
+
+### Running database queries
+This subsection describes how to run queries against
+[ClickHouse](#clickhouse) βš™. You can run queries from
+[Jupyter Notebook](#jupyter-notebook) πŸ”§ or from the CLI:
+
+```bash
+    ssh <host>
+    $ clickhouse-client
+```
+
+Prefer using the default user when possible. To log in as admin:
+
+```bash
+    $ clickhouse-client -u admin --password
+```
+
+> **note**
+> Heavy queries can impact the production database. When in doubt run them
+> on the CLI interface in order to terminate them using CTRL-C if needed.
+
+> **warning**
+> ClickHouse is not transactional! Always test queries that mutate schemas
+> or data on testbeds like [ams-pg-test.ooni.org](#ams-pg-test.ooni.org) πŸ–₯
+
+For long running queries see the use of timeouts in
+[Fastpath deduplication](#fastpath-deduplication) πŸ“’
+
+Also see [Dropping tables](#dropping-tables) πŸ“’,
+[Investigating table sizes](#investigating-table-sizes) πŸ“’
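+
+Queries can also be run from Python, e.g. using the `clickhouse-driver`
+package. A minimal sketch, where the host and the query are illustrative:
+
+```python
+from clickhouse_driver import Client
+
+client = Client("localhost")
+# Count the measurements that entered the fastpath in the last day
+rows = client.execute(
+    "SELECT count() FROM fastpath WHERE measurement_start_time > today() - interval 1 day"
+)
+print(rows[0][0])
+```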
+
+
+#### Modifying the fastpath table
+This runbook shows an example of changing the contents of the
+[fastpath table](#fastpath-table) ⛁ by running a "mutation" query.
+
+> **warning**
+> This method creates changes that cannot be reproduced by external
+> researchers by [Reprocessing measurements](#reprocessing-measurements) πŸ“’. See
+> [Reproducibility](#reproducibility) πŸ’‘
+
+In this example [Signal test](#signal-test) Ⓣ measurements are being
+flagged as failed due to
+
+Summarize affected measurements with:
+
+``` sql
+SELECT test_version, msm_failure, count()
+FROM fastpath
+WHERE test_name = 'signal' AND measurement_start_time > '2023-11-06T16:00:00'
+GROUP BY msm_failure, test_version
+ORDER BY test_version ASC
+```
+
+> **important**
+> `ALTER TABLE … UPDATE` starts a
+> [mutation](https://clickhouse.com/docs/en/sql-reference/statements/alter#mutations)
+> that runs in the background.
+
+Check for any running or stuck mutation:
+
+``` sql
+SELECT * FROM system.mutations WHERE is_done != 1
+```
+
+Start the mutation:
+
+``` sql
+ALTER TABLE fastpath
+UPDATE
+    msm_failure = 't',
+    anomaly = 'f',
+    scores = '{"blocking_general":0.0,"blocking_global":0.0,"blocking_country":0.0,"blocking_isp":0.0,"blocking_local":0.0,"accuracy":0.0,"msg":"bad test_version"}'
+WHERE test_name = 'signal'
+AND measurement_start_time > '2023-11-06T16:00:00'
+AND msm_failure = 'f'
+```
+
+Run the previous `SELECT` queries to monitor the mutation and its
+outcome.
+
+
+### Updating tor targets
+See [Tor targets](#tor-targets) 🐝 for a general description.
+
+Review the [Ansible](#ansible) πŸ”§ chapter. Check out the repository and
+update the file `ansible/roles/ooni-backend/templates/tor_targets.json`
+
+Commit the changes and deploy as usual:
+
+    ./play deploy-backend.yml --diff -l ams-pg-test.ooni.org -t api -C
+    ./play deploy-backend.yml --diff -l ams-pg-test.ooni.org -t api
+
+Test the updated configuration, then:
+
+    ./play deploy-backend.yml --diff -l backend-fsn.ooni.org -t api -C
+    ./play deploy-backend.yml --diff -l backend-fsn.ooni.org -t api
+
+git-push the changes.
+
+Implements [Document Tor targets](#document-tor-targets) 🐞
+
+
+### Creating admin API accounts
+See [Auth](#auth) 🐝 for a description of the API entry points related
+to account management.
+
+The API provides entry points to:
+
+ * [get role](https://api.ooni.io/apidocs/#/default/get_api_v1_get_account_role__email_address_)
+
+ * [set role](https://api.ooni.io/apidocs/#/default/post_api_v1_set_account_role).
+
+The latter is implemented
+[here](https://github.com/ooni/backend/blob/0ec9fba0eb9c4c440dcb7456f2aab529561104ae/api/ooniapi/auth.py#L437).
+
+> **important**
+> The default value for API accounts is `user`. For such accounts there is
+> no need for a record in the `accounts` table.
+
+To change roles it is required to be authenticated and have a role as
+`admin`.
+
+It is also possible to create or update roles by running SQL queries
+directly on [ClickHouse](#clickhouse) βš™. This can be necessary to
+create the initial `admin` account on a new deployment stage.
+
+A quick way to identify the account ID of a user is to extract logs from
+the [API](#api) βš™ either from the backend host or using
+[Logs from FSN notebook](#logs-from-fsn-notebook) πŸ“”
+
+```bash
+sudo journalctl --since '5 min ago' -u ooni-api | grep 'SELECT role FROM accounts WHERE account_id' -C5
+```
+
+Example output:
+
+    Nov 09 16:03:00 backend-fsn ooni-api[1763457]: DEBUG Query: SELECT role FROM accounts WHERE account_id = ''
+
+Then on the database test host:
+
+```bash
+clickhouse-client
+```
+
+Then in the ClickHouse shell insert a record to give the `admin` role to
+the user. See [Running database queries](#running-database-queries) πŸ“’:
+
+```sql
+INSERT INTO accounts (account_id, role) VALUES ('', 'admin')
+```
+
+`accounts` is an EmbeddedRocksDB table with `account_id` as primary key.
+No record deduplication is necessary.
+
+To access the new role the user has to log out from web UIs and log in
+again.
+
+> **important**
+> Account IDs are not the same across test and production instances.
+
+This is due to the use of a configuration variable
+`ACCOUNT_ID_HASHING_KEY` in the hashing of the email address. The
+parameter is read from the API configuration file. The values are
+different across deployment stages as a security feature.
+
+
+### Fastpath runbook
+
+#### Fastpath code changes and deployment
+Review [Backend code changes](#backend-code-changes) πŸ“’ and
+[Backend component deployment](#backend-component-deployment) πŸ“’ for changes and deployment of the
+backend stack in general.
+
+Also see [Modifying the fastpath table](#modifying-the-fastpath-table) πŸ“’
+
+In addition, monitor logs and [Grafana dashboards](#grafana-dashboards) πŸ’‘
+focusing on changes in incoming measurements.
+
+You can use [The deployer tool](#the-deployer-tool) πŸ”§ to perform
+deployment and rollbacks of the [Fastpath](#fastpath) βš™.
+
+> **important**
+> the fastpath is configured **not** to restart automatically during
+> deployment.
+
+Always monitor logs and restart it as needed:
+
+```bash
+sudo systemctl restart fastpath
+```
+
+
+#### Fastpath manual deployment
+Sometimes it can be useful to run APT directly:
+
+```bash
+ssh <host>
+sudo apt-get update
+apt-cache show fastpath | grep Ver | head -n5
+sudo apt-get install fastpath=<version>
+```
+
+
+#### Reprocessing measurements
+Reprocess old measurements by running the fastpath manually. This can be
+done without shutting down the fastpath instance running on live
+measurements.
+
+You can run the fastpath as root or using the fastpath user. Both users
+are able to read the configuration file under `/etc/ooni`. The fastpath
+will download [Postcans](#postcans) πŸ’‘ in the local directory.
+
+`fastpath -h` generates:
+
+    usage:
+    OONI Fastpath
+
+    See README.adoc
+
+    [-h] [--start-day START_DAY] [--end-day END_DAY]
+    [--devel] [--noapi] [--stdout] [--debug]
+    [--db-uri DB_URI]
+    [--clickhouse-url CLICKHOUSE_URL] [--update]
+    [--stop-after STOP_AFTER] [--no-write-to-db]
+    [--keep-s3-cache] [--ccs CCS]
+    [--testnames TESTNAMES]
+
+    options:
+      -h, --help            show this help message and exit
+      --start-day START_DAY
+      --end-day END_DAY
+      --devel               Devel mode
+      --noapi               Process measurements from S3 and do not start API feeder
+      --stdout              Log to stdout
+      --debug               Log at debug level
+      --clickhouse-url CLICKHOUSE_URL
+                            ClickHouse url
+      --stop-after STOP_AFTER
+                            Stop after feeding N measurements from S3
+      --no-write-to-db      Do not insert measurement in database
+      --ccs CCS             Filter comma-separated CCs when feeding from S3
+      --testnames TESTNAMES
+                            Filter comma-separated test names when feeding from S3 (without
+                            underscores)
+
+To run the fastpath manually use:
+
+    ssh <host>
+    sudo sudo -u fastpath /bin/bash
+
+    fastpath --help
+    fastpath --start-day 2023-08-14 --end-day 2023-08-19 --noapi --stdout
+
+The `--no-write-to-db` option can be useful for testing.
+
+The `--ccs` and `--testnames` flags are useful to selectively reprocess
+measurements.
+
+After reprocessing measurements it's recommended to manually deduplicate
+the contents of the `fastpath` table. See
+[Fastpath deduplication](#fastpath-deduplication) πŸ“’
+
+> **note**
+> it is possible to run multiple `fastpath` processes using
+> with different time ranges.
+> Running the reprocessing under `byobu` is recommended.
+
+The fastpath will pull [Postcans](#postcans) πŸ’‘ from S3. See
+[Feed fastpath from JSONL](#feed-fastpath-from-jsonl) 🐞 for possible speedup.
+
+
+#### Fastpath monitoring
+The fastpath pipeline can be monitored using the
+[Fastpath dashboard](#dash:api_fp) and [API and fastpath](#api-and-fastpath) πŸ“Š.
+
+Also follow the process in real time using:
+
+    sudo journalctl -f -u fastpath
+
+
+### Android probe release runbook
+This runbook is meant to help coordinate Android probe releases between
+the probe and backend developers and public announcements. It does not
+contain detailed instructions for individual components.
+
+Also see the [Measurement drop runbook](#measurement-drop-tutorial) πŸ“’.
+
+
+Roles: \@probe, \@backend, \@media
+
+
+#### Android pre-release
+\@probe: drive the process involving the other teams as needed. Create
+calendar events to track the next steps. Run the probe checklist
+
+
+\@backend: review
+
+and
+
+for long-term trends
+
+
+#### Android release
+\@probe: release the probe for early adopters
+
+\@backend: monitor
+
+frequently during the first 24h and report any drop on
+[Slack](#slack) πŸ”§
+
+\@probe: wait at least 24h then release the probe for all users
+
+\@backend: monitor
+
+daily for 14 days and report any drop on [Slack](#slack) πŸ”§
+
+\@probe: wait at least 24h then poke \@media to announce the release
+
+(
+
+
+### CLI probe release runbook
+This runbook is meant to help coordinate CLI probe releases between the
+probe and backend developers and public announcements. It does not
+contain detailed instructions for individual components.
+
+Roles: \@probe, \@backend, \@media
+
+
+#### CLI pre-release
+\@probe: drive the process involving the other teams as needed. Create
+calendar events to track the next steps. Run the probe checklist and
+review the CI.
+
+\@backend: review
+\[jupyter\]()
+and
+\[grafana\]()
+for long-term trends
+
+
+#### CLI release
+\@probe: release the probe for early adopters
+
+\@backend: monitor
+\[jupyter\]()
+frequently during the first 24h and report any drop on
+[Slack](#slack) πŸ”§
+
+\@probe: wait at least 24h then release the probe for all users
+
+\@backend: monitor
+\[jupyter\]()
+daily for 14 days and report any drop on [Slack](#slack) πŸ”§
+
+\@probe: wait at least 24h then poke \@media to announce the release
+
+
+### Investigating heavy aggregation queries runbook
+In the following scenario the [Aggregation and MAT](#aggregation-and-mat) 🐝 API is
+experiencing query timeouts impacting users.
+
+Reproduce the issue by setting a large enough time span on the MAT,
+e.g.:
+
+
+Click on the link to JSON, e.g.
+
+
+Review the [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯ metrics on
+
+(see [Netdata-specific dashboard](#netdata-specific-dashboard) πŸ“Š for details)
+
+Also review the [API and fastpath](#api-and-fastpath) πŸ“Š dashboard, looking at
+CPU load, disk I/O, query time, measurement flow.
+
+Also see [Aggregation cache monitoring](#aggregation-cache-monitoring) 🐍
+
+Refresh and review the charts on the [ClickHouse queries notebook](#clickhouse-queries-notebook) πŸ“”.
+
+In this instance frequent calls to the aggregation API are found.
+
+Review the summary of the API quotas. See
+[Calling the API manually](#calling-the-api-manually) πŸ“’ for details:
+
+    $ http https://api.ooni.io/api/_/quotas_summary Authorization:'Bearer <token>'
+
+Log on [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯ and review the logs:
+
+    backend-fsn:~$ sudo journalctl --since '5 min ago'
+
+Summarize the subnets calling the API:
+
+    backend-fsn:~$ sudo journalctl --since '5 hour ago' -u ooni-api -u nginx | grep aggreg | cut -d' ' -f 8 | sort | uniq -c | sort -nr | head
+
+      807
+      112
+       92
+       38
+       16
+       15
+       11
+       11
+       10
+
+To block IP addresses or subnets see [Nginx](#nginx) βš™ or
+[HaProxy](#haproxy) βš™, then configure the required file in
+[Ansible](#ansible) πŸ”§ and deploy.
+
+Also see [Limiting scraping](#limiting-scraping) πŸ“’.
+
+
+### Aggregation cache monitoring
+To monitor the cache hit/miss ratio using StatsD metrics the following
+script can be run as needed.
+
+See [Metrics list](#metrics-list) πŸ’‘.
+
+``` python
+import subprocess
+
+import statsd
+metrics = statsd.StatsClient('localhost', 8125)
+
+def main():
+    # Field 10 of the nginx log lines is expected to hold the cache status (HIT/MISS)
+    cmd = "sudo journalctl --since '5 min ago' -u nginx | grep 'GET /api/v1/aggregation' | cut -d ' ' -f 10 | sort | uniq -c"
+    out = subprocess.check_output(cmd, shell=True)
+    for line in out.splitlines():
+        cnt, name = line.strip().split()
+        name = name.decode()
+        metrics.gauge(f"nginx_aggregation_cache_{name}", int(cnt))
+
+if __name__ == '__main__':
+    main()
+```
+
+
+### Limiting scraping
+Aggressive bots and scrapers can be limited using a combination of
+methods, listed below ordered starting from the most user-friendly:
+
+1. Reduce the impact on the API (CPU, disk I/O, memory usage) by
+   caching the results.
+
+2. [Rate limiting and quotas](#rate-limiting-and-quotas) 🐝 are already built into the API. It
+   might be necessary to lower the quotas.
+
+3. Adding API entry points to [Robots.txt](#robots.txt) 🐝
+
+4. Adding specific `User-Agent` entries to [Robots.txt](#robots.txt) 🐝
+
+5. Blocking IP addresses or subnets in the [Nginx](#nginx) βš™ or
+   [HaProxy](#haproxy) βš™ configuration files
+
+To add caching to the API or increase the expiration times:
+
+1. Identify API calls that cause significant load. [Nginx](#nginx) βš™
+   is configured to log timing information for each HTTP request. See
+   [Logs investigation notebook](#logs-investigation-notebook) πŸ“” for examples. Also see
+   [Logs from FSN notebook](#logs-from-fsn-notebook) πŸ“” and
+   [ClickHouse instance for logs](#clickhouse-instance-for-logs) βš™. Additionally,
+   [Aggregation cache monitoring](#aggregation-cache-monitoring) 🐍 can be tweaked for the present use-case.
+
+2. Implement caching or increase expiration times across the API
+   codebase. See [API cache](#api-cache) πŸ’‘ and
+   [Purging Nginx cache](#purging-nginx-cache) πŸ“’.
+
+3. Monitor the improvement in terms of cache hit VS cache miss ratio.
+
+> **important**
+> Caching can be applied selectively for API requests that return rapidly
+> changing data VS old, stable data. See [Aggregation and MAT](#aggregation-and-mat) 🐝
+> for an example.
+
+To update the quotas edit the API here
+
+and deploy as usual.
+
+To update the `robots.txt` entry point see [Robots.txt](#robots.txt) 🐝 and
+edit the API in
+`__init__.py#L124`,
+then deploy as usual.
+
+To block IP addresses or subnets see [Nginx](#nginx) βš™ or
+[HaProxy](#haproxy) βš™, then configure the required file in
+[Ansible](#ansible) πŸ”§ and deploy.
+
+
+### Calling the API manually
+To make HTTP calls to the API manually you'll need to extract a JWT from
+the browser, sometimes with admin rights.
+
+In Firefox, authenticate against , then
+open Inspect \>\> Storage \>\> Local Storage \>\> Find
+`{"token": "<token>"}`
+
+Extract the ASCII-encoded token string without braces or quotes.
+
+Call the API using [httpie](https://httpie.io/) with:
+
+    $ http https://api.ooni.io/<path> Authorization:'Bearer <token>'
+
+E.g.:
+
+    $ http https://api.ooni.io/api/_/quotas_summary Authorization:'Bearer <token>'
+
+> **note**
+> Do not leave whitespace after \"Authorization:\"
+
+
+### Build, deploy, rollback
+
+Host deployments are done with the
+[sysadmin repo](https://github.com/ooni/sysadmin)
+
+For component updates a deployment pipeline is used:
+
+Look at the \[Status
+dashboard\]() - be aware
+of badge image caching
+
+
+### The deployer tool
+Deployments can be performed with a tool that acts as a frontend for
+APT. It implements a simple Continuous Delivery workflow from the CLI. It
+does not require running a centralized CD pipeline server (e.g. like
+)
+
+The tool is hosted on the backend repository together with its
+configuration file for simplicity:
+
+
+At start time it traverses the path from the current working directory
+back to root until it finds a configuration file named `deployer.ini`. This
+allows using different deployment pipelines stored in configuration
+files across different repositories and subdirectories.
+
+The tool connects to the hosts to perform deployments and requires sudo
+rights. It installs Debian packages from repositories already configured
+on the hosts.
+
+It runs `apt-get update` and then `apt-get install …​` to update or
+rollback packages. By design, it does not interfere with manual
+execution of apt-get or through tools like [Ansible](#ansible) πŸ”§.
+This means operators can log on a host to do a manual upgrade or rollback
+of packages without breaking the deployer tool.
+
+The tool depends only on the `python3-apt` package.
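+
+Conceptually, each deployment step boils down to something like the
+following sketch of the equivalent manual commands (the host and version
+here are hypothetical, taken from the examples below):
+
+``` bash
+# what the deployer automates for every host of the target stage
+ssh ams-pg-test.ooni.org "sudo apt-get update"
+ssh ams-pg-test.ooni.org "sudo apt-get install ooni-api=1.0.79~pr751-194"
+```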
+
+Here is a configuration file example, with comments:
+
+``` ini
+[environment]
+## Location on the path where SVG badges are stored
+badges_path = /var/www/package_badges
+
+
+## List of packages that are handled by the deployer, space separated
+deb_packages = ooni-api fastpath analysis detector
+
+
+## List of deployment stage names, space separated, from the least to the most critical
+stages = test hel prod
+
+
+## For each stage a block named stage:<name> is required.
+## The block lists the stage hosts.
+
+
+## Example of an unused stage (not listed under stages)
+[stage:alpha]
+hosts = localhost
+
+[stage:test]
+hosts = ams-pg-test.ooni.org
+
+[stage:hel]
+hosts = backend-hel.ooni.org
+
+[stage:prod]
+hosts = backend-fsn.ooni.org
+```
+
+By running the tool without any argument it will connect to the hosts
+from the configuration file and print a summary of the installed
+packages, for example:
+
+``` bash
+$ deployer
+
+ Package               test                  prod
+ooni-api    1.0.79~pr751-194      1.0.79~pr751-194
+fastpath      0.81~pr748-191 β–Ίβ–Ί    0.77~pr705-119
+analysis        1.9~pr659-61 ⚠    1.10~pr692-102
+detector        0.3~pr651-98        0.3~pr651-98
+```
+
+The green arrows between two package versions indicate that the version
+on the left side is higher than the one on the right side. This means
+that a rollout is pending. In the example the fastpath package on the
+\"prod\" stage can be updated.
+
+A red warning sign indicates that the version on the right side is
+higher than the one on the left side. During a typical continuous
+deployment workflow version numbers should always increment. The rollout
+should go from left to right, aka from the least critical stage to the
+most critical stage.
+
+Deploy/rollback a given version on the \"test\" stage:
+
+``` bash
+./deployer deploy ooni-api test 0.6~pr194-147
+```
+
+Deploy the latest build on the first stage:
+
+``` bash
+./deployer deploy ooni-api
+```
+
+Deploy the latest build on a given stage. This usage is not recommended as
+it deploys the latest build regardless of what is currently running on
+previous stages.
+
+``` bash
+./deployer deploy ooni-api prod
+```
+
+The deployer tool can also generate SVG badges that can then be served by
+[Nginx](#nginx) βš™ or copied elsewhere to create a status dashboard.
+
+Example:
+
+![badge](../../../assets/images-backend/badge.png)
+
+Update all badges with:
+
+``` bash
+./deployer refresh_badges
+```
+
+
+### Adding new tests
+This runbook describes how to add support for a new test in the
+[Fastpath](#fastpath) βš™.
+
+Review [Backend code changes](#backend-code-changes) πŸ“’, then update
+[fastpath core](https://github.com/ooni/backend/blob/0ec9fba0eb9c4c440dcb7456f2aab529561104ae/fastpath/fastpath/core.py)
+to add a scoring function.
+
+See for example `def score_torsf(msm: dict) -> dict:`
+
+Also add an `if` block to the `def score_measurement(msm: dict) -> dict:`
+function to call the newly created function.
+
+Finish by adding a new test to the `score_measurement` function and
+adding relevant integration tests.
+
+Run the integration tests locally.
+
+Update the
+[api](https://github.com/ooni/backend/blob/0ec9fba0eb9c4c440dcb7456f2aab529561104ae/api/ooniapi/measurements.py#L491)
+if needed.
+
+Deploy on [ams-pg-test.ooni.org](#ams-pg-test.ooni.org) πŸ–₯ and run end-to-end tests
+using real probes.
+
+
+### Adding support for a new test key
+This runbook describes how to modify the [Fastpath](#fastpath) βš™
+and the [API](#api) βš™ to extract, process, store and publish a new measurement
+field.
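+
+At the database level the end result is simply a new column on the
+[fastpath table](#fastpath-table) ⛁; a minimal sketch, assuming a
+hypothetical column name and type:
+
+```sql
+-- hypothetical name and type; pick them to match the new test key
+ALTER TABLE fastpath ADD COLUMN IF NOT EXISTS my_new_test_key Nullable(String)
+```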
+ +Start with adding a new column to the [fastpath table](#fastpath-table) ⛁ +by following [Adding a new column to the fastpath](#adding-a-new-column-to-the-fastpath) πŸ“’. + +Add the column to the local ClickHouse instance used for tests and +[ams-pg-test.ooni.org](#ams-pg-test.ooni.org) πŸ–₯. + +Update as described in +[Continuous Deployment: Database schema changes](#continuous-deployment:-database-schema-changes) πŸ’‘ + +Add support for the new field in the fastpath `core.py` and `db.py` modules +and related tests. +See https://github.com/ooni/backend/pull/682 for a comprehensive example. + +Run tests locally, then open a draft pull request and ensure the CI tests are +running successfully. + +If needed, the current pull request can be reviewed and deployed without modifying the API to expose the new column. This allows processing data sooner while the API is still being worked on. + +Add support for the new column in the API. The change depends on where and how the +new value is to be published. +See for a generic example of updating an SQL query in the API and updating related tests. + +Deploy the changes on test and pre-production stages after creating the new column in the database. +See [The deployer tool](#the-deployer-tool) πŸ”§ for details. + +Perform end-to-end tests with real probes and [Public and private web UIs](#public-and-private-web-uis) πŸ’‘ as needed. + +Complete the pull request and deploy to production. + + +## Increasing the disk size on a dedicated host + +Below are some notes on how to resize the disks when a new drive is added to +our dedicated hosts: + +``` +fdisk /dev/nvme3n1 +# create gpt partition table and new RAID 5 (label 42) partition using the CLI +mdadm --manage /dev/md3 --add /dev/nvme3n1p1 +cat /proc/mdstat +# Take note of the volume count (4) and validate that nvme3n1p1 is marked as spare ("S") +mdadm --grow --raid-devices=4 /dev/md3 +``` + +``` +# resize2fs /dev/md3 +# df -h | grep md3 +/dev/md3 2.6T 1.2T 1.3T 48% / +``` + +## Replicating MergeTree tables + +Notes on how to go about converting a MergeTree family table to a replicated table, while minimizing downtime. + +See the following links for more information: + +- https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated/ +- https://clickhouse.com/docs/en/operations/system-tables/replicas +- https://clickhouse.com/docs/en/architecture/replication#verify-that-clickhouse-keeper-is-running +- https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication +- https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings + +### Workflow + +You should first create the replicated database cluster following the +instructions at the [clickhouse docs](https://clickhouse.com/docs/en/architecture/replication). + +The ooni-devops repo has a role called `oonidata_clickhouse` that does that by using the [idealista.clickhouse_role](https://github.com/idealista/clickhouse_role). + +Once the cluster is created you can proceed with creating a DATABASE on the cluster by running: + +``` +CREATE DATABASE ooni ON CLUSTER oonidata_cluster +``` + +There are now a few options to go about doing this: + +1. 
You just create the new replicated tables and perform a copy into the destination database by running on the source database the following:
+
+```
+INSERT INTO FUNCTION
+remote('destination-database.ooni.nu', 'obs_web', 'USER', 'PASSWORD')
+SELECT * from obs_web
+```
+
+This will require duplicating the data and might not be feasible.
+
+2. If you already have all the data set up on one host and you just want to convert the database into a replicated one, you can do the following:
+
+We assume there are 2 tables: `obs_web_bak` (which is the source table) and
+`obs_web` which is the destination table. We also assume a single shard and
+multiple replicas.
+
+First create the destination replicated table. To retrieve the table create query you can run:
+
+```sql
+select create_table_query
+from system.tables
+where database = 'default' and table = 'obs_web'
+```
+
+You should then modify the table to make use of the `ReplicatedReplacingMergeTree` engine:
+
+```sql
+CREATE TABLE ooni.obs_web (`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `probe_asn` UInt32, `probe_cc` String, `probe_as_org_name` String, `probe_as_cc` String, `probe_as_name` String, `network_type` String, `platform` String, `origin` String, `engine_name` String, `engine_version` String, `architecture` String, `resolver_ip` String, `resolver_asn` UInt32, `resolver_cc` String, `resolver_as_org_name` String, `resolver_as_cc` String, `resolver_is_scrubbed` UInt8, `resolver_asn_probe` UInt32, `resolver_as_org_name_probe` String, `created_at` Nullable(DateTime('UTC')), `target_id` Nullable(String), `hostname` Nullable(String), `transaction_id` Nullable(UInt16), `ip` Nullable(String), `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_query_type` Nullable(String), `dns_failure` Nullable(String), `dns_engine` Nullable(String), `dns_engine_resolver_address` Nullable(String), `dns_answer_type` Nullable(String), `dns_answer` Nullable(String), `dns_answer_asn` Nullable(UInt32), `dns_answer_as_org_name` Nullable(String), `dns_t` Nullable(Float64), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tcp_t` Nullable(Float64), `tls_failure` Nullable(String), `tls_server_name` Nullable(String), `tls_version` Nullable(String), `tls_cipher_suite` Nullable(String), `tls_is_certificate_valid` Nullable(UInt8), `tls_end_entity_certificate_fingerprint` Nullable(String), `tls_end_entity_certificate_subject` Nullable(String), `tls_end_entity_certificate_subject_common_name` Nullable(String), `tls_end_entity_certificate_issuer` Nullable(String), `tls_end_entity_certificate_issuer_common_name` Nullable(String), `tls_end_entity_certificate_san_list` Array(String), `tls_end_entity_certificate_not_valid_after` Nullable(DateTime64(3, 'UTC')), `tls_end_entity_certificate_not_valid_before` Nullable(DateTime64(3, 'UTC')), `tls_certificate_chain_length` Nullable(UInt16), `tls_certificate_chain_fingerprints` Array(String), `tls_handshake_read_count` Nullable(UInt16), `tls_handshake_write_count` Nullable(UInt16), `tls_handshake_read_bytes` Nullable(UInt32), `tls_handshake_write_bytes` Nullable(UInt32), `tls_handshake_last_operation` Nullable(String), `tls_handshake_time` Nullable(Float64), `tls_t` Nullable(Float64), `http_request_url` Nullable(String), `http_network` Nullable(String), `http_alpn` Nullable(String), `http_failure` Nullable(String), `http_request_body_length` Nullable(UInt32), `http_request_method` Nullable(String), `http_runtime` Nullable(Float64), `http_response_body_length` Nullable(Int32), `http_response_body_is_truncated` Nullable(UInt8), `http_response_body_sha1` Nullable(String), `http_response_status_code` Nullable(UInt16), `http_response_header_location` Nullable(String), `http_response_header_server` Nullable(String), `http_request_redirect_from` Nullable(String), `http_request_body_is_truncated` Nullable(UInt8), `http_t` Nullable(Float64), `probe_analysis` Nullable(String))
+ENGINE = ReplicatedReplacingMergeTree(
+'/clickhouse/{cluster}/tables/{database}/{table}/{shard}',
+'{replica}'
+)
+PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2))
+PRIMARY KEY (measurement_uid, observation_idx)
+ORDER BY (measurement_uid, observation_idx, measurement_start_time, probe_cc, probe_asn) SETTINGS index_granularity = 8192
+```
+
+Check all the partitions that exist for the source table and produce ALTER queries to map them from the source to the destination:
+
+```sql
+SELECT DISTINCT 'ALTER TABLE ooni.obs_web ATTACH PARTITION ID \'' || partition_id || '\' FROM obs_web_bak;' from system.parts WHERE table = 'obs_web_bak' AND active;
+```
+
+While you are running the following, you should stop all merges by running:
+
+```sql
+SYSTEM STOP MERGES;
+```
+
+This can then be scripted like so:
+
+```sh
+clickhouse-client -q "SELECT DISTINCT 'ALTER TABLE ooni.obs_web ATTACH PARTITION ID \'' || partition_id || '\' FROM obs_web_bak;' from system.parts WHERE table = 'obs_web_bak' format TabSeparatedRaw" | clickhouse-client -u write --password XXXX -mn
+```
+
+You will now have a replicated table existing on one of the replicas.
+
+Then, for each of the other replicas in the set, you will have to create the
+table manually, this time passing in the zookeeper path explicitly.
+ +You can get the zookeeper path by running the following on the first replica you have setup + +```sql +SELECT zookeeper_path FROM system.replicas WHERE table = 'obs_web'; +``` + +For each replica you will then have to create the tables like so: + +```sql +CREATE TABLE ooni.obs_web (`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `probe_asn` UInt32, `probe_cc` String, `probe_as_org_name` String, `probe_as_cc` String, `probe_as_name` String, `network_type` String, `platform` String, `origin` String, `engine_name` String, `engine_version` String, `architecture` String, `resolver_ip` String, `resolver_asn` UInt32, `resolver_cc` String, `resolver_as_org_name` String, `resolver_as_cc` String, `resolver_is_scrubbed` UInt8, `resolver_asn_probe` UInt32, `resolver_as_org_name_probe` String, `created_at` Nullable(DateTime('UTC')), `target_id` Nullable(String), `hostname` Nullable(String), `transaction_id` Nullable(UInt16), `ip` Nullable(String), `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_query_type` Nullable(String), `dns_failure` Nullable(String), `dns_engine` Nullable(String), `dns_engine_resolver_address` Nullable(String), `dns_answer_type` Nullable(String), `dns_answer` Nullable(String), `dns_answer_asn` Nullable(UInt32), `dns_answer_as_org_name` Nullable(String), `dns_t` Nullable(Float64), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tcp_t` Nullable(Float64), `tls_failure` Nullable(String), `tls_server_name` Nullable(String), `tls_version` Nullable(String), `tls_cipher_suite` Nullable(String), `tls_is_certificate_valid` Nullable(UInt8), `tls_end_entity_certificate_fingerprint` Nullable(String), `tls_end_entity_certificate_subject` Nullable(String), `tls_end_entity_certificate_subject_common_name` Nullable(String), `tls_end_entity_certificate_issuer` Nullable(String), `tls_end_entity_certificate_issuer_common_name` Nullable(String), `tls_end_entity_certificate_san_list` Array(String), `tls_end_entity_certificate_not_valid_after` Nullable(DateTime64(3, 'UTC')), `tls_end_entity_certificate_not_valid_before` Nullable(DateTime64(3, 'UTC')), `tls_certificate_chain_length` Nullable(UInt16), `tls_certificate_chain_fingerprints` Array(String), `tls_handshake_read_count` Nullable(UInt16), `tls_handshake_write_count` Nullable(UInt16), `tls_handshake_read_bytes` Nullable(UInt32), `tls_handshake_write_bytes` Nullable(UInt32), `tls_handshake_last_operation` Nullable(String), `tls_handshake_time` Nullable(Float64), `tls_t` Nullable(Float64), `http_request_url` Nullable(String), `http_network` Nullable(String), `http_alpn` Nullable(String), `http_failure` Nullable(String), `http_request_body_length` Nullable(UInt32), `http_request_method` Nullable(String), `http_runtime` Nullable(Float64), `http_response_body_length` Nullable(Int32), `http_response_body_is_truncated` Nullable(UInt8), `http_response_body_sha1` Nullable(String), `http_response_status_code` Nullable(UInt16), `http_response_header_location` Nullable(String), `http_response_header_server` Nullable(String), `http_request_redirect_from` Nullable(String), `http_request_body_is_truncated` Nullable(UInt8), `http_t` Nullable(Float64), `probe_analysis` Nullable(String)) +ENGINE = 
ReplicatedReplacingMergeTree(
+'/clickhouse/oonidata_cluster/tables/ooni/obs_web/01',
+'{replica}'
+)
+PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2))
+PRIMARY KEY (measurement_uid, observation_idx)
+ORDER BY (measurement_uid, observation_idx, measurement_start_time, probe_cc, probe_asn) SETTINGS index_granularity = 8192
+```
+
+You will then have to manually copy the data over to the destination replica from the source.
+
+The data lives inside of `/var/lib/clickhouse/data/{database_name}/{table_name}`
+
+Once the data has been copied over you should now have replicated the data and you can resume merges on all databases by running:
+
+```sql
+SYSTEM START MERGES;
+```
+
+### Creating tables on clusters
+
+```sql
+CREATE TABLE ooni.obs_web_ctrl ON CLUSTER oonidata_cluster
+(`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `hostname` String, `created_at` Nullable(DateTime64(3, 'UTC')), `ip` String, `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_failure` Nullable(String), `dns_success` Nullable(UInt8), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tls_failure` Nullable(String), `tls_success` Nullable(UInt8), `tls_server_name` Nullable(String), `http_request_url` Nullable(String), `http_failure` Nullable(String), `http_success` Nullable(UInt8), `http_response_body_length` Nullable(Int32))
+ENGINE = ReplicatedReplacingMergeTree(
+'/clickhouse/{cluster}/tables/{database}/{table}/{shard}',
+'{replica}'
+)
+PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2))
+PRIMARY KEY (measurement_uid, observation_idx) ORDER BY (measurement_uid, observation_idx, measurement_start_time, hostname) SETTINGS index_granularity = 8192
+```
diff --git a/docs/Tools.md b/docs/Tools.md
new file mode 100644
index 00000000..73d9f078
--- /dev/null
+++ b/docs/Tools.md
@@ -0,0 +1,211 @@
+
+### Geolocation script
+The following script can be used to compare the geolocation reported by
+the probes submitting measurements with the geolocation of the
+`/24` subnet the probe is coming from. It is meant to be run on
+[backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯.
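+
+A hypothetical invocation (the script name is assumed; it needs access to
+the systemd journal and to the mmdb files referenced at the top of the
+script):
+
+``` bash
+# output is CSV: probe_cc,geo_cc,probe_asn,geo_asn,proxied,probe_type
+sudo python3 geolocation_check.py | tee geolocation.csv
+```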
+
+``` python
+#!/usr/bin/env python3
+
+from time import sleep
+
+import systemd.journal
+import geoip2.database  # type: ignore
+
+asnfn = "/var/lib/ooniapi/asn.mmdb"
+ccfn = "/var/lib/ooniapi/cc.mmdb"
+geoip_asn_reader = geoip2.database.Reader(asnfn)
+geoip_cc_reader = geoip2.database.Reader(ccfn)
+
+
+def follow_journal():
+    journal = systemd.journal.Reader()
+    #journal.seek_tail()
+    journal.get_previous()
+    journal.add_match(_SYSTEMD_UNIT="nginx.service")
+    while True:
+        try:
+            event = journal.wait(-1)
+            if event == systemd.journal.APPEND:
+                for entry in journal:
+                    yield entry["MESSAGE"]
+        except Exception as e:
+            print(e)
+            sleep(0.1)
+
+
+def geolookup(ipaddr: str):
+    cc = geoip_cc_reader.country(ipaddr).country.iso_code
+    asn = geoip_asn_reader.asn(ipaddr).autonomous_system_number
+    return cc, asn
+
+
+def process(rawmsg):
+    if ' "POST /report/' not in rawmsg:
+        return
+    msg = rawmsg.strip().split()
+    ipaddr = msg[2]
+    ipaddr2 = msg[3]
+    path = msg[8][8:]
+    tsamp, tn, probe_cc, probe_asn, collector, rand = path.split("_")
+    geo_cc, geo_asn = geolookup(ipaddr)
+    proxied = 0
+    probe_type = rawmsg.rsplit('"', 2)[-2]
+    if "," in probe_type:
+        return
+    if ipaddr2 != "0.0.0.0":
+        proxied = 1
+        # Probably CloudFront, use second ipaddr
+        geo_cc, geo_asn = geolookup(ipaddr2)
+
+    print(f"{probe_cc},{geo_cc},{probe_asn},{geo_asn},{proxied},{probe_type}")
+
+
+def main():
+    for msg in follow_journal():
+        if msg is None:
+            break
+        try:
+            process(msg)
+        except Exception as e:
+            print(e)
+            sleep(0.1)
+
+
+if __name__ == "__main__":
+    main()
+```
+
+
+### Test list prioritization monitoring
+The following script monitors the prioritized test list for changes in URLs
+for a set of countries. It outputs StatsD metrics.
+
+> **note**
+> The prioritization system has been modified to work on a granularity of
+> probe_cc + probe_asn rather than whole countries.
+
+Country-wise changes might be misleading. The script can be modified to
+filter for a set of CCs+ASNs.
+
+``` python
+#!/usr/bin/env python3
+
+from time import sleep
+import urllib.request
+import json
+
+import statsd  # debdeps: python3-statsd
+
+metrics = statsd.StatsClient("127.0.0.1", 8125, prefix="test-list-changes")
+
+CCs = ["GE", "IT", "US"]
+THRESH = 100
+
+
+def peek(cc, listmap) -> None:
+    url = f"https://api.ooni.io/api/v1/test-list/urls?country_code={cc}&debug=True"
+    res = urllib.request.urlopen(url)
+    j = json.load(res)
+    top = j["results"][:THRESH]  # list of dicts
+    top_urls = set(d["url"] for d in top)
+
+    if cc in listmap:
+        old = listmap[cc]
+        changed = old.symmetric_difference(top_urls)
+        tot_cnt = len(old.union(top_urls))
+        changed_ratio = len(changed) / tot_cnt * 100
+        metrics.gauge(f"-{cc}", changed_ratio)
+
+    listmap[cc] = top_urls
+
+
+def main() -> None:
+    listmap = {}
+    while True:
+        for cc in CCs:
+            try:
+                peek(cc, listmap)
+            except Exception as e:
+                print(e)
+                sleep(1)
+        sleep(60 * 10)
+
+
+if __name__ == "__main__":
+    main()
+```
+
+### Recompressing postcans on S3
+The following script can be used to compress .tar.gz files in the S3 data bucket.
+It keeps a copy of the original files locally as a backup.
+It terminates once a correctly compressed file is found.
+Running the script on an AWS host close to the S3 bucket can significantly
+speed up the process.
+
+Tested with the packages:
+
+ * python3-boto3  1.28.49+dfsg-1
+ * python3-magic  2:0.4.27-2
+
+Set the ACCESS_KEY and SECRET_KEY environment variables.
+Update the PREFIX variable as needed.
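+
+A hypothetical way to provide the credentials and run it (the script name
+is assumed):
+
+```bash
+export ACCESS_KEY="..."   # AWS access key id, read by the script
+export SECRET_KEY="..."   # AWS secret access key
+python3 recompress_postcans.py
+```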
+
+```python
+#!/usr/bin/env python3
+from os import getenv, rename
+from sys import exit
+import boto3
+import gzip
+import magic
+
+BUCKET_NAME = "ooni-data-eu-fra-test"
+# BUCKET_NAME = "ooni-data-eu-fra"
+PREFIX = "raw/2021"
+
+def fetch_files():
+    s3 = boto3.client(
+        "s3",
+        aws_access_key_id=getenv("ACCESS_KEY"),
+        aws_secret_access_key=getenv("SECRET_KEY"),
+    )
+    cont_token = None
+    while True:
+        kw = {} if cont_token is None else dict(ContinuationToken=cont_token)
+        r = s3.list_objects_v2(Bucket=BUCKET_NAME, Prefix=PREFIX, **kw)
+        cont_token = r.get("NextContinuationToken", None)
+        for i in r.get("Contents", []):
+            k = i["Key"]
+            if k.endswith(".tar.gz"):
+                fn = k.rsplit("/", 1)[-1]
+                s3.download_file(BUCKET_NAME, k, fn)
+                yield k, fn
+        if cont_token is None:
+            return
+
+def main():
+    s3res = boto3.Session(
+        aws_access_key_id=getenv("ACCESS_KEY"),
+        aws_secret_access_key=getenv("SECRET_KEY"),
+    ).resource("s3")
+    for s3key, fn in fetch_files():
+        ft = magic.from_file(fn)
+        if "tar archive" not in ft:
+            print(f"found {ft} at {s3key}")
+            # continue  # simply ignore already compressed files
+            exit()  # stop when compressed files are found
+        tarfn = fn[:-3]
+        rename(fn, tarfn)  # keep the local file as a backup
+        with open(tarfn, "rb") as f:
+            inp = f.read()
+        comp = gzip.compress(inp, compresslevel=9)
+        ratio = len(inp) / len(comp)
+        del inp
+        print(f"uploading {s3key} compression ratio {ratio}")
+        obj = s3res.Object(BUCKET_NAME, s3key)
+        obj.put(Body=comp)
+        del comp
+
+main()
+```
diff --git a/docs/disk-increase.md b/docs/disk-increase.md
deleted file mode 100644
index b977c991..00000000
--- a/docs/disk-increase.md
+++ /dev/null
@@ -1,17 +0,0 @@
-Below are some notes on how to resize the disks when a new drive is added to
-our dedicated hosts:
-
-```
-fdisk /dev/nvme3n1
-# create gpt partition table and new RAID 5 (label 42) partition using the CLI
-mdadm --manage /dev/md3 --add /dev/nvme3n1p1
-cat /proc/mdstat
-# Take note of the volume count (4) and validate that nvme3n1p1 is marked as spare ("S")
-mdadm --grow --raid-devices=4 /dev/md3
-```
-
-```
-# resize2fs /dev/md3
-# df -h | grep md3
-/dev/md3        2.6T  1.2T  1.3T  48% /
-```
diff --git a/docs/merge-tree-replication.md b/docs/merge-tree-replication.md
deleted file mode 100644
index ac9e1e21..00000000
--- a/docs/merge-tree-replication.md
+++ /dev/null
@@ -1,127 +0,0 @@
-## Replicating MergeTree tables
-
-Notes on how to go about converting a MergeTree family table to a replicated table, while minimizing downtime.
-
-See the following links for more information:
-
-- https://kb.altinity.com/altinity-kb-setup-and-maintenance/altinity-kb-converting-mergetree-to-replicated/
-- https://clickhouse.com/docs/en/operations/system-tables/replicas
-- https://clickhouse.com/docs/en/architecture/replication#verify-that-clickhouse-keeper-is-running
-- https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/replication
-- https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings
-
-### Workflow
-
-You should first create the replicated database cluster following the
-instructions at the [clickhouse docs](https://clickhouse.com/docs/en/architecture/replication).
-
-The ooni-devops repo has a role called `oonidata_clickhouse` that does that by using the [idealista.clickhouse_role](https://github.com/idealista/clickhouse_role).
- -Once the cluster is created you can proceed with creating a DATABASE on the cluster by running: - -``` -CREATE DATABASE ooni ON CLUSTER oonidata_cluster -``` - -There are now a few options to go about doing this: - -1. You just create the new replicated tables and perform a copy into the destination database by running on the source database the following: - -``` -INSERT INTO FUNCTION -remote('destination-database.ooni.nu', 'obs_web', 'USER', 'PASSWORD') -SELECT * from obs_web -``` - -This will require duplicating the data and might not be feasible. - -2. If you already have all the data setup on one host and you just want to convert the database into a replicate one, you can do the following: - -We assume there are 2 tables: `obs_web_bak` (which is the source table) and -`obs_web` which is the destination table. We also assume a single shard and -multiple replicas. - -First create the destination replicated table. To retrieve the table create query you can run: - -```sql -select create_table_query -from system.tables -where database = 'default' and table = 'obs_web' -``` - -You should then modify the table to make use of the `ReplicateReplacingMergeTree` engine: - -```sql -CREATE TABLE ooni.obs_web (`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `probe_asn` UInt32, `probe_cc` String, `probe_as_org_name` String, `probe_as_cc` String, `probe_as_name` String, `network_type` String, `platform` String, `origin` String, `engine_name` String, `engine_version` String, `architecture` String, `resolver_ip` String, `resolver_asn` UInt32, `resolver_cc` String, `resolver_as_org_name` String, `resolver_as_cc` String, `resolver_is_scrubbed` UInt8, `resolver_asn_probe` UInt32, `resolver_as_org_name_probe` String, `created_at` Nullable(DateTime('UTC')), `target_id` Nullable(String), `hostname` Nullable(String), `transaction_id` Nullable(UInt16), `ip` Nullable(String), `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_query_type` Nullable(String), `dns_failure` Nullable(String), `dns_engine` Nullable(String), `dns_engine_resolver_address` Nullable(String), `dns_answer_type` Nullable(String), `dns_answer` Nullable(String), `dns_answer_asn` Nullable(UInt32), `dns_answer_as_org_name` Nullable(String), `dns_t` Nullable(Float64), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tcp_t` Nullable(Float64), `tls_failure` Nullable(String), `tls_server_name` Nullable(String), `tls_version` Nullable(String), `tls_cipher_suite` Nullable(String), `tls_is_certificate_valid` Nullable(UInt8), `tls_end_entity_certificate_fingerprint` Nullable(String), `tls_end_entity_certificate_subject` Nullable(String), `tls_end_entity_certificate_subject_common_name` Nullable(String), `tls_end_entity_certificate_issuer` Nullable(String), `tls_end_entity_certificate_issuer_common_name` Nullable(String), `tls_end_entity_certificate_san_list` Array(String), `tls_end_entity_certificate_not_valid_after` Nullable(DateTime64(3, 'UTC')), `tls_end_entity_certificate_not_valid_before` Nullable(DateTime64(3, 'UTC')), `tls_certificate_chain_length` Nullable(UInt16), `tls_certificate_chain_fingerprints` Array(String), `tls_handshake_read_count` Nullable(UInt16), `tls_handshake_write_count` 
Nullable(UInt16), `tls_handshake_read_bytes` Nullable(UInt32), `tls_handshake_write_bytes` Nullable(UInt32), `tls_handshake_last_operation` Nullable(String), `tls_handshake_time` Nullable(Float64), `tls_t` Nullable(Float64), `http_request_url` Nullable(String), `http_network` Nullable(String), `http_alpn` Nullable(String), `http_failure` Nullable(String), `http_request_body_length` Nullable(UInt32), `http_request_method` Nullable(String), `http_runtime` Nullable(Float64), `http_response_body_length` Nullable(Int32), `http_response_body_is_truncated` Nullable(UInt8), `http_response_body_sha1` Nullable(String), `http_response_status_code` Nullable(UInt16), `http_response_header_location` Nullable(String), `http_response_header_server` Nullable(String), `http_request_redirect_from` Nullable(String), `http_request_body_is_truncated` Nullable(UInt8), `http_t` Nullable(Float64), `probe_analysis` Nullable(String)) -ENGINE = ReplicatedReplacingMergeTree( -'/clickhouse/{cluster}/tables/{database}/{table}/{shard}', -'{replica}' -) -PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2)) -PRIMARY KEY (measurement_uid, observation_idx) -ORDER BY (measurement_uid, observation_idx, measurement_start_time, probe_cc, probe_asn) SETTINGS index_granularity = 8192 -``` - -Check all the partitions that exist for the source table and produce ALTER queries to map them from the source to the destination: - -```sql -SELECT DISTINCT 'ALTER TABLE ooni.obs_web ATTACH PARTITION ID \'' || partition_id || '\' FROM obs_web_bak;' from system.parts WHERE table = 'obs_web_bak' AND active; -``` - -While you are running the following, you should stop all merges by running: - -```sql -SYSTEM STOP MERGES; -``` - -This can then be scripted like so: - -```sh -clickhouse-client -q "SELECT DISTINCT 'ALTER TABLE ooni.obs_web ATTACH PARTITION ID \'' || partition_id || '\' FROM obs_web_bak;' from system.parts WHERE table = 'obs_web_bak' format TabSeparatedRaw" | clickhouse-client -u write --password XXXX -mn -``` - -You will now have a replicated table existing on one of the replicas. - -Then you shall for each other replica in the set manually create the table, but this time pass in it explicitly the zookeeper path. 
- -You can get the zookeeper path by running the following on the first replica you have setup - -```sql -SELECT zookeeper_path FROM system.replicas WHERE table = 'obs_web'; -``` - -For each replica you will then have to create the tables like so: - -```sql -CREATE TABLE ooni.obs_web (`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `probe_asn` UInt32, `probe_cc` String, `probe_as_org_name` String, `probe_as_cc` String, `probe_as_name` String, `network_type` String, `platform` String, `origin` String, `engine_name` String, `engine_version` String, `architecture` String, `resolver_ip` String, `resolver_asn` UInt32, `resolver_cc` String, `resolver_as_org_name` String, `resolver_as_cc` String, `resolver_is_scrubbed` UInt8, `resolver_asn_probe` UInt32, `resolver_as_org_name_probe` String, `created_at` Nullable(DateTime('UTC')), `target_id` Nullable(String), `hostname` Nullable(String), `transaction_id` Nullable(UInt16), `ip` Nullable(String), `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_query_type` Nullable(String), `dns_failure` Nullable(String), `dns_engine` Nullable(String), `dns_engine_resolver_address` Nullable(String), `dns_answer_type` Nullable(String), `dns_answer` Nullable(String), `dns_answer_asn` Nullable(UInt32), `dns_answer_as_org_name` Nullable(String), `dns_t` Nullable(Float64), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tcp_t` Nullable(Float64), `tls_failure` Nullable(String), `tls_server_name` Nullable(String), `tls_version` Nullable(String), `tls_cipher_suite` Nullable(String), `tls_is_certificate_valid` Nullable(UInt8), `tls_end_entity_certificate_fingerprint` Nullable(String), `tls_end_entity_certificate_subject` Nullable(String), `tls_end_entity_certificate_subject_common_name` Nullable(String), `tls_end_entity_certificate_issuer` Nullable(String), `tls_end_entity_certificate_issuer_common_name` Nullable(String), `tls_end_entity_certificate_san_list` Array(String), `tls_end_entity_certificate_not_valid_after` Nullable(DateTime64(3, 'UTC')), `tls_end_entity_certificate_not_valid_before` Nullable(DateTime64(3, 'UTC')), `tls_certificate_chain_length` Nullable(UInt16), `tls_certificate_chain_fingerprints` Array(String), `tls_handshake_read_count` Nullable(UInt16), `tls_handshake_write_count` Nullable(UInt16), `tls_handshake_read_bytes` Nullable(UInt32), `tls_handshake_write_bytes` Nullable(UInt32), `tls_handshake_last_operation` Nullable(String), `tls_handshake_time` Nullable(Float64), `tls_t` Nullable(Float64), `http_request_url` Nullable(String), `http_network` Nullable(String), `http_alpn` Nullable(String), `http_failure` Nullable(String), `http_request_body_length` Nullable(UInt32), `http_request_method` Nullable(String), `http_runtime` Nullable(Float64), `http_response_body_length` Nullable(Int32), `http_response_body_is_truncated` Nullable(UInt8), `http_response_body_sha1` Nullable(String), `http_response_status_code` Nullable(UInt16), `http_response_header_location` Nullable(String), `http_response_header_server` Nullable(String), `http_request_redirect_from` Nullable(String), `http_request_body_is_truncated` Nullable(UInt8), `http_t` Nullable(Float64), `probe_analysis` Nullable(String)) -ENGINE = 
ReplicatedReplacingMergeTree( -'/clickhouse/oonidata_cluster/tables/ooni/obs_web/01', -'{replica}' -) -PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2)) -PRIMARY KEY (measurement_uid, observation_idx) -ORDER BY (measurement_uid, observation_idx, measurement_start_time, probe_cc, probe_asn) SETTINGS index_granularity = 8192 -``` - -You will then have to manually copy the data over to the destination replica from the source. - -The data lives inside of `/var/lib/clickhouse/data/{database_name}/{table_name}` - -Once the data has been copied over you should now have replicated the data and you can resume merges on all database by running: - -```sql -SYSTEM START MERGES; -``` - -### Creating tables on clusters - -```sql -CREATE TABLE ooni.obs_web_ctrl ON CLUSTER oonidata_cluster -(`measurement_uid` String, `observation_idx` UInt16, `input` Nullable(String), `report_id` String, `measurement_start_time` DateTime64(3, 'UTC'), `software_name` String, `software_version` String, `test_name` String, `test_version` String, `bucket_date` String, `hostname` String, `created_at` Nullable(DateTime64(3, 'UTC')), `ip` String, `port` Nullable(UInt16), `ip_asn` Nullable(UInt32), `ip_as_org_name` Nullable(String), `ip_as_cc` Nullable(String), `ip_cc` Nullable(String), `ip_is_bogon` Nullable(UInt8), `dns_failure` Nullable(String), `dns_success` Nullable(UInt8), `tcp_failure` Nullable(String), `tcp_success` Nullable(UInt8), `tls_failure` Nullable(String), `tls_success` Nullable(UInt8), `tls_server_name` Nullable(String), `http_request_url` Nullable(String), `http_failure` Nullable(String), `http_success` Nullable(UInt8), `http_response_body_length` Nullable(Int32)) -ENGINE = ReplicatedReplacingMergeTree( -'/clickhouse/{cluster}/tables/{database}/{table}/{shard}', -'{replica}' -) -PARTITION BY concat(substring(bucket_date, 1, 4), substring(bucket_date, 6, 2)) -PRIMARY KEY (measurement_uid, observation_idx) ORDER BY (measurement_uid, observation_idx, measurement_start_time, hostname) SETTINGS index_granularity = 8192 -``` diff --git a/scripts/build-docs.sh b/scripts/build-docs.sh index 0d049166..864b02ab 100755 --- a/scripts/build-docs.sh +++ b/scripts/build-docs.sh @@ -1,6 +1,7 @@ #!/bin/bash DOCS_ROOT=dist/docs/ REPO_NAME="ooni/devops" +MAIN_BRANCH="main" COMMIT_HASH=$(git rev-parse --short HEAD) mkdir -p $DOCS_ROOT @@ -12,38 +13,34 @@ strip_title() { cat $infile | awk 'BEGIN{p=1} /^#/{if(p){p=0; next}} {print}' } -cat <$DOCS_ROOT/00-index.md ---- -# Do not edit! This file is automatically generated -# to edit go to: https://github.com/$REPO_NAME/edit/main/README.md -# version: $REPO_NAME:$COMMIT_HASH -title: OONI Devops -description: OONI Devops -slug: devops ---- -EOF -strip_title README.md >> $DOCS_ROOT/00-index.md +generate_doc() { + local output_file="$1" + local title="$2" + local description="$3" + local slug="$4" + local input_file="$5" -cat <$DOCS_ROOT/01-iac.md + cat <"$DOCS_ROOT/$output_file" --- # Do not edit! 
This file is automatically generated -# to edit go to: https://github.com/$REPO_NAME/edit/main/tf/README.md -# version: $REPO_NAME:$COMMIT_HASH -title: OONI Devops IaC -description: OONI Devops IaC Documentation -slug: devops/iac +# version: $REPO_NAME/$input_file:$COMMIT_HASH +title: $title +description: $description +slug: $slug --- EOF -strip_title tf/README.md >> $DOCS_ROOT/01-iac.md + echo "[edit file](https://github.com/$REPO_NAME/edit/$MAIN_BRANCH/$input_file)" >> "$DOCS_ROOT/$output_file" + strip_title "$input_file" >> "$DOCS_ROOT/$output_file" +} -cat <$DOCS_ROOT/02-configuration-management.md ---- -# Do not edit! This file is automatically generated -# to edit go to: https://github.com/$REPO_NAME/edit/main/ansible/README.md -# version: $REPO_NAME:$COMMIT_HASH -title: OONI Devops Configuration Management -description: OONI Devops Configuration Management Documentation -slug: devops/configuration-management ---- -EOF -strip_title ansible/README.md >> $DOCS_ROOT/02-configuration-management.md \ No newline at end of file + +generate_doc "00-index.md" "OONI Devops" "OONI OONI Devops" "devops" "README.md" +generate_doc "01-infrastructure.md" "Infrastructure" "Infrastructure documentation" "devops/infrastructure" "docs/Infrastructure.md" +generate_doc "02-monitoring-alerts.md" "Monitoring" "Monitoring and Alerts documentation" "devops/monitoring" "docs/MonitoringAlerts.md" +generate_doc "03-runbooks.md" "Runbooks" "Runbooks docs" "devops/runbooks" "docs/Runbooks.md" +generate_doc "04-incident-response.md" "Incident response" "Incident response handling guidelines" "devops/incident-response" "docs/IncidentResponse.md" +generate_doc "05-terraform.md" "Terraform setup" "Terraform setup" "devops/terraform" "tf/README.md" +generate_doc "06-ansible.md" "Ansible setup" "Ansible setup" "devops/ansible" "ansible/README.md" +generate_doc "07-tools.md" "Misc Tools" "Misc Tools" "devops/tools" "docs/Tools.md" +generate_doc "08-debian-packages.md" "Debian Packages" "Debian Packages" "devops/debian-packages" "docs/DebianPackages.md" +generate_doc "09-legacy-docs.md" "Legacy Documentation" "Legacy Documentation" "devops/legacy-docs" "docs/LegacyDocs.md" From 8ac17795c88b16527587a24e94d0cea64a49d753 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 6 Jan 2025 15:04:10 +0100 Subject: [PATCH 63/88] Fix ordering of sidebar in devops --- scripts/build-docs.sh | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/scripts/build-docs.sh b/scripts/build-docs.sh index 864b02ab..2a26f8fe 100755 --- a/scripts/build-docs.sh +++ b/scripts/build-docs.sh @@ -14,11 +14,12 @@ strip_title() { } generate_doc() { - local output_file="$1" - local title="$2" - local description="$3" - local slug="$4" - local input_file="$5" + local slug="$1" + local order="$2" + local input_file="$3" + local output_file="$4" + local title="$5" + local description="$6" cat <"$DOCS_ROOT/$output_file" --- @@ -27,20 +28,21 @@ generate_doc() { title: $title description: $description slug: $slug +sidebar: + order: $order --- EOF echo "[edit file](https://github.com/$REPO_NAME/edit/$MAIN_BRANCH/$input_file)" >> "$DOCS_ROOT/$output_file" strip_title "$input_file" >> "$DOCS_ROOT/$output_file" } - -generate_doc "00-index.md" "OONI Devops" "OONI OONI Devops" "devops" "README.md" -generate_doc "01-infrastructure.md" "Infrastructure" "Infrastructure documentation" "devops/infrastructure" "docs/Infrastructure.md" -generate_doc "02-monitoring-alerts.md" "Monitoring" "Monitoring and 
Alerts documentation" "devops/monitoring" "docs/MonitoringAlerts.md" -generate_doc "03-runbooks.md" "Runbooks" "Runbooks docs" "devops/runbooks" "docs/Runbooks.md" -generate_doc "04-incident-response.md" "Incident response" "Incident response handling guidelines" "devops/incident-response" "docs/IncidentResponse.md" -generate_doc "05-terraform.md" "Terraform setup" "Terraform setup" "devops/terraform" "tf/README.md" -generate_doc "06-ansible.md" "Ansible setup" "Ansible setup" "devops/ansible" "ansible/README.md" -generate_doc "07-tools.md" "Misc Tools" "Misc Tools" "devops/tools" "docs/Tools.md" -generate_doc "08-debian-packages.md" "Debian Packages" "Debian Packages" "devops/debian-packages" "docs/DebianPackages.md" -generate_doc "09-legacy-docs.md" "Legacy Documentation" "Legacy Documentation" "devops/legacy-docs" "docs/LegacyDocs.md" +generate_doc 0 "README.md" "00-index.md" "OONI Devops" "OONI OONI Devops" "devops" +generate_doc 1 "docs/Infrastructure.md" "01-infrastructure.md" "Infrastructure" "Infrastructure documentation" "devops/infrastructure" +generate_doc 2 "docs/MonitoringAlerts.md" "02-monitoring-alerts.md" "Monitoring" "Monitoring and Alerts documentation" "devops/monitoring" +generate_doc 3 "docs/Runbooks.md" "03-runbooks.md" "Runbooks" "Runbooks docs" "devops/runbooks" +generate_doc 4 "docs/IncidentResponse.md" "04-incident-response.md" "Incident response" "Incident response handling guidelines" "devops/incident-response" +generate_doc 5 "tf/README.md" "05-terraform.md" "Terraform setup" "Terraform setup" "devops/terraform" +generate_doc 6 "ansible/README.md" "06-ansible.md" "Ansible setup" "Ansible setup" "devops/ansible" +generate_doc 7 "docs/Tools.md" "07-tools.md" "Misc Tools" "Misc Tools" "devops/tools" +generate_doc 8 "docs/DebianPackages.md" "08-debian-packages.md" "Debian Packages" "Debian Packages" "devops/debian-packages" +generate_doc 9 "docs/LegacyDocs.md" "09-legacy-docs.md" "Legacy Documentation" "Legacy Documentation" "devops/legacy-docs" From 903b3c7f4291b3b60adb086f9c4ba9a070b74f2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 6 Jan 2025 17:11:38 +0100 Subject: [PATCH 64/88] Updates to devops docs (#134) --- README.md | 116 +----- ansible/README.md | 174 +++++++- ...k-controller.yml => deploy-controller.yml} | 0 ansible/{ansible-playbook => play} | 0 .../ansible-galaxy.yml} | 0 ansible/requirements/python.yml | 3 + docs/IncidentResponse.md | 16 +- docs/Infrastructure.md | 379 ++++++------------ docs/LegacyDocs.md | 50 +++ scripts/build-docs.sh | 22 +- 10 files changed, 345 insertions(+), 415 deletions(-) rename ansible/{playbook-controller.yml => deploy-controller.yml} (100%) rename ansible/{ansible-playbook => play} (100%) rename ansible/{requirements.yml => requirements/ansible-galaxy.yml} (100%) create mode 100644 ansible/requirements/python.yml diff --git a/README.md b/README.md index 2b9ebe76..38671cf9 100644 --- a/README.md +++ b/README.md @@ -1,117 +1,3 @@ # OONI Devops - -## Infrastructure Tiers - -We divide our infrastructure components into 3 tiers: - -- **Tier 0: Critical**: These are mission critical infrastructure components. If these become unavailable or have significant disruption, it will have a major impact. - -- **Tier 1: Essential**: These components are important, but not as critical as - tier 0. They are part of our core operations, but if they become unavailable - the impact is important, but not major. - -- **Tier 2: Non-Essential**: These are auxiliary components. 
Their
-  unavailability does not have a major impact.
-
-### Tier 0 (Critical) components
-
-- [ ] Probe Services (collector specifically)
-- [ ] Fastpath (part responsible for storing post-cans)
-- [x] DNS configuration
-- [ ] Monitoring
-- [ ] OONI bridges
-- [ ] OONI.org website
-- [x] Web Connectivity test helpers
-- [x] Code signing
-
-### Tier 1 (Essential) components
-
-- [ ] OONI API measurement listing
-- [x] OONI Explorer
-- [x] OONI Run
-- [ ] OONI Data analysis pipeline
-- [x] OONI Findings API
-- [x] Website analytics
-
-### Tier 2 (Non-Essential) components
-
-- [ ] Test list editor
-- [ ] Jupyter notebooks
-- [ ] Countly
-
-## DNS and Domains
-
-The primary domains used by the backend are:
-- `ooni.org`
-- `ooni.io`
-- `ooni.nu`
-
-### DNS naming policy
-
-The public facing name of services, follows this format:
-
-- `<name>.ooni.org`
-
-Examples:
-
-- `explorer.ooni.org`
-- `run.ooni.org`
-
-Public-facing means the FQDNs are used directly by external users, services, or
-embedded in the probes. They cannot be changed or retired without causing
-outages.
-
-Use public facing names sparingly and when possible start off by creating a
-private name first.
-Not every host needs to have a public facing name. For example staging and
-testing environments might not have a public facing name.
-
-Each service also has public name which points to the specific host running that
-service, and these are hosted in the `.io` zone.
-This is helpful because sometimes you might have the same host running multiple
-services or you might also have multiple services behind the same public service
-endpoint (eg. in the case of an API gateway setup).
-
-Name in the `.io` zone should always include also the environment name they are
-related to:
-
-- `<name>.prod.ooni.io` for production services
-- `<name>.test.ooni.io` for test services
-
-When there may be multiple instances of a service running, you can append a
-number to the service name. Otherwise the service name should be only alphabetic
-characters.
-
-Examples:
-
-- `clickhouse.prod.ooni.io`
-- `postgres0.prod.ooni.io`
-- `postgres1.prod.ooni.io`
-- `prometheus.prod.ooni.io`
-- `grafana.prod.ooni.io`
-
-Finally, the actual host which runs the service, should have a FQDN defined
-inside of the `.nu` zone.
-
-This might not apply to every host, especially in a cloud environment. The FQDN
-in the `.nu` are the ones which are going to be stored in the ansible inventory
-file and will be used as targets for configuration management.
-
-The structure of these domains is:
-
-- `<name>.<location>.[prod|test].ooni.nu`
-
-The location tag can be either just the provider name or provider name `-` the location.
-
-Here is a list of location tags:
-
-- `htz-fsn`: Hetzner on Falkenstein
-- `htz-hel`: Hetzner in Helsinki
-- `grh-ams`: Greenhost in Amsterdam
-- `grh-mia`: Greenhost in Miami
-- `aws-fra`: AWS in Europe (Frankfurt)
-
-Examples:
-
-- `monitoring.htz-fsn.prod.ooni.nu`
+This documentation contains information on how the OONI infrastructure is managed.
diff --git a/ansible/README.md b/ansible/README.md
index 60da1de5..c91e3cdb 100644
--- a/ansible/README.md
+++ b/ansible/README.md
@@ -1,4 +1,14 @@
-### Quickstart
+# Ansible
+
+**NOTE** We are currently in the process of migrating ansible configurations from [ooni/sysadmin](https://github.com/ooni/sysadmin) to [ooni/devops](https://github.com/ooni/devops).
+
+Ansible is used to configure the OSes on long-term provisioned backend hosts and manage the configuration for these components.
+
+For example ansible is used to configure the setup of VPSs and dedicated hosts that are provisioned manually or using terraform.
+
+In the case of hosts that are continuously delivered, we instead use cloud-native configuration management tools.
+
+## Installation and setup
 
 It's recommended to make use of a virtualenv, for example managed using `pyenv virtualenv`:
 ```
@@ -6,17 +16,19 @@
 pyenv virtualenv ooni-devops
 pyenv activate ooni-devops
 ```
 
-Install deps:
-```
-pip install ansible dnspython boto3 passlib
-```
+### Ansible setup
 
-Install ansible galaxy modules:
+You should then install the required Python and ansible-galaxy dependencies with:
 
 ```
-ansible-galaxy install -r requirements.yml
+pip install -r requirements/python.yml
+ansible-galaxy install -r requirements/ansible-galaxy.yml
 ```
 
-Setup AWS credentials, you should add 2 profiles called `oonidevops_user_dev` and `oonidevops_user_prod` which have access to the development and production environment respectively
+### AWS configuration
+
+You should then set up AWS credentials by adding 2 profiles called `oonidevops_user_dev` and `oonidevops_user_prod` which have access to the development and production environment respectively.
+
+To this end edit your `~/.aws/credentials` file to contain:
+
+Some notable playbooks or roles are:
+
+The bootstrap playbook is in `playbook-bootstrap.yml` and is a playbook that should be run once when a new host is created.
+
+The nftables firewall is configured to read every `.nft` file under
+`/etc/ooni/nftables/`. This allows roles to
+create small files to open a port each and keep the configuration as
+close as possible to the ansible step that deploys a service. See this in use inside of the `nftables` role.
+
+#### The root account
+
+Runbooks use ssh to log onto the hosts using your own account and leveraging `sudo` to act as root.
+
+The only exception is when a new host is being deployed - in that case ansible will log in as root to create
+individual accounts and lock out the root user.
+
+When running the entire runbook ansible might try to run it as root.
+This can be avoided by selecting only the required tags using `-t <tags>`.
+
+Ideally the root user should be disabled after successfully creating user accounts.
+
+#### Roles layout
+
+Ansible playbooks use multiple roles (see
+[example](https://github.com/ooni/sysadmin/blob/master/ansible/deploy-backend.yml#L46))
+to deploy various components.
+
+Few roles use the `meta/main.yml` file to depend on other roles. See
+[example](https://github.com/ooni/sysadmin/blob/master/ansible/roles/ooni-backend/meta/main.yml)
+
+:::note
+The latter method should be used sparingly because ansible does not
+indicate where each task in a playbook is coming from. Moreover, if a dependency is specified twice by two roles, it will run twice.
+:::
+
+A diagram of the role dependencies for the deploy-backend.yml playbook:
+
+```mermaid
+
+flowchart LR
+    A(deploy-backend.yml) --> B(base-bullseye)
+    B -- meta --> G(adm)
+    A --> F(nftables)
+    A --> C(nginx-buster)
+    A --> D(dehydrated)
+    D -- meta --> C
+    E -- meta --> F
+    A --> E(ooni-backend)
+    style B fill:#eeffee
+    style C fill:#eeffee
+    style D fill:#eeffee
+    style E fill:#eeffee
+    style F fill:#eeffee
+    style G fill:#eeffee
+```
-ansible-playbook playbook.yml -i inventory
+
+A similar diagram for deploy-monitoring.yml:
+
+```mermaid
+
+flowchart LR
+    B -- meta --> G(adm)
+    M(deploy-monitoring.yml) --> B(base-bookworm)
+    M --> O(ooca-cert)
+    M --> F(nftables)
+    M --> D(dehydrated) -- meta --> N(nginx-buster)
+    M --> P(prometheus)
+    M --> X(blackbox-exporter)
+    M --> T(alertmanager)
+    style B fill:#eeffee
+    style D fill:#eeffee
+    style F fill:#eeffee
+    style G fill:#eeffee
+    style N fill:#eeffee
+    style O fill:#eeffee
+    style P fill:#eeffee
+    style T fill:#eeffee
+    style X fill:#eeffee
+```
+
+:::note
+When deploying files or updating files already existing on the hosts it can be useful to add a note e.g. "Deployed by ansible, see ".
+This helps track down how files on the host were modified and why.
+::: + +### Platform specific known bugs + On macOS you might run into this issue: https://github.com/ansible/ansible/issues/76322 The current workaround is to export the following environment variable before running ansible: diff --git a/ansible/playbook-controller.yml b/ansible/deploy-controller.yml similarity index 100% rename from ansible/playbook-controller.yml rename to ansible/deploy-controller.yml diff --git a/ansible/ansible-playbook b/ansible/play similarity index 100% rename from ansible/ansible-playbook rename to ansible/play diff --git a/ansible/requirements.yml b/ansible/requirements/ansible-galaxy.yml similarity index 100% rename from ansible/requirements.yml rename to ansible/requirements/ansible-galaxy.yml diff --git a/ansible/requirements/python.yml b/ansible/requirements/python.yml new file mode 100644 index 00000000..5c42f3dc --- /dev/null +++ b/ansible/requirements/python.yml @@ -0,0 +1,3 @@ +ansible==9.3.0 +boto3==1.34.65 +dnspython==2.6.1 diff --git a/docs/IncidentResponse.md b/docs/IncidentResponse.md index 6e98465c..bc12a048 100644 --- a/docs/IncidentResponse.md +++ b/docs/IncidentResponse.md @@ -12,9 +12,7 @@ On Android devices the following apps can be used: * [Grafana](#grafana) πŸ”§ viewer -## Tiers and severities - -**TODO** Consolidate the tiers outlined here with the other tiers listed in the top level readme. +## Severities When designing architecture of backend components or handling incidents it can be useful to have defined severities and tiers. @@ -27,17 +25,12 @@ In this case there is no distinction between severity and priority. Impact and r Incidents and alarms from monitoring can be classified by severity levels based on their impact: - 1: Serious security breach or data loss; serious loss of privacy impacting users or team members; legal risks. - - 2: Downtime impacting service usability for a significant fraction of users; Serious security vulnerability. + - 2: Downtime impacting service usability for a significant fraction of users or a tier 0 component; Serious security vulnerability. Examples: probes being unable to submit measurements - - 3: Downtime or poor performance impacting secondary services; anything that can cause a level 2 event if not addressed within 24h; outages of monitoring infrastructure + - 3: Downtime or poor performance impacting secondary services (tier 1 or above); anything that can cause a level 2 event if not addressed within 24h; outages of monitoring infrastructure - 4: Every other event that requires attention within 7 days -Based on the set of severities, components can be classified in tier as follows: - - - tier 1: Anything that can cause a severity 1 (or less severe) event. - - tier 2: Anything that can cause a severity 2 (or less severe) event but not a severity 1. - - tier 3: Anything that can cause a severity 3 (or less severe) event but not a severity 1 or 2. - - ...and so on +For an outline of infrastructure tiers see [infrastructure tiers](devops/infrastructure). ### Relations and dependencies between services @@ -70,7 +63,6 @@ and with no significant downtime. Example: An active/standby database pair provides a tier 2 service. An automatic failover tool is triggered by a simple monitoring script. Both have to be labeled tier 2. - ### Handling incidents Depending on the severity of an event a different workflow can be followed. 
diff --git a/docs/Infrastructure.md b/docs/Infrastructure.md index 8507c789..4b51eb42 100644 --- a/docs/Infrastructure.md +++ b/docs/Infrastructure.md @@ -8,7 +8,46 @@ Our infrastructure is primarily spread across the following providers: We manage the deployment and configuration of hosts through a combination of ansible and terraform. -### Hosts +## Infrastructure Tiers + +We divide our infrastructure components into 3 tiers: + +- **Tier 0: Critical**: These are mission critical infrastructure components. If these become unavailable or have significant disruption, it will have a major impact. + +- **Tier 1: Essential**: These components are important, but not as critical as + tier 0. They are part of our core operations, but if they become unavailable + the impact is important, but not major. + +- **Tier 2: Non-Essential**: These are auxiliary components. Their + unavailability does not have a major impact. + +### Tier 0 (Critical) components + +- [ ] Probe Services (collector specifically) +- [ ] Fastpath (part responsible for storing post-cans) +- [x] DNS configuration +- [ ] OONI bridges +- [x] Web Connectivity test helpers + +### Tier 1 (Essential) components + +- [ ] OONI API measurement listing +- [x] OONI Explorer +- [x] OONI Run +- [ ] Monitoring +- [ ] OONI.org website +- [x] Code signing +- [ ] OONI Data analysis pipeline +- [x] OONI Findings API +- [x] Website analytics + +### Tier 2 (Non-Essential) components + +- [ ] Test list editor +- [ ] Jupyter notebooks +- [ ] Countly + +## Hosts This section provides a summary of the backend hosts described in the rest of the document. @@ -17,7 +56,7 @@ A full list is available at - also see [Ansible](#ansible) πŸ”§ -#### backend-fsn.ooni.org +### backend-fsn.ooni.org Public-facing production backend host, receiving the deployment of the packages: @@ -30,213 +69,20 @@ packages: - [detector](legacybackend/operations/#detector-package) πŸ“¦ -#### backend-hel.ooni.org +### backend-hel.ooni.org Standby / pre-production backend host. Runs the same software stack as [backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯, plus the [OONI bridges](#ooni-bridges) βš™ -#### ams-pg-test.ooni.org - -Testbed backend host. Runs the same software stack as -[backend-fsn.ooni.org](#backend-fsn.ooni.org) πŸ–₯. Database tables are not backed up and -incoming measurements are not uploaded to S3. All data is considered -ephemeral. - -#### monitoring.ooni.org +### monitoring.ooni.org Runs the internal monitoring stack, including [Jupyter Notebook](#tool:jupyter), [Prometheus](#prometheus) πŸ”§, [Vector](#vector) πŸ”§ and [ClickHouse instance for logs](#clickhouse-instance-for-logs) βš™ -### The Sysadmin repository - -This is a git repository living at -for internal use. It primarily contains: - -- Playbooks for [Ansible](#ansible) πŸ”§ - -- The [debops-ci tool](#debops-ci-tool) πŸ”§ - -- Scripts and tools including diagrams for - [DNS and Domains](#dns-and-domains) πŸ’‘ - -### Ansible - -Ansible is used to configure the OSes on the backend hosts and manage -the configuration of backend components. The playbooks are kept at - - -This manual supersedes - - -#### Installation and setup - -Install Ansible using a OS packages or a Python virtualenv. Ensure the -same major+minor version is used across the team. - -Secrets are stored in vaults using the `ansible/vault` script as a -wrapper for `ansible-vault`. 
Store encrypted variables with a `vault_` -prefix to allow using grep: -and link location of the variable using same name without prefix in -corresponding `vars.yml`. - -In order to access secrets stored inside of the vault, you will need a -copy of the vault password encrypted with your PGP key. This file should -be stored inside of `~/.ssh/ooni-sysadmin.vaultpw.gpg`. - -The file should be provided by other teammates and GPG-encrypted for your own GPG key. - -#### SSH Configuration - -You should configure your `~/.ssh/config` with the following: - -``` - IdentitiesOnly yes - ServerAliveInterval 120 - UserKnownHostsFile ~/.ssh/known_hosts ~/REPLACE_ME/sysadmin/ext/known_hosts - - host *.ooni.io - user YOUR_USERNAME - - host *.ooni.nu - user YOUR_USERNAME - - host *.ooni.org - user YOUR_USERNAME -``` - -Replace `~/REPLACE_ME/sysadmin/ext/known_hosts` to where you have cloned -the `ooni/sysadmin` repo. This will ensure you use the host key -fingeprints from this repo instead of just relying on TOFU. - -You should replace `YOUR_USERNAME` with your username from `adm_login`. - -On MacOS you may want to also add: - - host * - UseKeychain yes - -To use the Keychain to store passwords. - -### Ansible playbooks summary - -Usage: - - ./play deploy-.yml -l --diff -C - ./play deploy-.yml -l --diff - -> **warning** -> any minor error in configuration files or ansible's playbooks can be -> destructive for the backend infrastructure. Always test-run playbooks -> with `--diff` and `-C` at first and carefully verify configuration -> changes. After verification run the playbook without `-C` and verify -> again the applied changes. - -> **note** > [Etckeeper](#etckeeper) πŸ”§ can be useful to verify configuration -> changes from a different point of view. - -Some notable parts of the repository: - -A list of the backend hosts lives at - - -The backend deployment playbook lives at - - -Many playbooks depend on roles that configure the OS, named -`base-`, for example: - -for Debian Bookworm and - -for Debian Bullseye - -The nftables firewall is configured to read every `.nft` file under -`/etc/ooni/nftables/` and `/etc/ooni/nftables/`. This allows roles to -create small files to open a port each and keep the configuration as -close as possible to the ansible step that deploys a service. For -example: - - -> **note** -> Ansible announces its runs on [ooni-bots](##ooni-bots) πŸ’‘ unless running with `-C`. - -#### The root account - -Runbooks use ssh to log on the hosts using your own account and leveraging `sudo` to act as root. - -The only exception is when a new host is being deployed - in that case ansible will log in as root to create -individual accounts and lock out the root user. - -When running the entire runbook ansible might try to run it as root. -This can be avoided by selecting only the required tags using `-t `. - -Ideally the root user should be disabled after succesfully creating user accounts. - -#### Roles layout - -Ansible playbooks use multiple roles (see -[example](https://github.com/ooni/sysadmin/blob/master/ansible/deploy-backend.yml#L46)) -to deploy various components. - -Few roles use the `meta/main.yml` file to depend on other roles. See -[example](https://github.com/ooni/sysadmin/blob/master/ansible/roles/ooni-backend/meta/main.yml) - -> **note** -> The latter method should be used sparingly because ansible does not -> indicate where each task in a playbook is coming from. 
- -A diagram of the role dependencies for the deploy-backend.yml playbook: - -```mermaid - -flowchart LR - A(deploy-backend.yml) --> B(base-bullseye) - B -- meta --> G(adm) - A --> F(nftables) - A --> C(nginx-buster) - A --> D(dehydrated) - D -- meta --> C - E -- meta --> F - A --> E(ooni-backend) - style B fill:#eeffee - style C fill:#eeffee - style D fill:#eeffee - style E fill:#eeffee - style F fill:#eeffee - style G fill:#eeffee -``` - -A similar diagram for deploy-monitoring.yml: - -```mermaid - -flowchart LR - B -- meta --> G(adm) - M(deploy-monitoring.yml) --> B(base-bookworm) - M --> O(ooca-cert) - M --> F(nftables) - M --> D(dehydrated) -- meta --> N(nginx-buster) - M --> P(prometheus) - M --> X(blackbox-exporter) - M --> T(alertmanager) - style B fill:#eeffee - style D fill:#eeffee - style F fill:#eeffee - style G fill:#eeffee - style N fill:#eeffee - style O fill:#eeffee - style P fill:#eeffee - style T fill:#eeffee - style X fill:#eeffee -``` - -> **note** -> When deploying files or updating files already existing on the hosts it can be useful to add a note e.g. "Deployed by ansible, see ". -> This helps track down how files on the host were modified and why. - -### Etckeeper +## Etckeeper Etckeeper is deployed on backend hosts and keeps the `/etc` directory under git version control. It @@ -259,102 +105,101 @@ Use `etckeeper commit ` to commit changes. Etckeeper commits changes automatically when APT is used or on daily basis, whichever comes first. ::: -### Team credential repository +## Devops credentials -A private repository contains team -credentials, including username/password tuples, GPG keys and more. +Credentials necessary for the deployment of backend infrastructure components should be stored inside of [AWS Systems Manager Parameter Store](https://docs.aws.amazon.com/systems-manager/latest/userguide/systems-manager-parameter-store.html). The same key name should be used in both production and development environment, but a different value shall be used across environments. -> **warning** -> The credential file is GPG-encrypted as `credentials.json.gpg`. Do not -> commit the cleartext `credentials.json` file. +:::note +We previously were using secrets manager, but are in the process of moving over all secerets to parameter store, see: https://github.com/ooni/devops/issues/114. -> **note** -> The credentials are stored in a JSON file to allow a flexible, -> hierarchical layout. This allow storing metadata like descriptions on -> account usage, dates of account creations, expiry, and credential -> rotation time. - -The tool checks JSON syntax and sorts keys automatically. - -#### Listing file contents - - git pull - make show - -#### Editing contents - - git pull - make edit - git commit credentials.json.gpg -m "" - git push - -#### Extracting a credential programmatically: - - git pull - ./extract 'grafana.username' - -> **note** -> this can be used to automate credential retrieval from other tools, e.g. -> [Ansible](#ansible) πŸ”§ +Once this is complete this note can be removed. +::: -#### Updating users allowed to decrypt the credentials file +## DNS and Domains -Edit `makefile` to add or remove recipients (see `--recipient`) +The primary domains used by the backend are: +- `ooni.org` +- `ooni.io` +- `ooni.nu` -Then run: +DNS is managed inside of route53. Where a static configuration is needed, this is added to the terraform `tf/environments/prod/dns_records.tf` file. 
For records that are populated as part of IaC deployments, those can be registered and written directly using terraform itself.

    git pull
    make decrypt encrypt
    git commit makefile credentials.json.gpg
    git push

+For the `ooni.io` and `ooni.nu` zones, we have also delegated two sub-zones: one for the `dev` and one for the `prod` environment. This allows the dev environment to manage its own zone, like the production environment would, while keeping the two properly compartmentalized.

-### DNS diagrams

+This leads us to the following zones:
+* `ooni.org` root zone, managed in the prod environment
+* `ooni.io` root zone, managed in the prod environment
+* `ooni.nu` root zone, managed in the prod environment
+* `prod.ooni.io` delegated zone, managed in the prod environment
+* `prod.ooni.nu` delegated zone, managed in the prod environment
+* `dev.ooni.io` delegated zone, managed in the dev environment
+* `dev.ooni.nu` delegated zone, managed in the dev environment

-#### A:

+### DNS naming policy

-See
-

+The public-facing name of a service follows this format:

-The image is not included here due to space constraints.

+- `<service_name>.ooni.org`

-#### CNAME:

+Examples:

-![CNAME](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.CNAME.svg)

+- `explorer.ooni.org`
+- `run.ooni.org`

-#### MX:

+Public-facing means the FQDNs are used directly by external users, services, or
+embedded in the probes. They cannot be changed or retired without causing
+outages.

-![MX](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.MX.svg)

+Use public-facing names sparingly and, when possible, start off by creating a
+private name first.
+Not every host needs to have a public-facing name. For example staging and
+testing environments might not have a public-facing name.

-#### NS:

+Each service also has a public name which points to the specific host running that
+service, and these are hosted in the `.io` zone.
+This is helpful because sometimes you might have the same host running multiple
+services, or you might have multiple services behind the same public service
+endpoint (e.g. in the case of an API gateway setup).

-![NS](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.NS.svg)

+Names in the `.io` zone should always also include the environment name they are
+related to:

-#### TXT:

+- `<service_name>.prod.ooni.io` for production services
+- `<service_name>.test.ooni.io` for test services

-![TXT](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.TXT.svg)

+When there may be multiple instances of a service running, you can append a
+number to the service name. Otherwise the service name should be only alphabetic
+characters.

-#### HTTP Moved Permanently (HTTP code 301):

+Examples:

-![URL301](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.URL301.svg)

+- `clickhouse.prod.ooni.io`
+- `postgres0.prod.ooni.io`
+- `postgres1.prod.ooni.io`
+- `prometheus.prod.ooni.io`
+- `grafana.prod.ooni.io`

-#### HTTP Redirects:

+Finally, the actual host which runs the service should have an FQDN defined
+inside of the `.nu` zone.

-![URL](https://raw.githubusercontent.com/ooni/sysadmin/master/ext/dnsgraph.URL.svg)

+This might not apply to every host, especially in a cloud environment. The FQDNs
+in the `.nu` zone are the ones which are going to be stored in the ansible inventory
+file and will be used as targets for configuration management.
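+
+Concretely, this means the ansible inventory groups hosts by location tag (explained below) and lists them by their `.nu` FQDN. A hypothetical excerpt, purely for illustration (the real list lives in `ansible/inventory`):
+
+```ini
+[htz_fsn]
+monitoring.htz-fsn.prod.ooni.nu
+data1.htz-fsn.prod.ooni.nu
+```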
-#### Updating DNS diagrams +The structure of these domains is: -To update the diagrams use the sysadmin repository: +- `..[prod|test].ooni.nu` -Update the `./ext/dns.json` file: +The location tag can be either just the provider name or provider name `-` the location. - cd ansible - ./play ext-inventory.yml -t namecheap - cd .. +Here is a list of location tags: -Then run -to generate the charts: +- `htz-fsn`: Hetzner on Falkenstein +- `htz-hel`: Hetzner in Helsinki +- `grh-ams`: Greenhost in Amsterdam +- `grh-mia`: Greenhost in Miami +- `aws-fra`: AWS in Europe (Frankfurt) - ./scripts/dnsgraph +Examples: -It will generate SVG files under the `./ext/` directory. Finally, commit -and push the dns.json and SVG files. +- `monitoring.htz-fsn.prod.ooni.nu` diff --git a/docs/LegacyDocs.md b/docs/LegacyDocs.md index 785ae2f9..ad68451c 100644 --- a/docs/LegacyDocs.md +++ b/docs/LegacyDocs.md @@ -180,3 +180,53 @@ LIMIT 10 See [Selecting test helper for rotation](#selecting-test-helper-for-rotation) 🐞 + +## Legacy credentials store + +A private repository contains team +credentials, including username/password tuples, GPG keys and more. + +> **warning** +> The credential file is GPG-encrypted as `credentials.json.gpg`. Do not +> commit the cleartext `credentials.json` file. + +> **note** +> The credentials are stored in a JSON file to allow a flexible, +> hierarchical layout. This allow storing metadata like descriptions on +> account usage, dates of account creations, expiry, and credential +> rotation time. + +The tool checks JSON syntax and sorts keys automatically. + + +#### Listing file contents + + git pull + make show + +#### Editing contents + + git pull + make edit + git commit credentials.json.gpg -m "" + git push + +#### Extracting a credential programmatically: + + git pull + ./extract 'grafana.username' + +> **note** +> this can be used to automate credential retrieval from other tools, e.g. 
+> [Ansible](#ansible) πŸ”§ + +#### Updating users allowed to decrypt the credentials file + +Edit `makefile` to add or remove recipients (see `--recipient`) + +Then run: + + git pull + make decrypt encrypt + git commit makefile credentials.json.gpg + git push diff --git a/scripts/build-docs.sh b/scripts/build-docs.sh index 2a26f8fe..7bc606da 100755 --- a/scripts/build-docs.sh +++ b/scripts/build-docs.sh @@ -14,8 +14,8 @@ strip_title() { } generate_doc() { - local slug="$1" - local order="$2" + local order="$1" + local slug="$2" local input_file="$3" local output_file="$4" local title="$5" @@ -36,13 +36,13 @@ EOF strip_title "$input_file" >> "$DOCS_ROOT/$output_file" } -generate_doc 0 "README.md" "00-index.md" "OONI Devops" "OONI OONI Devops" "devops" -generate_doc 1 "docs/Infrastructure.md" "01-infrastructure.md" "Infrastructure" "Infrastructure documentation" "devops/infrastructure" -generate_doc 2 "docs/MonitoringAlerts.md" "02-monitoring-alerts.md" "Monitoring" "Monitoring and Alerts documentation" "devops/monitoring" +generate_doc 0 "devops" "README.md" "00-index.md" "OONI Devops" "OONI OONI Devops" +generate_doc 1 "devops/infrastructure" "docs/Infrastructure.md" "01-infrastructure.md" "Infrastructure" "Infrastructure documentation" +generate_doc 2 "devops/monitoring" "docs/MonitoringAlerts.md" "02-monitoring-alerts.md" "Monitoring" "Monitoring and Alerts documentation" generate_doc 3 "docs/Runbooks.md" "03-runbooks.md" "Runbooks" "Runbooks docs" "devops/runbooks" -generate_doc 4 "docs/IncidentResponse.md" "04-incident-response.md" "Incident response" "Incident response handling guidelines" "devops/incident-response" -generate_doc 5 "tf/README.md" "05-terraform.md" "Terraform setup" "Terraform setup" "devops/terraform" -generate_doc 6 "ansible/README.md" "06-ansible.md" "Ansible setup" "Ansible setup" "devops/ansible" -generate_doc 7 "docs/Tools.md" "07-tools.md" "Misc Tools" "Misc Tools" "devops/tools" -generate_doc 8 "docs/DebianPackages.md" "08-debian-packages.md" "Debian Packages" "Debian Packages" "devops/debian-packages" -generate_doc 9 "docs/LegacyDocs.md" "09-legacy-docs.md" "Legacy Documentation" "Legacy Documentation" "devops/legacy-docs" +generate_doc 4 "devops/incident-response" "docs/IncidentResponse.md" "04-incident-response.md" "Incident response" "Incident response handling guidelines" +generate_doc 5 "devops/terraform" "tf/README.md" "05-terraform.md" "Terraform setup" "Terraform setup" +generate_doc 6 "devops/ansible" "ansible/README.md" "06-ansible.md" "Ansible setup" "Ansible setup" +generate_doc 7 "devops/tools" "docs/Tools.md" "07-tools.md" "Misc Tools" "Misc Tools" +generate_doc 8 "devops/debian-packages" "docs/DebianPackages.md" "08-debian-packages.md" "Debian Packages" "Debian Packages" +generate_doc 9 "devops/legacy-docs" "docs/LegacyDocs.md" "09-legacy-docs.md" "Legacy Documentation" "Legacy Documentation" From fa2ba444c1547a58857cc424fcc4205eca5058ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 6 Jan 2025 17:24:18 +0100 Subject: [PATCH 65/88] Add ssh-keygen command --- ansible/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ansible/README.md b/ansible/README.md index c91e3cdb..f8fc08f1 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -24,6 +24,14 @@ pip install -r requirements/python.yml ansible-galaxy install -r requirements/ansible-galaxy.yml ``` +In order to gain access to machines you will have to add your public key to the +`ssh_users` variable inside of `ansible/group_vars/all/vars.yml`. 
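+
+An entry in that mapping follows the shape below (a hypothetical user is shown here purely for illustration; the real entries live in `ansible/group_vars/all/vars.yml`):
+
+```yaml
+ssh_users:
+  jane:
+    login: jane
+    comment: Jane Doe
+    keys:
+      - "ssh-ed25519 AAAA...truncated... jane@example.org"
+```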
+ +It's recommended you generate an `ed25519` key using the following command: +``` +ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519_ooni +``` + ### AWS configuration You should then setup AWS credentials, by adding 2 profiles called `oonidevops_user_dev` and `oonidevops_user_prod` which have access to the development and production environment respectively. From 911944c95cfd2dbd02d62550dade5e004da23fc0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Mon, 6 Jan 2025 18:44:38 +0100 Subject: [PATCH 66/88] Add high level infrastructure doc --- README.md | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 38671cf9..fa9b8c7a 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,26 @@ # OONI Devops -This documentation contains information +At a glance below is the overall architecture of OONI Infrastructure across our various locations: + +```mermaid +flowchart TB + apiorg([api.ooni.org])-->alb + apiio([api.ooni.io])-->backend + ecs[Backend API ECS]<-->ch[(Clickhouse Cluster)] + subgraph Hetzner + backend[OONI Backend Monolith]<-->ch + monitoring[Monitoring host] + pipeline[Pipeline v5] + end + subgraph AWS + alb[API Load Balancer]<-->ecs + alb-->backend + ecs<-->s3[(OONI S3 Buckets)] + s3<-->backend + end + subgraph Digital Ocean + th[Web Connectivity Test helper]<-->alb + end +``` + +For more details [Infrastructure docs](https://docs.ooni.org/devops/infrastructure/) From 63932d121a73b6a837ec9caf98b8a4155e45de85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Tue, 7 Jan 2025 10:41:25 +0100 Subject: [PATCH 67/88] Grant tony access to main tf env --- tf/environments/prod/main.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf index f3a01dde..c70203bd 100644 --- a/tf/environments/prod/main.tf +++ b/tf/environments/prod/main.tf @@ -86,6 +86,7 @@ module "adm_iam_roles" { authorized_accounts = [ "arn:aws:iam::${local.ooni_main_org_id}:user/art", + "arn:aws:iam::${local.ooni_main_org_id}:user/tony", "arn:aws:iam::${local.ooni_main_org_id}:user/mehul" ] } From 7e2ea986c60373ddaf02746aa08e301598ebe8dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tony=20Morell=C3=A1=20Ll=C3=A1cer?= Date: Tue, 7 Jan 2025 14:11:07 +0100 Subject: [PATCH 68/88] Add Tony to ssh users (#137) --- ansible/group_vars/all/vars.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index c0b94053..3bf27a65 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -13,6 +13,11 @@ ssh_users: [ "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJsibU0nsQFFIdolD1POzXOws4VetV0ZNByINRzY8Hx0 arturo@ooni.org", ] + hynnot: + login: hynnot + comment: Tony Morella + keys: + - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBTiOgr4PenzkF03NqFTGgacZ1BUWLkdCS1xNba4iLfP hynnot" majakomel: login: majakomel comment: Maja Komel From ed2f755670d1ebb9343af2a760abd8c7a8a45f8e Mon Sep 17 00:00:00 2001 From: Luis Diaz <41093870+LDiazN@users.noreply.github.com> Date: Tue, 7 Jan 2025 13:11:20 +0000 Subject: [PATCH 69/88] Access Request (#136) Add me to the ssh users --- ansible/group_vars/all/vars.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index 3bf27a65..8f660d78 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -33,6 +33,11 @@ ssh_users: comment: Norbel Ambanumben keys: - "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAACAQDBXprrutdT6AhrV9hWBKjyzq6RqGmCBWpWxi3qwJyRcBJfkiEYKV9QWl3H0g/Sg9JzLd9lWG2yfAai7cyBAT4Ih0+OhwQ0V7wkhBn4YkNjs7d4BGPHjuLIywS9VtmiyH7VafikMjmqPLL/uPBIbRrx9RuSfLkAuN9XFZpVmqzWY8ePpcRCvnG6ucPxEY8o+4j5nfTrgxSaIT31kH16/PFJe07tn1SZjxZE4sZTz/p9xKt6s8HXmlP3RdnXSpXWmH8ZwYDrNhkcH8m6mC3giiqSKThFdwvQVflRRvn9pAlUOhy6KIBtAt1KobVJtOCPrrkcLhQ1C+2P9wKhfYspCGrScFGnrUqumLxPpwlqILxJvmgqGAtkm8Ela9f2D9sEv8CUv5x9XptZKlyRhtOLixvLYoJlwfXXnmXa8T1pg8+4063BhHUOu/bg0InpSp3hdscOfk0R8FtDlXnn6COwbPXynIt4PxzIxD/WQhP0ymgH3ky6ClB5wRBVhOqYvxQw32n2QFS9A5ocga+nATiOE7BTOufgmDCA/OIXfJ/GukXRaMCBsvlx7tObHS1LOMt0I+WdoOEjI0ARUrFzwoiTrs9QYmd922e7S35EnheT3JjnCTjebJrCNtwritUy8vjsN/M27wJs7MAXleT7drwXXnm+3xYrH+4KQ+ru0dxMe1zfBw== aanorbel@gmail.com" + luis: + login: luis + comment: Luis Diaz + keys: + - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHc04zv+G8vGOS/znLy6xd3lB0/B07uaFjgyh4UgqUMA luis@openobservatory.org" admin_usernames: [art, mehul] root_usernames: [art, mehul] From 9205c8d39b3aa017f7b2706a648e7b449f0dd234 Mon Sep 17 00:00:00 2001 From: Armin Huremagic <63638293+agiix@users.noreply.github.com> Date: Wed, 8 Jan 2025 10:04:07 +0100 Subject: [PATCH 70/88] Request access to notebook.ooni.org (#124) --- ansible/host_vars/notebook.ooni.org | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org index 0fbb91e9..1c3078e6 100644 --- a/ansible/host_vars/notebook.ooni.org +++ b/ansible/host_vars/notebook.ooni.org @@ -64,6 +64,12 @@ ssh_users: comment: "Ben Ginoe" keys: - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOWdWCATiHUAzoS3mn3pFMIYDmi3n4Ekuzv5cEtvV0W1 root@parrot" + + agix: + login: agix + comment: "Armin Huremagic" + keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOzCkhJ4DgtReYaR4MVh+FzDw7s2j3v4qBmE+Mpk+igc agix@riseup.net"] + admin_usernames: [ art, agrabeli, majakomel, mehul, norbel ] non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael, benginoe ] jupyterhub_allowed_users: "{{ ssh_users }}" From 98c2150f70e6713fe1c6994e2688ee2e5c1ba751 Mon Sep 17 00:00:00 2001 From: Felix Hoffmann Date: Wed, 8 Jan 2025 10:06:05 +0100 Subject: [PATCH 71/88] Add credentials for notebook server (#115) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Arturo FilastΓ² --- ansible/host_vars/notebook.ooni.org | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org index 1c3078e6..190b5d14 100644 --- a/ansible/host_vars/notebook.ooni.org +++ b/ansible/host_vars/notebook.ooni.org @@ -64,14 +64,18 @@ ssh_users: comment: "Ben Ginoe" keys: - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOWdWCATiHUAzoS3mn3pFMIYDmi3n4Ekuzv5cEtvV0W1 root@parrot" - + felixhoffmnn: + login: felixhoffmnn + comment: "Felix Hoffmann" + keys: + - "sk-ssh-ed25519@openssh.com AAAAGnNrLXNzaC1lZDI1NTE5QG9wZW5zc2guY29tAAAAIHsT7RNb3xSc4jseb6vPPvC2ORWQHQr66AQR54Vikt/cAAAAB3NzaDpocGk= ssh:hpi" agix: login: agix comment: "Armin Huremagic" keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOzCkhJ4DgtReYaR4MVh+FzDw7s2j3v4qBmE+Mpk+igc agix@riseup.net"] admin_usernames: [ art, agrabeli, majakomel, mehul, norbel ] -non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael, benginoe ] +non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael, benginoe, felixhoffmnn ] jupyterhub_allowed_users: "{{ ssh_users }}" admin_group_name: admin From e393643158dc5bda211bb3cf776b269afa925ea9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 
8 Jan 2025 10:06:21 +0100 Subject: [PATCH 72/88] Add tony luis (#138) Add @hynnot and @LDiazN to the terraform configs --- tf/environments/dev/main.tf | 6 +++--- tf/environments/prod/main.tf | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/tf/environments/dev/main.tf b/tf/environments/dev/main.tf index 2b14235b..c7c32937 100644 --- a/tf/environments/dev/main.tf +++ b/tf/environments/dev/main.tf @@ -75,10 +75,10 @@ module "adm_iam_roles" { source = "../../modules/adm_iam_roles" authorized_accounts = [ + "arn:aws:iam::${local.ooni_main_org_id}:user/art", "arn:aws:iam::${local.ooni_main_org_id}:user/mehul", - "arn:aws:iam::${local.ooni_dev_org_id}:user/mehul", - "arn:aws:iam::${local.ooni_dev_org_id}:user/art", - "arn:aws:iam::${local.ooni_main_org_id}:user/art" + "arn:aws:iam::${local.ooni_main_org_id}:user/luis", + "arn:aws:iam::${local.ooni_main_org_id}:user/tony" ] } diff --git a/tf/environments/prod/main.tf b/tf/environments/prod/main.tf index f3a01dde..d9152ae5 100644 --- a/tf/environments/prod/main.tf +++ b/tf/environments/prod/main.tf @@ -86,7 +86,9 @@ module "adm_iam_roles" { authorized_accounts = [ "arn:aws:iam::${local.ooni_main_org_id}:user/art", - "arn:aws:iam::${local.ooni_main_org_id}:user/mehul" + "arn:aws:iam::${local.ooni_main_org_id}:user/luis", + "arn:aws:iam::${local.ooni_main_org_id}:user/mehul", + "arn:aws:iam::${local.ooni_main_org_id}:user/tony" ] } From ad4d918af89714d1b5977c44b90cfa153caaca10 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:08:13 +0100 Subject: [PATCH 73/88] Add tony and luis to admin users group --- ansible/group_vars/all/vars.yml | 5 ++--- ansible/roles/ssh_users/tasks/main.yml | 15 --------------- 2 files changed, 2 insertions(+), 18 deletions(-) diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index 8f660d78..499a8bb4 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -39,9 +39,8 @@ ssh_users: keys: - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHc04zv+G8vGOS/znLy6xd3lB0/B07uaFjgyh4UgqUMA luis@openobservatory.org" -admin_usernames: [art, mehul] -root_usernames: [art, mehul] +admin_usernames: [art, mehul, luis, tony] +root_usernames: [art, mehul, luis, tony] non_admin_usernames: [] -deactivated_usernames: [sbs, federico, sarath] prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}" diff --git a/ansible/roles/ssh_users/tasks/main.yml b/ansible/roles/ssh_users/tasks/main.yml index d3534dcd..c1cfdef5 100644 --- a/ansible/roles/ssh_users/tasks/main.yml +++ b/ansible/roles/ssh_users/tasks/main.yml @@ -51,21 +51,6 @@ mode: 0400 with_items: "{{ admin_usernames | union(non_admin_usernames) }}" - -- name: kill processes running as deactivated users - ansible.builtin.shell: - cmd: "pkill -U {{ item }}" - ignore_errors: true - with_items: "{{ deactivated_usernames }}" - -- name: remove any stale users - user: - name: "{{ item }}" - state: "absent" - remove: yes - force: yes - with_items: "{{ deactivated_usernames }}" - - name: configure sshd include_role: name: willshersystems.sshd From 1059e3876f93c9f3d1ebc02f121e71a82a1b32dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:09:05 +0100 Subject: [PATCH 74/88] Drop data2 record --- ansible/inventory | 2 -- tf/environments/prod/dns_records.tf | 8 -------- 2 files changed, 10 deletions(-) diff --git a/ansible/inventory b/ansible/inventory index 
1d3c41e9..48e1bfee 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -7,7 +7,6 @@ ghs_ams [clickhouse] notebook.ooni.org data1.htz-fsn.prod.ooni.nu -data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu [airflow] @@ -20,7 +19,6 @@ data.ooni.org monitoring.ooni.org notebook.ooni.org data1.htz-fsn.prod.ooni.nu -data2.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu [ghs_ams] diff --git a/tf/environments/prod/dns_records.tf b/tf/environments/prod/dns_records.tf index 88fb03d4..61852e28 100644 --- a/tf/environments/prod/dns_records.tf +++ b/tf/environments/prod/dns_records.tf @@ -982,14 +982,6 @@ resource "aws_route53_record" "data1-htz-fsn-prod-ooni-nu-_a_" { zone_id = local.dns_zone_ooni_nu } -resource "aws_route53_record" "data2-htz-fsn-prod-ooni-nu-_A_" { - name = "data2.htz-fsn.prod.ooni.nu" - records = ["88.198.54.12"] - ttl = "60" - type = "A" - zone_id = local.dns_zone_ooni_nu -} - resource "aws_route53_record" "data3-htz-fsn-prod-ooni-nu-_A_" { name = "data3.htz-fsn.prod.ooni.nu" records = ["168.119.7.188"] From 8905d8af6f7713c61567731922e0c824bf55de3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:10:42 +0100 Subject: [PATCH 75/88] Drop data.ooni.org --- ansible/inventory | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/inventory b/ansible/inventory index 48e1bfee..ff206cbe 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -15,7 +15,6 @@ data1.htz-fsn.prod.ooni.nu ## Location tags [htz_fsn] -data.ooni.org monitoring.ooni.org notebook.ooni.org data1.htz-fsn.prod.ooni.nu From fcf8a5fad71da4b0808908a60e700f1b10162a6a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:12:09 +0100 Subject: [PATCH 76/88] Remove discontinued data host and related host_vars --- ansible/host_vars/data.ooni.org | 65 --------------------------------- ansible/inventory | 2 - 2 files changed, 67 deletions(-) delete mode 100644 ansible/host_vars/data.ooni.org diff --git a/ansible/host_vars/data.ooni.org b/ansible/host_vars/data.ooni.org deleted file mode 100644 index 38c40068..00000000 --- a/ansible/host_vars/data.ooni.org +++ /dev/null @@ -1,65 +0,0 @@ -ssh_users: - agrabeli: - login: agrabeli - comment: Maria Xynou - keys: ["ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDD0JSwM+t3Uz9lS3Mjoz9oo4vOToWyzboZhYQbP8JY5HvFtAvWanWHnUBO91t6hkgKIMiUqhdCJn26fqkhSGe/bRBaFUocOmuyfcmZoRdi0qzAskmycJsj/w6vWR4x6MYkmJvSeI/MGxjEFt4s2MfOG1tP8CBLUYft9qUleeJa7Jln8c+xbnqB7YngaI190icQHE9NuIB2CXvzbmo3tLtHNMagEwI7VoBDj6mxzTxBd9JhuhF4w5uGxxm0Gp1hzk+15obNnaBS+Anr7jXz8FPwwxCH+XhBZxB1PPpcIayKrf9iLyGtwmhkdDoWCqYAr1mue3LxFso+TZF4bwE4Cjt1 agrabelh@agrabelh"] - art: - login: art - comment: Arturo Filasto - keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJsibU0nsQFFIdolD1POzXOws4VetV0ZNByINRzY8Hx0 arturo@ooni.org"] - majakomel: - login: majakomel - comment: Maja Komel - keys: - - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQC7gWQL4h/IyMbwDuMIXbTVmNEm8Yx19Ftt0P2e3OyWctSMH7WGaHc6b0dGoGh6Y4x0Kpw5h0iHWshP8Rg0pckNG9LeDjLY9nLR3Jv66ogFQtFi1DAlg4CXe369N70rBN9iurndgXjShW9OV+bY+MOlW8Fmmm67Vg0xFiYuYzjgUOpl4ofkbLGAQ7sJRBzpDV6TqHhGfOdYMDJyfFvurVz0oSyEZPFFRv4Css9iVk7BGsBukCCpUuax8akEeEjxWWCvjYXva7OA0jHKayfPAroZx/OJh01rhFe7wxlu5JwUKOcevvAZqeHh6200C82ijZOCN+Qq9yvxOH+OgzhnQwnoetIbGFgnb4CkDxo7dVLc/DFyObznC4f26f5D1OyPMUX8AEarEVdEPwsEfD2ePQr6qek0XWCWtYvGklb+GRLk9Yn0VL1qwvgrtstHdeXsKONTPKRxaCjWHu18dQaG2qOUnZ+St6SHeL49CN9aav2azNI/YKoQ9SGR4D23XeBRsW8=" - mehul: - login: mehul - comment: Mehul Gulati - keys: - - "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABgQDEZSA9TKUaYWG8gfnMoyDZO2S6vsy87xma4R/EzNpveZiOZTYSNn+UDL8NpQRuH5YgdWuQV2E7sKw/PIYA0lC/QTiq8Btqf6sEK5YWXtQy+yn9q5kB/rmi8zjaz0FUNigRrjL+26ao+c7NKpgmR+TRqbRd5VeJ46PuFD5M3c+MBeUoF1PT0zfioQFJ1mQoXwVix0n260clEXQDp4t0GZuNpWGTS+YTuJZ2vl6TDZtt8jrnENd99QArr2KU+NMTq8T2KYcPeQOoYsm7v/1TBkbv9UStllhjdE7HZSivPT8oRkF2YZYgytDxtCZG8i5iCK+vbNn6QmZMjuXPoBUeW+Njm70tlsirrKpUX+QiogA2qljxPD9st2eUkA7cATyOBkK7WLh1HYv2xyKpPtkkaELG+EHjmaVjVdyVAgUYwqg+MbIw1OyDpNmMZcW3iOpGpflXPMmLjKNMhee0//G7NxcGfwmIMbIiBkeofOnWDrMo+0PRULFtn6C7aA7ddirck+k=" - norbel: - login: norbel - comment: Norbel Ambanumben - keys: - - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAACAQDBXprrutdT6AhrV9hWBKjyzq6RqGmCBWpWxi3qwJyRcBJfkiEYKV9QWl3H0g/Sg9JzLd9lWG2yfAai7cyBAT4Ih0+OhwQ0V7wkhBn4YkNjs7d4BGPHjuLIywS9VtmiyH7VafikMjmqPLL/uPBIbRrx9RuSfLkAuN9XFZpVmqzWY8ePpcRCvnG6ucPxEY8o+4j5nfTrgxSaIT31kH16/PFJe07tn1SZjxZE4sZTz/p9xKt6s8HXmlP3RdnXSpXWmH8ZwYDrNhkcH8m6mC3giiqSKThFdwvQVflRRvn9pAlUOhy6KIBtAt1KobVJtOCPrrkcLhQ1C+2P9wKhfYspCGrScFGnrUqumLxPpwlqILxJvmgqGAtkm8Ela9f2D9sEv8CUv5x9XptZKlyRhtOLixvLYoJlwfXXnmXa8T1pg8+4063BhHUOu/bg0InpSp3hdscOfk0R8FtDlXnn6COwbPXynIt4PxzIxD/WQhP0ymgH3ky6ClB5wRBVhOqYvxQw32n2QFS9A5ocga+nATiOE7BTOufgmDCA/OIXfJ/GukXRaMCBsvlx7tObHS1LOMt0I+WdoOEjI0ARUrFzwoiTrs9QYmd922e7S35EnheT3JjnCTjebJrCNtwritUy8vjsN/M27wJs7MAXleT7drwXXnm+3xYrH+4KQ+ru0dxMe1zfBw== aanorbel@gmail.com" - ain: - login: ain - comment: Ain - keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIH6Js4xtJq7AoYA8mFraQg8vYgKz/glil9AaPq4lDwtg ain@intertubes"] - joss: - login: joss - comment: Joss Wright - keys: - [ - "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC40MVrMUuP93UvmyTc6cGGKbdurK+CbuoQW0b4i20EPf8pjXjrTS3b/plh7y4egUfx7/2e5l321Ui8E4tuzDWjtJsSFY9l31msEnw6PTHMzOj8kVBWqHWidVZHYPpd9eVa+3ALL9HmLDQuwyhIXXaU2ExS3XZH0GJPUxgf8tubH7qteyANWTIh1XhV0fnoeBo3fvdGVkNiMLc1PSDp/iprMurdsvwCJC42+Z5R35ORpK7FJhr38Js2Ag1NwUpo3Li+PhErfoQ5A+x64p1NRm1Bnw1Z8eFHuDI6WXuzUHhuy+11M92CtaAVEoblfx75SaCftoiO0Khn6kZ9XDed+JM1 joss@pseudonymity.net" - ] - ingrid: - login: ingrid - comment: Ingrid Epure - keys: - [ - "ssh-rsa AAAB3NzaC1yc2EAAAADAQABAAACAQCTJQDb/Ucq5CRGqSJbNz33pB6fYtk7Pi+6LlIaV9QLhByp/G2/g6ae6Eb/TimZtxpdeIwpAmACmUn2p+mCLMHjpollUK2f3dUjmXiUSNGMPRPRxQoIvzf56patUCQRS+S7zDUKTDW/5e18CrIj0sFCC27y/pS6mmmeedHA6gmpW7L6kM57BlsxFu79rr/o/nrNH+qceJBEd8fM93yoIdEwxPHZyKJ5kj9+lh+4TtDLxxkwFfc6Kce1d0qxfpX1NzIbK5Vp8JlXrGEWbOFFT8S7Ru+j1/g/ptUjsXJ7DpH1wwlF6wYsU0DJuhkLv6XFZQuoHYwpZ4jmnJRWrXSgdylPk67M5Dr9aB2j0WGJNZysiXVZQZmoMUhfrNxaGVv6gB48krE6ysUoLrenR68aLOYqF8Yqvu1lCIyds1ORtjnpxWxFB7NS89us4KFofAMW+qeg/g3nEYvln9/S0b58goToNIw/p7wP9WOeh7JuM/FBT5ahJbeYpXapJh1WW6Rt48RGVwxFLXbcnH8wpCfhUw7fIVpXMhbfhtWTlWVJEAyk3eLWdNEJ7AH6jaqTdfTa4qBgrof0MgoZrb64qFDAsG9Z80Uj9oC2Zdy+gwDu76WJQfSKaD7hmq0w8khoFSVju7fvcfd5HWgLZbptCIw51mJSMQIQWs8Y/iGijTSckXXCXQ==", - ] - siti: - login: siti - comment: "Siti Nurliza" - keys: - [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIKqG1VepfzDdSp3zG27jZq3S9/62CKPLh93F///ht9rf", - ] - vasilis: - login: vasilis - comment: "Vasilis Ververis" - keys: - [ - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIMJYsbeTjdma5cKyZISOFQfHbwwlZbWugPx9haeOx1UR" - ] - michael: - login: michael - comment: "Micheal Collyer" - keys: - - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPN4Ae+KfZEbhJuvHI3PXjgeu4V0ZFIpUy9bFuBKx76W michael.collyer@oii.ox.ac.uk" -admin_usernames: [ art, majakomel, mehul, norbel ] -non_admin_usernames: [ ain, siti, ingrid, joss, vasilis ] -jupyterhub_allowed_users: "{{ ssh_users }}" -admin_group_name: adm diff --git a/ansible/inventory b/ansible/inventory index ff206cbe..17f9a4f7 100644 --- a/ansible/inventory +++ 
b/ansible/inventory @@ -22,8 +22,6 @@ data3.htz-fsn.prod.ooni.nu [ghs_ams] openvpn-server1.ooni.io -amsmatomo.ooni.nu -db-1.proteus.ooni.io ams-slack-1.ooni.org #mia-echoth.ooni.nu #mia-httpth.ooni.nu From 81adcb593b2b1f301e3b0f85553e2294ef8e2dd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:13:39 +0100 Subject: [PATCH 77/88] Fix username for tony --- ansible/group_vars/all/vars.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/group_vars/all/vars.yml b/ansible/group_vars/all/vars.yml index 499a8bb4..b96b60fa 100644 --- a/ansible/group_vars/all/vars.yml +++ b/ansible/group_vars/all/vars.yml @@ -39,8 +39,8 @@ ssh_users: keys: - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHc04zv+G8vGOS/znLy6xd3lB0/B07uaFjgyh4UgqUMA luis@openobservatory.org" -admin_usernames: [art, mehul, luis, tony] -root_usernames: [art, mehul, luis, tony] +admin_usernames: [art, mehul, luis, hynnot] +root_usernames: [art, mehul, luis, hynnot] non_admin_usernames: [] prometheus_metrics_password: "{{ lookup('amazon.aws.aws_secret', 'oonidevops/ooni_services/prometheus_metrics_password', profile='oonidevops_user_prod') }}" From 284e989d327307fc598d7eaf5c6be2f0c60a4626 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:15:59 +0100 Subject: [PATCH 78/88] Add luis and hynnot to notebook host --- ansible/host_vars/notebook.ooni.org | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/ansible/host_vars/notebook.ooni.org b/ansible/host_vars/notebook.ooni.org index 190b5d14..141d387a 100644 --- a/ansible/host_vars/notebook.ooni.org +++ b/ansible/host_vars/notebook.ooni.org @@ -73,8 +73,18 @@ ssh_users: login: agix comment: "Armin Huremagic" keys: ["ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIOzCkhJ4DgtReYaR4MVh+FzDw7s2j3v4qBmE+Mpk+igc agix@riseup.net"] - -admin_usernames: [ art, agrabeli, majakomel, mehul, norbel ] + luis: + login: luis + comment: Luis Diaz + keys: + - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIHc04zv+G8vGOS/znLy6xd3lB0/B07uaFjgyh4UgqUMA luis@openobservatory.org" + hynnot: + login: hynnot + comment: Tony Morella + keys: + - "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIBTiOgr4PenzkF03NqFTGgacZ1BUWLkdCS1xNba4iLfP hynnot" + +admin_usernames: [ art, agrabeli, majakomel, mehul, norbel, luis, hynnot ] non_admin_usernames: [ ain, siti, ingrid, joss, vasilis, michael, benginoe, felixhoffmnn ] jupyterhub_allowed_users: "{{ ssh_users }}" admin_group_name: admin From 1b0be7c72c9b5ec6935984cfdb508afdb51a5b7d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:18:14 +0100 Subject: [PATCH 79/88] Only notify slack if we aren't running in check mode --- ansible/play | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ansible/play b/ansible/play index ffbc9904..26aeff3e 100755 --- a/ansible/play +++ b/ansible/play @@ -8,5 +8,8 @@ ANSIBLE_SLACK_CMD=`printf "%q " "$0" "$@"` ANSIBLE_SLACK_CMD="${ANSIBLE_SLACK_CMD% }" # strip trailing whitespace export ANSIBLE_SLACK_CMD -ansible localhost --module-name include_role --args name=notify-slack +# Check if --check or -C is present in the arguments +if [[ ! " $* " =~ " --check " && ! 
" $* " =~ " -C " ]]; then + ansible localhost --module-name include_role --args name=notify-slack +fi ansible-playbook "$@" From 1b2e3820ff41bd858d66c77089d226d72c9a6260 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:27:16 +0100 Subject: [PATCH 80/88] Add missing hosts to inventory --- ansible/inventory | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ansible/inventory b/ansible/inventory index 17f9a4f7..6b24654a 100644 --- a/ansible/inventory +++ b/ansible/inventory @@ -19,9 +19,15 @@ monitoring.ooni.org notebook.ooni.org data1.htz-fsn.prod.ooni.nu data3.htz-fsn.prod.ooni.nu +#backend-fsn.ooni.org + +[htz_hel] +backend-hel.ooni.org [ghs_ams] openvpn-server1.ooni.io ams-slack-1.ooni.org +ams-ps.ooni.nu +# currently disabled due to them not supporting ed25519 keys #mia-echoth.ooni.nu #mia-httpth.ooni.nu From 47af40a10084d78d38fc4aefed76504f9b649852 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:41:44 +0100 Subject: [PATCH 81/88] Fix nftables config of ams-slack-1 --- ansible/host_vars/ams-slack-1.ooni.org | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 ansible/host_vars/ams-slack-1.ooni.org diff --git a/ansible/host_vars/ams-slack-1.ooni.org b/ansible/host_vars/ams-slack-1.ooni.org new file mode 100644 index 00000000..21dc7d2f --- /dev/null +++ b/ansible/host_vars/ams-slack-1.ooni.org @@ -0,0 +1,12 @@ +nft_rules_tcp: + - name: 22 + rules: + - add rule inet filter input tcp dport 22 counter accept comment "Incoming SSH" + - name: 80 + rules: + - add rule inet filter input tcp dport 80 counter accept comment "incoming HTTP" + - name: 443 + rules: + - add rule inet filter input tcp dport 443 counter accept comment "incoming HTTPS" + - name: 9100 + - add rule inet filter input ip saddr 5.9.112.244 tcp dport 9100 counter accept comment "clickhouse prometheus from monitoring.ooni.org" From a19e5bd42bc162fe4715aa2028e95a9addc49502 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 10:42:25 +0100 Subject: [PATCH 82/88] Fix indent --- ansible/host_vars/ams-slack-1.ooni.org | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ansible/host_vars/ams-slack-1.ooni.org b/ansible/host_vars/ams-slack-1.ooni.org index 21dc7d2f..f6dec97a 100644 --- a/ansible/host_vars/ams-slack-1.ooni.org +++ b/ansible/host_vars/ams-slack-1.ooni.org @@ -8,5 +8,6 @@ nft_rules_tcp: - name: 443 rules: - add rule inet filter input tcp dport 443 counter accept comment "incoming HTTPS" - - name: 9100 - - add rule inet filter input ip saddr 5.9.112.244 tcp dport 9100 counter accept comment "clickhouse prometheus from monitoring.ooni.org" + - name: 9100 + rules: + - add rule inet filter input ip saddr 5.9.112.244 tcp dport 9100 counter accept comment "clickhouse prometheus from monitoring.ooni.org" From c6ac8bcaba899cba1a2abb097c88e50f95e733cf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 11:44:18 +0100 Subject: [PATCH 83/88] Reload sshd at the end of ssh_users role --- ansible/roles/ssh_users/tasks/main.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ansible/roles/ssh_users/tasks/main.yml b/ansible/roles/ssh_users/tasks/main.yml index c1cfdef5..d67b7acb 100644 --- a/ansible/roles/ssh_users/tasks/main.yml +++ b/ansible/roles/ssh_users/tasks/main.yml @@ -79,3 +79,8 @@ ansible.builtin.file: path: /etc/sudoers.d/adm state: absent + +- name: reload sshd + ansible.builtin.systemd_service: + name: sshd + state: reloaded 
From 5818ddf2a842d1d2bc2964b36a338e8b3e62b6ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 11:44:54 +0100 Subject: [PATCH 84/88] Remove duplicate key from airflow group vars --- ansible/group_vars/airflow/vars.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/ansible/group_vars/airflow/vars.yml b/ansible/group_vars/airflow/vars.yml index e4946716..3fb68ebe 100644 --- a/ansible/group_vars/airflow/vars.yml +++ b/ansible/group_vars/airflow/vars.yml @@ -9,5 +9,4 @@ airflow_admin_users: airflow_fernet_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_fernet_key', profile='oonidevops_user_prod') }}" airflow_webserver_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_webserver_secret_key', profile='oonidevops_user_prod') }}" airflow_executor: "LocalExecutor" -airflow_webserver_secret_key: "{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_webserver_secret_key', profile='oonidevops_user_prod') }}" airflow_database_conn: "postgresql+psycopg2://airflow:{{ lookup('amazon.aws.aws_ssm', '/oonidevops/secrets/airflow_postgresql_password', profile='oonidevops_user_prod') }}@ooni-tier0-postgres.c7mgscca82no.eu-central-1.rds.amazonaws.com/airflow" From 37ddd386b2754aa8efce288357069198f65fca51 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 13:57:00 +0100 Subject: [PATCH 85/88] Update ansible credentials config --- ansible/README.md | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/ansible/README.md b/ansible/README.md index f8fc08f1..cbe2d3f7 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -39,21 +39,15 @@ You should then setup AWS credentials, by adding 2 profiles called `oonidevops_u To this end edit your `~/.aws/credentials` file to contain: ``` +[oonidevops_user] +aws_access_key_id = YYYY +aws_secret_access_key = ZZZ [oonidevops_user_dev] -aws_access_key_id = XXX -aws_secret_access_key = YYY -source_profile = default -region = eu-central-1 -# ARN of the dev role role_arn = arn:aws:iam::905418398257:role/oonidevops - +source_profile = oonidevops_user [oonidevops_user_prod] -aws_access_key_id = XXX -aws_secret_access_key = YYY -source_profile = default -region = eu-central-1 -# ARN of the prod role role_arn = arn:aws:iam::471112720364:role/oonidevops +source_profile = oonidevops_user ``` ### SSH configuration From cd96be7553e035f46635cb9b11474977833fee66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 13:57:49 +0100 Subject: [PATCH 86/88] Update credentials setup in terraform docs --- tf/README.md | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/tf/README.md b/tf/README.md index 8f1ee41a..b9ae6785 100644 --- a/tf/README.md +++ b/tf/README.md @@ -9,9 +9,14 @@ Terraform is used for managing the OONI infrastructure as code. 
``` [oonidevops_user] -aws_access_key_id = XXXX -aws_secret_access_key = YYYY -role_arn = arn:aws:iam::OONI_ORG_ID:role/oonidevops +aws_access_key_id = YYYY +aws_secret_access_key = ZZZ +[oonidevops_user_dev] +role_arn = arn:aws:iam::905418398257:role/oonidevops +source_profile = oonidevops_user +[oonidevops_user_prod] +role_arn = arn:aws:iam::471112720364:role/oonidevops +source_profile = oonidevops_user ``` Where you replace OONI_ORG_ID with the ID of the ORG you are deploying to (dev, From 547426b227e0f2960a1bd76cf6c6cad9a81bfcac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 13:58:45 +0100 Subject: [PATCH 87/88] Avoid duplicating information --- ansible/README.md | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) diff --git a/ansible/README.md b/ansible/README.md index cbe2d3f7..8b220965 100644 --- a/ansible/README.md +++ b/ansible/README.md @@ -34,21 +34,7 @@ ssh-keygen -t ed25519 -f ~/.ssh/id_ed25519_ooni ### AWS configuration -You should then setup AWS credentials, by adding 2 profiles called `oonidevops_user_dev` and `oonidevops_user_prod` which have access to the development and production environment respectively. - -To this end edit your `~/.aws/credentials` file to contain: - -``` -[oonidevops_user] -aws_access_key_id = YYYY -aws_secret_access_key = ZZZ -[oonidevops_user_dev] -role_arn = arn:aws:iam::905418398257:role/oonidevops -source_profile = oonidevops_user -[oonidevops_user_prod] -role_arn = arn:aws:iam::471112720364:role/oonidevops -source_profile = oonidevops_user -``` +Refer to the [terraform docs](devops/terraform/) for setting up your AWS configuration. ### SSH configuration From 851723b040c7748f873123d5f2699bf72aa7143f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Arturo=20Filast=C3=B2?= Date: Wed, 8 Jan 2025 18:05:53 +0100 Subject: [PATCH 88/88] Add issues to project workflow --- .github/workflows/add_issues_to_project.yml | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 .github/workflows/add_issues_to_project.yml diff --git a/.github/workflows/add_issues_to_project.yml b/.github/workflows/add_issues_to_project.yml new file mode 100644 index 00000000..b1f07aac --- /dev/null +++ b/.github/workflows/add_issues_to_project.yml @@ -0,0 +1,14 @@ +on: + issues: + types: + - opened + +jobs: + add-to-project: + name: Add issue to project + runs-on: ubuntu-latest + steps: + - uses: actions/add-to-project@RELEASE_VERSION + with: + project-url: https://github.com/orgs/ooni/projects/31 + github-token: ${{ secrets.ADD_TO_PROJECT_GH_TOKEN }}