From aac6938c1299a3d1144cad288b39f0563c9a106e Mon Sep 17 00:00:00 2001
From: Jonatan Heyman
Date: Wed, 31 Jan 2024 10:19:51 +0100
Subject: [PATCH] Add tag for PostgreSQL 16

---
 16/Dockerfile    |  21 +++++++
 16/backup.py     | 143 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 16/entrypoint.sh |  12 ++++
 16/restore.py    |  81 +++++++++++++++++++++++++++++
 4 files changed, 257 insertions(+)
 create mode 100644 16/Dockerfile
 create mode 100644 16/backup.py
 create mode 100755 16/entrypoint.sh
 create mode 100644 16/restore.py

diff --git a/16/Dockerfile b/16/Dockerfile
new file mode 100644
index 0000000..c36aa7d
--- /dev/null
+++ b/16/Dockerfile
@@ -0,0 +1,21 @@
+FROM postgres:16-alpine
+
+# Install dependencies
+RUN apk update && apk add --no-cache --virtual .build-deps && apk add \
+    bash make curl openssh git
+
+# Install aws-cli
+RUN apk -Uuv add groff less python3 py-pip && pip install awscli
+# Cleanup
+RUN rm /var/cache/apk/*
+
+
+VOLUME ["/data/backups"]
+
+ENV BACKUP_DIR /data/backups
+
+ADD . /backup
+
+ENTRYPOINT ["/backup/entrypoint.sh"]
+
+CMD crond -f -l 2
diff --git a/16/backup.py b/16/backup.py
new file mode 100644
index 0000000..598b7bd
--- /dev/null
+++ b/16/backup.py
@@ -0,0 +1,143 @@
+#!/usr/bin/python3
+
+import os
+import subprocess
+import sys
+from datetime import datetime
+
+dt = datetime.now()
+
+BACKUP_DIR = os.environ["BACKUP_DIR"]
+
+S3_PATH = os.environ["S3_PATH"]
+S3_STORAGE_CLASS = os.environ.get("S3_STORAGE_CLASS") or "STANDARD_IA"
+S3_EXTRA_OPTIONS = os.environ.get("S3_EXTRA_OPTIONS") or ""
+
+DB_USE_ENV = os.environ.get("DB_USE_ENV") or False
+DB_NAME = os.environ["DB_NAME"] if "DB_NAME" in os.environ else os.environ.get("PGDATABASE")
+
+if not DB_NAME:
+    raise Exception("DB_NAME must be set")
+
+if not DB_USE_ENV:
+    DB_HOST = os.environ["DB_HOST"]
+    DB_PASS = os.environ["DB_PASS"]
+    DB_USER = os.environ["DB_USER"]
+    DB_PORT = os.environ.get("DB_PORT") or "5432"
+
+MAIL_TO = os.environ.get("MAIL_TO")
+MAIL_FROM = os.environ.get("MAIL_FROM")
+WEBHOOK = os.environ.get("WEBHOOK")
+WEBHOOK_METHOD = os.environ.get("WEBHOOK_METHOD")
+WEBHOOK_DATA = os.environ.get("WEBHOOK_DATA")
+WEBHOOK_CURL_OPTIONS = os.environ.get("WEBHOOK_CURL_OPTIONS") or ""
+KEEP_BACKUP_DAYS = int(os.environ.get("KEEP_BACKUP_DAYS", 7))
+FILENAME = os.environ.get("FILENAME", DB_NAME + "_%Y-%m-%d")
+
+file_name = dt.strftime(FILENAME)
+backup_file = os.path.join(BACKUP_DIR, file_name)
+
+if not S3_PATH.endswith("/"):
+    S3_PATH = S3_PATH + "/"
+
+if WEBHOOK_DATA and not WEBHOOK_METHOD:
+    WEBHOOK_METHOD = 'POST'
+else:
+    WEBHOOK_METHOD = WEBHOOK_METHOD or 'GET'
+
+def cmd(command, **kwargs):
+    try:
+        subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT, **kwargs)
+    except subprocess.CalledProcessError as e:
+        sys.stderr.write("\n".join([
+            "Command execution failed. Output:",
+            "-"*80,
+            e.output.decode(errors="replace"),
+            "-"*80,
+            ""
+        ]))
+        raise
+
+def backup_exists():
+    return os.path.exists(backup_file)
+
+def take_backup():
+    env = os.environ.copy()
+    if DB_USE_ENV:
+        env.update({key: os.environ[key] for key in os.environ.keys() if key.startswith('PG')})
+    else:
+        env.update({'PGPASSWORD': DB_PASS, 'PGHOST': DB_HOST, 'PGUSER': DB_USER, 'PGDATABASE': DB_NAME, 'PGPORT': DB_PORT})
+
+    # trigger postgres-backup
+    cmd("pg_dump -Fc > %s" % backup_file, env=env)
+
+def upload_backup():
+    opts = "--storage-class=%s %s" % (S3_STORAGE_CLASS, S3_EXTRA_OPTIONS)
+    cmd("aws s3 cp %s %s %s" % (opts, backup_file, S3_PATH))
+
+def prune_local_backup_files():
+    cmd("find %s -type f -prune -mtime +%i -exec rm -f {} \\;" % (BACKUP_DIR, KEEP_BACKUP_DAYS))
+
+def send_email(to_address, from_address, subject, body):
+    """
+    Super simple, doesn't do any escaping
+    """
+    cmd("""aws --region us-east-1 ses send-email --from %(from)s --destination '{"ToAddresses":["%(to)s"]}' --message '{"Subject":{"Data":"%(subject)s","Charset":"UTF-8"},"Body":{"Text":{"Data":"%(body)s","Charset":"UTF-8"}}}'""" % {
+        "to": to_address,
+        "from": from_address,
+        "subject": subject,
+        "body": body,
+    })
+
+def log(msg):
+    print("[%s]: %s" % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), msg))
+
+def pretty_bytes(num):
+    for x in ['bytes', 'KB', 'MB', 'GB', 'TB']:
+        if num < 1024.0:
+            return "%3.1f %s" % (num, x)
+        num /= 1024.0
+
+def main():
+    start_time = datetime.now()
+    log("Dumping database")
+    take_backup()
+    backup_size = os.path.getsize(backup_file)
+
+    log("Uploading to S3")
+    upload_backup()
+    log("Pruning local backup copies")
+    prune_local_backup_files()
+    end_time = datetime.now()
+
+    meta = {
+        "filename": file_name,
+        "date": end_time.strftime("%Y-%m-%d"),
+        "time": end_time.strftime('%H:%M:%S'),
+        "duration": "%.2f" % ((end_time - start_time).total_seconds()),
+        "size": pretty_bytes(backup_size)
+    }
+
+    if MAIL_TO and MAIL_FROM:
+        log("Sending mail to %s" % MAIL_TO)
+        send_email(
+            MAIL_TO,
+            MAIL_FROM,
+            "Backup complete: %s" % DB_NAME,
+            "Took %(duration)s seconds" % meta,
+        )
+
+    if WEBHOOK:
+        if WEBHOOK_DATA:
+            opts = "%s -d '%s'" % (WEBHOOK_CURL_OPTIONS, WEBHOOK_DATA % meta)
+        else:
+            opts = WEBHOOK_CURL_OPTIONS
+
+        log("Making HTTP %s request to webhook: %s" % (WEBHOOK_METHOD, WEBHOOK))
+        cmd("curl -X %s %s %s" % (WEBHOOK_METHOD, opts, WEBHOOK))
+
+    log("Backup complete, took %(duration)s seconds, size %(size)s" % meta)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/16/entrypoint.sh b/16/entrypoint.sh
new file mode 100755
index 0000000..119e8b5
--- /dev/null
+++ b/16/entrypoint.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+set -e
+
+if [[ $@ == *crond* ]] && [ -z "$CRON_SCHEDULE" ]; then
+    echo "ERROR: \$CRON_SCHEDULE not set!"
+    exit 1
+fi
+
+# Write cron schedule
+echo "$CRON_SCHEDULE python3 -u /backup/backup.py > /dev/stdout" > /var/spool/cron/crontabs/root
+
+exec "$@"
diff --git a/16/restore.py b/16/restore.py
new file mode 100644
index 0000000..ed5eadb
--- /dev/null
+++ b/16/restore.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python3
+
+import os
+import subprocess
+import sys
+from datetime import datetime
+
+BACKUP_DIR = os.environ["BACKUP_DIR"]
+
+S3_PATH = os.environ["S3_PATH"]
+S3_EXTRA_OPTIONS = os.environ.get("S3_EXTRA_OPTIONS") or ""
+
+DB_USE_ENV = os.environ.get("DB_USE_ENV") or False
+DB_NAME = os.environ["DB_NAME"] if "DB_NAME" in os.environ else os.environ.get("PGDATABASE")
+
+if not DB_NAME:
+    raise Exception("DB_NAME must be set")
+
+if not DB_USE_ENV:
+    DB_HOST = os.environ["DB_HOST"]
+    DB_PASS = os.environ["DB_PASS"]
+    DB_USER = os.environ["DB_USER"]
+    DB_PORT = os.environ.get("DB_PORT") or "5432"
+
+file_name = sys.argv[1]
+backup_file = os.path.join(BACKUP_DIR, file_name)
+
+if not S3_PATH.endswith("/"):
+    S3_PATH = S3_PATH + "/"
+
+def cmd(command, **kwargs):
+    try:
+        subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT, **kwargs)
+    except subprocess.CalledProcessError as e:
+        sys.stderr.write("\n".join([
+            "Command execution failed. Output:",
+            "-"*80,
+            e.output.decode(errors="replace"),
+            "-"*80,
+            ""
+        ]))
+        raise
+
+def backup_exists():
+    return os.path.exists(backup_file)
+
+def restore_backup():
+    if not backup_exists():
+        sys.stderr.write("Backup file doesn't exist!\n")
+        sys.exit(1)
+
+    # restore postgres-backup
+    env = os.environ.copy()
+    if DB_USE_ENV:
+        env.update({key: os.environ[key] for key in os.environ.keys() if key.startswith('PG')})
+    else:
+        env.update({'PGPASSWORD': DB_PASS, 'PGHOST': DB_HOST, 'PGUSER': DB_USER, 'PGDATABASE': DB_NAME, 'PGPORT': DB_PORT})
+
+    cmd("pg_restore -Fc -d %s %s" % (DB_NAME, backup_file), env=env)
+
+def download_backup():
+    cmd("aws s3 cp %s %s%s %s" % (S3_EXTRA_OPTIONS, S3_PATH, file_name, backup_file))
+
+def log(msg):
+    print("[%s]: %s" % (datetime.now().strftime("%Y-%m-%d %H:%M:%S"), msg))
+
+def main():
+    start_time = datetime.now()
+    if backup_exists():
+        log("Backup file already exists in filesystem %s" % backup_file)
+    else:
+        log("Downloading database dump")
+        download_backup()
+
+    log("Restoring database")
+    restore_backup()
+
+    log("Restore complete, took %.2f seconds" % (datetime.now() - start_time).total_seconds())
+
+if __name__ == "__main__":
+    main()
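
Usage sketch (editor's note, not part of the patch): the image is driven entirely
by the environment variables read in backup.py and entrypoint.sh above. Building
and running the PostgreSQL 16 variant might look like the following; the image
tag "postgres-backup:16", the container name "pg-backup", and all host and
credential values are hypothetical placeholders:

    # Build the image from the new 16/ directory
    docker build -t postgres-backup:16 ./16

    # Run it; the default CMD starts crond, and entrypoint.sh writes
    # CRON_SCHEDULE into root's crontab so backup.py runs on schedule.
    # BACKUP_DIR already defaults to /data/backups via the Dockerfile.
    docker run -d --name pg-backup \
        -e CRON_SCHEDULE="0 2 * * *" \
        -e DB_HOST=db.example.com \
        -e DB_USER=postgres \
        -e DB_PASS=secret \
        -e DB_NAME=mydb \
        -e S3_PATH=s3://my-bucket/backups/ \
        -e AWS_ACCESS_KEY_ID=... \
        -e AWS_SECRET_ACCESS_KEY=... \
        postgres-backup:16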
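Restores and webhook notifications follow directly from the code above:
restore.py takes the dump's file name as its only argument, fetches it from
S3_PATH unless it is already present in BACKUP_DIR, and feeds it to pg_restore;
WEBHOOK_DATA is formatted with printf-style placeholders against the meta dict
built in backup.py (keys: filename, date, time, duration, size), and POST is
implied whenever WEBHOOK_DATA is set. A sketch, reusing the hypothetical
container name from the previous example:

    # Restore a dump (FILENAME defaults to <DB_NAME>_%Y-%m-%d)
    docker exec pg-backup python3 /backup/restore.py mydb_2024-01-31

    # Example webhook settings, added to the docker run above
    -e WEBHOOK=https://hooks.example.com/notify \
    -e WEBHOOK_DATA='Backup %(filename)s finished in %(duration)ss (%(size)s)' \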