From 2a91ce1a23845237345c1440c0c89a61d23189de Mon Sep 17 00:00:00 2001 From: Mathieu Leplatre Date: Thu, 4 Jul 2024 11:35:55 +0200 Subject: [PATCH] Add `build_bundles` command (#1474) * Basic bundling code * Add tests and upload files to GCloud * Install cloud storage lib * Polish and add code comments * Upgrade kinto-http.py * Rewrite to be idempotent when collections haven't changed * setuptools is not required anymore since kinto-http 11.2.0 --- aws_lambda.py | 4 + commands/__init__.py | 17 +++ commands/build_bundles.py | 202 +++++++++++++++++++++++++++ requirements.in | 3 +- requirements.txt | 270 +++++++++++++++++++++++++++++++++--- tests/test_build_bundles.py | 258 ++++++++++++++++++++++++++++++++++ 6 files changed, 731 insertions(+), 23 deletions(-) create mode 100644 commands/build_bundles.py create mode 100644 tests/test_build_bundles.py diff --git a/aws_lambda.py b/aws_lambda.py index cf0a800c..beb3a181 100755 --- a/aws_lambda.py +++ b/aws_lambda.py @@ -98,6 +98,10 @@ def sync_megaphone(*args, **kwargs): return run("sync_megaphone", *args, **kwargs) +def build_bundles(*args, **kwargs): + return run("build_bundles", *args, **kwargs) + + def main(*args): # Run the function specified in CLI arg. # diff --git a/commands/__init__.py b/commands/__init__.py index fd087ff9..8666977a 100644 --- a/commands/__init__.py +++ b/commands/__init__.py @@ -1,3 +1,4 @@ +import concurrent.futures import os import backoff @@ -55,6 +56,14 @@ def get_records(self, *args, **kwargs): def get_records_timestamp(self, *args, **kwargs): return super().get_records_timestamp(*args, **kwargs) + @retry_timeout + def get_changeset(self, bid, cid, expected): + url = f"{self.session.server_url}/buckets/{bid}/collections/{cid}/changeset?_expected={expected}" + resp = requests.get(url) + resp.raise_for_status() + changeset = resp.json() + return changeset + def records_equal(a, b): """Compare records, ignoring timestamps.""" @@ -62,3 +71,11 @@ def records_equal(a, b): ra = {k: v for k, v in a.items() if k not in ignored_fields} rb = {k: v for k, v in b.items() if k not in ignored_fields} return ra == rb + + +def call_parallel(func, args_list, max_workers=4): + results = [] + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [executor.submit(func, *args) for args in args_list] + results = [future.result() for future in futures] + return results diff --git a/commands/build_bundles.py b/commands/build_bundles.py new file mode 100644 index 00000000..8cd41fbe --- /dev/null +++ b/commands/build_bundles.py @@ -0,0 +1,202 @@ +""" +This command will create Zip files in order to bundle all collections data, +and all attachments of collections that have the `attachment.bundle` flag in +their metadata. +It then uploads these zip files to Google Cloud Storage. +""" + +import io +import json +import os +import random +import zipfile +from email.utils import parsedate_to_datetime + +import requests +from google.cloud import storage + +from . import KintoClient, call_parallel, retry_timeout + + +SERVER = os.getenv("SERVER") +REQUESTS_PARALLEL_COUNT = int(os.getenv("REQUESTS_PARALLEL_COUNT", "8")) +BUNDLE_MAX_SIZE_BYTES = int(os.getenv("BUNDLE_MAX_SIZE_BYTES", "20_000_000")) +STORAGE_BUCKET_NAME = os.getenv("STORAGE_BUCKET_NAME", "remote-settings-nonprod-stage-attachments") +DESTINATION_FOLDER = os.getenv("DESTINATION_FOLDER", "bundles") +# Flags for local development +BUILD_ALL = os.getenv("BUILD_ALL", "0") in "1yY" +SKIP_UPLOAD = os.getenv("SKIP_UPLOAD", "0") in "1yY" + + +def fetch_all_changesets(client): + """ + Return the `/changeset` responses for all collections listed + in the `monitor/changes` endpoint. + The result contains the metadata and all the records of all collections + for both preview and main buckets. + """ + random_cache_bust = random.randint(999999000000, 999999999999) + monitor_changeset = client.get_changeset("monitor", "changes", random_cache_bust) + print("%s collections" % len(monitor_changeset["changes"])) + + args_list = [ + (c["bucket"], c["collection"], c["last_modified"]) for c in monitor_changeset["changes"] + ] + all_changesets = call_parallel( + lambda bid, cid, ts: client.get_changeset(bid, cid, ts), args_list, REQUESTS_PARALLEL_COUNT + ) + return [ + {"bucket": bid, **changeset} for (bid, _, _), changeset in zip(args_list, all_changesets) + ] + + +@retry_timeout +def get_modified_timestamp(url): + """ + Return URL modified date as epoch millisecond. + """ + resp = requests.get(url) + if not resp.ok: + return None + dts = resp.headers["Last-Modified"] + dt = parsedate_to_datetime(dts) + epoch_msec = int(dt.timestamp() * 1000) + return epoch_msec + + +@retry_timeout +def fetch_attachment(url): + print("Fetch %r" % url) + resp = requests.get(url) + return resp.content + + +def write_zip(output_path: str, content: list[tuple[str, bytes]]): + """ + Write a Zip at the specified `output_path` location with the specified `content`. + The content is specified as a list of file names and their binary content. + """ + parent_folder = os.path.dirname(output_path) + os.makedirs(parent_folder, exist_ok=True) + + zip_buffer = io.BytesIO() + with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zip_file: + for filename, filecontent in content: + zip_file.writestr(filename, filecontent) + with open(output_path, "wb") as f: + f.write(zip_buffer.getvalue()) + print("Wrote %r" % output_path) + + +def sync_cloud_storage( + storage_bucket: str, remote_folder: str, to_upload: list[str], to_delete: list[str] +): + """ + Upload the specified `to_upload` filenames, and delete the specified `to_delete` filenames + from the `remote_folder` of the `storage_bucket`. + """ + # Ensure you have set the GOOGLE_APPLICATION_CREDENTIALS environment variable + # to the path of your Google Cloud service account key file before running this script. + client = storage.Client() + bucket = client.bucket(storage_bucket) + for filename in to_upload: + remote_file_path = os.path.join(remote_folder, filename) + blob = bucket.blob(remote_file_path) + blob.upload_from_filename(filename) + print(f"Uploaded {filename} to gs://{storage_bucket}/{remote_file_path}") + + to_delete = {os.path.join(remote_folder, f) for f in to_delete} + blobs = bucket.list_blobs(prefix=remote_folder) + for blob in blobs: + if blob.name in to_delete: + blob.delete() + print(f"Deleted gs://{storage_bucket}/{blob.name}") + + +def build_bundles(event, context): + """ + Main command entry point that: + - fetches all collections changesets + - builds a `changesets.zip` + - fetches attachments of all collections with bundle flag + - builds `{bid}--{cid}.zip` for each of them + - send the bundles to the Cloud storage bucket + """ + rs_server = event.get("server") or SERVER + + client = KintoClient(server_url=rs_server) + + base_url = client.server_info()["capabilities"]["attachments"]["base_url"] + + existing_bundle_timestamp = get_modified_timestamp( + f"{base_url}{DESTINATION_FOLDER}/changesets.zip" + ) + if existing_bundle_timestamp is None: + print("No previous bundle found") # Should only happen once. + existing_bundle_timestamp = -1 + + all_changesets = fetch_all_changesets(client) + highest_timestamp = max(c["timestamp"] for c in all_changesets) + + if existing_bundle_timestamp >= highest_timestamp: + print("Existing bundle up-to-date. Nothing to do.") + return + + bundles_to_upload = [] + bundles_to_delete = [] + + write_zip( + "changesets.zip", + [ + ("{bucket}--{metadata[id]}.json".format(**changeset), json.dumps(changeset)) + for changeset in all_changesets + ], + ) + bundles_to_upload.append("changesets.zip") + + for changeset in all_changesets: + bid = changeset["bucket"] + cid = changeset["metadata"]["id"] + should_bundle = changeset["metadata"].get("attachment", {}).get("bundle", False) + attachments_bundle_filename = f"{bid}--{cid}.zip" + + if not should_bundle: + bundles_to_delete.append(attachments_bundle_filename) + if not BUILD_ALL: + continue + + if not BUILD_ALL and changeset["timestamp"] < existing_bundle_timestamp: + # Collection hasn't changed since last bundling. + continue + + # Skip bundle if no attachments found. + records = [r for r in changeset["changes"] if "attachment" in r] + if not records: + print("%s/%s has no attachments" % (bid, cid)) + bundles_to_delete.append(attachments_bundle_filename) + continue + + print("%s/%s: %s records with attachments" % (bid, cid, len(records))) + + # Skip bundle if total size is too big. + total_size_bytes = sum(r["attachment"]["size"] for r in records) + total_size_mb = total_size_bytes / 1024 / 1024 + if total_size_bytes > BUNDLE_MAX_SIZE_BYTES: + print(f"Bundle would be too big ({total_size_mb:.2f}MB). Skip.") + continue + print(f"Attachments total size {total_size_mb:.2f}MB") + + # Fetch all attachments and build "{bid}--{cid}.zip" + args_list = [(f'{base_url}{r["attachment"]["location"]}',) for r in records] + all_attachments = call_parallel(fetch_attachment, args_list, REQUESTS_PARALLEL_COUNT) + write_zip( + attachments_bundle_filename, + [(f'{record["id"]}.meta.json', json.dumps(record)) for record in records] + + [(record["id"], attachment) for record, attachment in zip(records, all_attachments)], + ) + bundles_to_upload.append(attachments_bundle_filename) + + if not SKIP_UPLOAD: + sync_cloud_storage( + STORAGE_BUCKET_NAME, DESTINATION_FOLDER, bundles_to_upload, bundles_to_delete + ) diff --git a/requirements.in b/requirements.in index 6028e776..2570ba0f 100644 --- a/requirements.in +++ b/requirements.in @@ -3,5 +3,4 @@ python-decouple kinto-http requests sentry_sdk -# kinto-http requires setuptools and its `pkg_resources` package -setuptools +google-cloud-storage diff --git a/requirements.txt b/requirements.txt index 42d2de6f..a407d7f5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --allow-unsafe --generate-hashes +# pip-compile --generate-hashes # backoff==2.2.1 \ --hash=sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba \ @@ -10,24 +10,247 @@ backoff==2.2.1 \ # via # -r requirements.in # kinto-http -certifi==2023.7.22 \ - --hash=sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082 \ - --hash=sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9 +cachetools==5.3.3 \ + --hash=sha256:0abad1021d3f8325b2fc1d2e9c8b9c9d57b04c3932657a72465447332c24d945 \ + --hash=sha256:ba29e2dfa0b8b556606f097407ed1aa62080ee108ab0dc5ec9d6a723a007d105 + # via google-auth +certifi==2024.6.2 \ + --hash=sha256:3cd43f1c6fa7dedc5899d69d3ad0398fd018ad1a17fba83ddaf78aa46c747516 \ + --hash=sha256:ddc6c8ce995e6987e7faf5e3f1b02b302836a0e5d98ece18392cb1a36c72ad56 # via # requests # sentry-sdk -charset-normalizer==2.0.7 \ - --hash=sha256:e019de665e2bcf9c2b64e2e5aa025fa991da8720daa3c1138cadd2fd1856aed0 \ - --hash=sha256:f7af805c321bfa1ce6714c51f254e0d5bb5e5834039bc17db7ebe3a4cec9492b +charset-normalizer==3.3.2 \ + --hash=sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027 \ + --hash=sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087 \ + --hash=sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786 \ + --hash=sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8 \ + --hash=sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09 \ + --hash=sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185 \ + --hash=sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574 \ + --hash=sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e \ + --hash=sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519 \ + --hash=sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898 \ + --hash=sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269 \ + --hash=sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3 \ + --hash=sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f \ + --hash=sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6 \ + --hash=sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8 \ + --hash=sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a \ + --hash=sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73 \ + --hash=sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc \ + --hash=sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714 \ + --hash=sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2 \ + --hash=sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc \ + --hash=sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce \ + --hash=sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d \ + --hash=sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e \ + --hash=sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6 \ + --hash=sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269 \ + --hash=sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96 \ + --hash=sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d \ + --hash=sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a \ + --hash=sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4 \ + --hash=sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77 \ + --hash=sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d \ + --hash=sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0 \ + --hash=sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed \ + --hash=sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068 \ + --hash=sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac \ + --hash=sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25 \ + --hash=sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8 \ + --hash=sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab \ + --hash=sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26 \ + --hash=sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2 \ + --hash=sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db \ + --hash=sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f \ + --hash=sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5 \ + --hash=sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99 \ + --hash=sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c \ + --hash=sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d \ + --hash=sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811 \ + --hash=sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa \ + --hash=sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a \ + --hash=sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03 \ + --hash=sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b \ + --hash=sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04 \ + --hash=sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c \ + --hash=sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001 \ + --hash=sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458 \ + --hash=sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389 \ + --hash=sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99 \ + --hash=sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985 \ + --hash=sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537 \ + --hash=sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238 \ + --hash=sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f \ + --hash=sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d \ + --hash=sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796 \ + --hash=sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a \ + --hash=sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143 \ + --hash=sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8 \ + --hash=sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c \ + --hash=sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5 \ + --hash=sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5 \ + --hash=sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711 \ + --hash=sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4 \ + --hash=sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6 \ + --hash=sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c \ + --hash=sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7 \ + --hash=sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4 \ + --hash=sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b \ + --hash=sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae \ + --hash=sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12 \ + --hash=sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c \ + --hash=sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae \ + --hash=sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8 \ + --hash=sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887 \ + --hash=sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b \ + --hash=sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4 \ + --hash=sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f \ + --hash=sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5 \ + --hash=sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33 \ + --hash=sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519 \ + --hash=sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561 # via requests +google-api-core==2.19.1 \ + --hash=sha256:f12a9b8309b5e21d92483bbd47ce2c445861ec7d269ef6784ecc0ea8c1fa6125 \ + --hash=sha256:f4695f1e3650b316a795108a76a1c416e6afb036199d1c1f1f110916df479ffd + # via + # google-cloud-core + # google-cloud-storage +google-auth==2.31.0 \ + --hash=sha256:042c4702efa9f7d3c48d3a69341c209381b125faa6dbf3ebe56bc7e40ae05c23 \ + --hash=sha256:87805c36970047247c8afe614d4e3af8eceafc1ebba0c679fe75ddd1d575e871 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage +google-cloud-core==2.4.1 \ + --hash=sha256:9b7749272a812bde58fff28868d0c5e2f585b82f37e09a1f6ed2d4d10f134073 \ + --hash=sha256:a9e6a4422b9ac5c29f79a0ede9485473338e2ce78d91f2370c01e730eab22e61 + # via google-cloud-storage +google-cloud-storage==2.17.0 \ + --hash=sha256:49378abff54ef656b52dca5ef0f2eba9aa83dc2b2c72c78714b03a1a95fe9388 \ + --hash=sha256:5b393bc766b7a3bc6f5407b9e665b2450d36282614b7945e570b3480a456d1e1 + # via -r requirements.in +google-crc32c==1.5.0 \ + --hash=sha256:024894d9d3cfbc5943f8f230e23950cd4906b2fe004c72e29b209420a1e6b05a \ + --hash=sha256:02c65b9817512edc6a4ae7c7e987fea799d2e0ee40c53ec573a692bee24de876 \ + --hash=sha256:02ebb8bf46c13e36998aeaad1de9b48f4caf545e91d14041270d9dca767b780c \ + --hash=sha256:07eb3c611ce363c51a933bf6bd7f8e3878a51d124acfc89452a75120bc436289 \ + --hash=sha256:1034d91442ead5a95b5aaef90dbfaca8633b0247d1e41621d1e9f9db88c36298 \ + --hash=sha256:116a7c3c616dd14a3de8c64a965828b197e5f2d121fedd2f8c5585c547e87b02 \ + --hash=sha256:19e0a019d2c4dcc5e598cd4a4bc7b008546b0358bd322537c74ad47a5386884f \ + --hash=sha256:1c7abdac90433b09bad6c43a43af253e688c9cfc1c86d332aed13f9a7c7f65e2 \ + --hash=sha256:1e986b206dae4476f41bcec1faa057851f3889503a70e1bdb2378d406223994a \ + --hash=sha256:272d3892a1e1a2dbc39cc5cde96834c236d5327e2122d3aaa19f6614531bb6eb \ + --hash=sha256:278d2ed7c16cfc075c91378c4f47924c0625f5fc84b2d50d921b18b7975bd210 \ + --hash=sha256:2ad40e31093a4af319dadf503b2467ccdc8f67c72e4bcba97f8c10cb078207b5 \ + --hash=sha256:2e920d506ec85eb4ba50cd4228c2bec05642894d4c73c59b3a2fe20346bd00ee \ + --hash=sha256:3359fc442a743e870f4588fcf5dcbc1bf929df1fad8fb9905cd94e5edb02e84c \ + --hash=sha256:37933ec6e693e51a5b07505bd05de57eee12f3e8c32b07da7e73669398e6630a \ + --hash=sha256:398af5e3ba9cf768787eef45c803ff9614cc3e22a5b2f7d7ae116df8b11e3314 \ + --hash=sha256:3b747a674c20a67343cb61d43fdd9207ce5da6a99f629c6e2541aa0e89215bcd \ + --hash=sha256:461665ff58895f508e2866824a47bdee72497b091c730071f2b7575d5762ab65 \ + --hash=sha256:4c6fdd4fccbec90cc8a01fc00773fcd5fa28db683c116ee3cb35cd5da9ef6c37 \ + --hash=sha256:5829b792bf5822fd0a6f6eb34c5f81dd074f01d570ed7f36aa101d6fc7a0a6e4 \ + --hash=sha256:596d1f98fc70232fcb6590c439f43b350cb762fb5d61ce7b0e9db4539654cc13 \ + --hash=sha256:5ae44e10a8e3407dbe138984f21e536583f2bba1be9491239f942c2464ac0894 \ + --hash=sha256:635f5d4dd18758a1fbd1049a8e8d2fee4ffed124462d837d1a02a0e009c3ab31 \ + --hash=sha256:64e52e2b3970bd891309c113b54cf0e4384762c934d5ae56e283f9a0afcd953e \ + --hash=sha256:66741ef4ee08ea0b2cc3c86916ab66b6aef03768525627fd6a1b34968b4e3709 \ + --hash=sha256:67b741654b851abafb7bc625b6d1cdd520a379074e64b6a128e3b688c3c04740 \ + --hash=sha256:6ac08d24c1f16bd2bf5eca8eaf8304812f44af5cfe5062006ec676e7e1d50afc \ + --hash=sha256:6f998db4e71b645350b9ac28a2167e6632c239963ca9da411523bb439c5c514d \ + --hash=sha256:72218785ce41b9cfd2fc1d6a017dc1ff7acfc4c17d01053265c41a2c0cc39b8c \ + --hash=sha256:74dea7751d98034887dbd821b7aae3e1d36eda111d6ca36c206c44478035709c \ + --hash=sha256:759ce4851a4bb15ecabae28f4d2e18983c244eddd767f560165563bf9aefbc8d \ + --hash=sha256:77e2fd3057c9d78e225fa0a2160f96b64a824de17840351b26825b0848022906 \ + --hash=sha256:7c074fece789b5034b9b1404a1f8208fc2d4c6ce9decdd16e8220c5a793e6f61 \ + --hash=sha256:7c42c70cd1d362284289c6273adda4c6af8039a8ae12dc451dcd61cdabb8ab57 \ + --hash=sha256:7f57f14606cd1dd0f0de396e1e53824c371e9544a822648cd76c034d209b559c \ + --hash=sha256:83c681c526a3439b5cf94f7420471705bbf96262f49a6fe546a6db5f687a3d4a \ + --hash=sha256:8485b340a6a9e76c62a7dce3c98e5f102c9219f4cfbf896a00cf48caf078d438 \ + --hash=sha256:84e6e8cd997930fc66d5bb4fde61e2b62ba19d62b7abd7a69920406f9ecca946 \ + --hash=sha256:89284716bc6a5a415d4eaa11b1726d2d60a0cd12aadf5439828353662ede9dd7 \ + --hash=sha256:8b87e1a59c38f275c0e3676fc2ab6d59eccecfd460be267ac360cc31f7bcde96 \ + --hash=sha256:8f24ed114432de109aa9fd317278518a5af2d31ac2ea6b952b2f7782b43da091 \ + --hash=sha256:98cb4d057f285bd80d8778ebc4fde6b4d509ac3f331758fb1528b733215443ae \ + --hash=sha256:998679bf62b7fb599d2878aa3ed06b9ce688b8974893e7223c60db155f26bd8d \ + --hash=sha256:9ba053c5f50430a3fcfd36f75aff9caeba0440b2d076afdb79a318d6ca245f88 \ + --hash=sha256:9c99616c853bb585301df6de07ca2cadad344fd1ada6d62bb30aec05219c45d2 \ + --hash=sha256:a1fd716e7a01f8e717490fbe2e431d2905ab8aa598b9b12f8d10abebb36b04dd \ + --hash=sha256:a2355cba1f4ad8b6988a4ca3feed5bff33f6af2d7f134852cf279c2aebfde541 \ + --hash=sha256:b1f8133c9a275df5613a451e73f36c2aea4fe13c5c8997e22cf355ebd7bd0728 \ + --hash=sha256:b8667b48e7a7ef66afba2c81e1094ef526388d35b873966d8a9a447974ed9178 \ + --hash=sha256:ba1eb1843304b1e5537e1fca632fa894d6f6deca8d6389636ee5b4797affb968 \ + --hash=sha256:be82c3c8cfb15b30f36768797a640e800513793d6ae1724aaaafe5bf86f8f346 \ + --hash=sha256:c02ec1c5856179f171e032a31d6f8bf84e5a75c45c33b2e20a3de353b266ebd8 \ + --hash=sha256:c672d99a345849301784604bfeaeba4db0c7aae50b95be04dd651fd2a7310b93 \ + --hash=sha256:c6c777a480337ac14f38564ac88ae82d4cd238bf293f0a22295b66eb89ffced7 \ + --hash=sha256:cae0274952c079886567f3f4f685bcaf5708f0a23a5f5216fdab71f81a6c0273 \ + --hash=sha256:cd67cf24a553339d5062eff51013780a00d6f97a39ca062781d06b3a73b15462 \ + --hash=sha256:d3515f198eaa2f0ed49f8819d5732d70698c3fa37384146079b3799b97667a94 \ + --hash=sha256:d5280312b9af0976231f9e317c20e4a61cd2f9629b7bfea6a693d1878a264ebd \ + --hash=sha256:de06adc872bcd8c2a4e0dc51250e9e65ef2ca91be023b9d13ebd67c2ba552e1e \ + --hash=sha256:e1674e4307fa3024fc897ca774e9c7562c957af85df55efe2988ed9056dc4e57 \ + --hash=sha256:e2096eddb4e7c7bdae4bd69ad364e55e07b8316653234a56552d9c988bd2d61b \ + --hash=sha256:e560628513ed34759456a416bf86b54b2476c59144a9138165c9a1575801d0d9 \ + --hash=sha256:edfedb64740750e1a3b16152620220f51d58ff1b4abceb339ca92e934775c27a \ + --hash=sha256:f13cae8cc389a440def0c8c52057f37359014ccbc9dc1f0827936bcd367c6100 \ + --hash=sha256:f314013e7dcd5cf45ab1945d92e713eec788166262ae8deb2cfacd53def27325 \ + --hash=sha256:f583edb943cf2e09c60441b910d6a20b4d9d626c75a36c8fcac01a6c96c01183 \ + --hash=sha256:fd8536e902db7e365f49e7d9029283403974ccf29b13fc7028b97e2295b33556 \ + --hash=sha256:fe70e325aa68fa4b5edf7d1a4b6f691eb04bbccac0ace68e34820d283b5f80d4 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.1 \ + --hash=sha256:103ebc4ba331ab1bfdac0250f8033627a2cd7cde09e7ccff9181e31ba4315b2c \ + --hash=sha256:eae451a7b2e2cdbaaa0fd2eb00cc8a1ee5e95e16b55597359cbc3d27d7d90e33 + # via google-cloud-storage +googleapis-common-protos==1.63.2 \ + --hash=sha256:27a2499c7e8aff199665b22741997e485eccc8645aa9176c7c988e6fae507945 \ + --hash=sha256:27c5abdffc4911f28101e635de1533fb4cfd2c37fbaa9174587c799fac90aa87 + # via google-api-core idna==3.7 \ --hash=sha256:028ff3aadf0609c1fd278d8ea3089299412a7a8b9bd005dd08b9f8285bcb5cfc \ --hash=sha256:82fee1fc78add43492d3a1898bfa6d8a904cc97d8427f683ed8e798d07761aa0 # via requests -kinto-http==11.1.0 \ - --hash=sha256:79f9a8eabd1850e38b076e2625eae380f8ba437903f7ce51ebb106c24ef73d46 \ - --hash=sha256:e475b7e15b4c7562b6a07e6af51afcae626cbc1a3982fb0ca46b16af07d3b668 +kinto-http==11.2.0 \ + --hash=sha256:7b6c1051eac76b72dce50b9933e45885edf25fde609c1bb43e77417b816e8537 \ + --hash=sha256:cfb7b206aa9ba7b4b299caac58416648d26512cb9b96f504b7441cc290ff1da2 # via -r requirements.in +proto-plus==1.24.0 \ + --hash=sha256:30b72a5ecafe4406b0d339db35b56c4059064e69227b8c3bda7462397f966445 \ + --hash=sha256:402576830425e5f6ce4c2a6702400ac79897dab0b4343821aa5188b0fab81a12 + # via google-api-core +protobuf==5.27.2 \ + --hash=sha256:0e341109c609749d501986b835f667c6e1e24531096cff9d34ae411595e26505 \ + --hash=sha256:176c12b1f1c880bf7a76d9f7c75822b6a2bc3db2d28baa4d300e8ce4cde7409b \ + --hash=sha256:354d84fac2b0d76062e9b3221f4abbbacdfd2a4d8af36bab0474f3a0bb30ab38 \ + --hash=sha256:4fadd8d83e1992eed0248bc50a4a6361dc31bcccc84388c54c86e530b7f58863 \ + --hash=sha256:54330f07e4949d09614707c48b06d1a22f8ffb5763c159efd5c0928326a91470 \ + --hash=sha256:610e700f02469c4a997e58e328cac6f305f649826853813177e6290416e846c6 \ + --hash=sha256:7fc3add9e6003e026da5fc9e59b131b8f22b428b991ccd53e2af8071687b4fce \ + --hash=sha256:9e8f199bf7f97bd7ecebffcae45ebf9527603549b2b562df0fbc6d4d688f14ca \ + --hash=sha256:a109916aaac42bff84702fb5187f3edadbc7c97fc2c99c5ff81dd15dcce0d1e5 \ + --hash=sha256:b848dbe1d57ed7c191dfc4ea64b8b004a3f9ece4bf4d0d80a367b76df20bf36e \ + --hash=sha256:f3ecdef226b9af856075f28227ff2c90ce3a594d092c39bee5513573f25e2714 + # via + # google-api-core + # googleapis-common-protos + # proto-plus +pyasn1==0.6.0 \ + --hash=sha256:3a35ab2c4b5ef98e17dfdec8ab074046fbda76e281c5a706ccd82328cfc8f64c \ + --hash=sha256:cca4bb0f2df5504f02f6f8a775b6e416ff9b0b3b16f7ee80b5a3153d9b804473 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.0 \ + --hash=sha256:831dbcea1b177b28c9baddf4c6d1013c24c3accd14a1873fffaa6a2e905f17b6 \ + --hash=sha256:be04f15b66c206eed667e0bb5ab27e2b1855ea54a842e5037738099e8ca4ae0b + # via google-auth python-decouple==3.8 \ --hash=sha256:ba6e2657d4f376ecc46f77a3a615e058d93ba5e465c01bbe57289bfb7cce680f \ --hash=sha256:d0d45340815b25f4de59c974b855bb38d03151d81b037d9e3f463b0c9f8cbd66 @@ -37,24 +260,29 @@ requests==2.32.3 \ --hash=sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6 # via # -r requirements.in + # google-api-core + # google-cloud-storage # kinto-http +rsa==4.9 \ + --hash=sha256:90260d9058e514786967344d0ef75fa8727eed8a7d2e43ce9f4bcf1b536174f7 \ + --hash=sha256:e38464a49c6c85d7f1351b0126661487a7e0a14a50f1675ec50eb34d4f20ef21 + # via google-auth sentry-sdk==2.7.1 \ --hash=sha256:25006c7e68b75aaa5e6b9c6a420ece22e8d7daec4b7a906ffd3a8607b67c037b \ --hash=sha256:ef1b3d54eb715825657cd4bb3cb42bb4dc85087bac14c56b0fd8c21abd968c9a # via -r requirements.in -unidecode==1.3.4 \ - --hash=sha256:8e4352fb93d5a735c788110d2e7ac8e8031eb06ccbfe8d324ab71735015f9342 \ - --hash=sha256:afa04efcdd818a93237574791be9b2817d7077c25a068b00f8cff7baa4e59257 +unidecode==1.3.8 \ + --hash=sha256:cfdb349d46ed3873ece4586b96aa75258726e2fa8ec21d6f00a591d98806c2f4 \ + --hash=sha256:d130a61ce6696f8148a3bd8fe779c99adeb4b870584eeb9526584e9aa091fd39 # via kinto-http -urllib3==1.26.19 \ - --hash=sha256:37a0344459b199fce0e80b0d3569837ec6b6937435c5244e7fd73fa6006830f3 \ - --hash=sha256:3e3d753a8618b86d7de333b4223005f68720bcd6a7d2bcb9fbd2229ec7c1e429 +urllib3==2.2.2 \ + --hash=sha256:a448b2f64d686155468037e1ace9f2d2199776e17f0a46610480d311f73e3472 \ + --hash=sha256:dd505485549a7a552833da5e6063639d0d177c04f23bc3864e41e5dc5f612168 # via # requests # sentry-sdk -# The following packages are considered to be unsafe in a requirements file: -setuptools==70.1.1 \ - --hash=sha256:937a48c7cdb7a21eb53cd7f9b59e525503aa8abaf3584c730dc5f7a5bec3a650 \ - --hash=sha256:a58a8fde0541dab0419750bcc521fbdf8585f6e5cb41909df3a472ef7b81ca95 - # via -r requirements.in +# WARNING: The following packages were not pinned, but pip requires them to be +# pinned when the requirements file includes hashes and the requirement is not +# satisfied by a package already installed. Consider using the --allow-unsafe flag. +# setuptools diff --git a/tests/test_build_bundles.py b/tests/test_build_bundles.py new file mode 100644 index 00000000..07ea068b --- /dev/null +++ b/tests/test_build_bundles.py @@ -0,0 +1,258 @@ +import os +import zipfile +from unittest.mock import MagicMock, patch + +import pytest +import responses + +from commands.build_bundles import ( + KintoClient, + build_bundles, + call_parallel, + fetch_all_changesets, + fetch_attachment, + get_modified_timestamp, + sync_cloud_storage, + write_zip, +) + + +@pytest.fixture +def mock_fetch_all_changesets(): + with patch("commands.build_bundles.fetch_all_changesets") as mock_fetch: + yield mock_fetch + + +@pytest.fixture +def mock_write_zip(): + with patch("commands.build_bundles.write_zip") as mock_write: + yield mock_write + + +@pytest.fixture +def mock_sync_cloud_storage(): + with patch("commands.build_bundles.sync_cloud_storage") as mock_sync_cloud_storage: + yield mock_sync_cloud_storage + + +@pytest.fixture +def mock_storage_client(): + with patch("commands.build_bundles.storage.Client") as mock_client: + mock_bucket = MagicMock() + mock_client.return_value.bucket.return_value = mock_bucket + yield mock_bucket + + +@pytest.fixture +def mock_environment(monkeypatch): + monkeypatch.setenv("GOOGLE_APPLICATION_CREDENTIALS", "/path/creds.json") + + +def test_call_parallel(): + def dummy_func(x, y): + return x + y + + args_list = [(1, 2), (3, 4), (5, 6)] + results = call_parallel(dummy_func, args_list) + assert results == [3, 7, 11] + + +@responses.activate +@patch("commands.build_bundles.random") +def test_fetch_all_changesets(mock_random): + mock_random.randint.return_value = 42 + changeset_url = ( + "http://example.com/v1/buckets/{bid}/collections/{cid}/changeset?_expected={expected}" + ) + responses.add( + responses.GET, + changeset_url.format(bid="monitor", cid="changes", expected=42), + json={ + "changes": [ + {"bucket": "bucket1", "collection": "collection1", "last_modified": 123}, + {"bucket": "bucket2", "collection": "collection2", "last_modified": 456}, + ] + }, + ) + responses.add( + responses.GET, + changeset_url.format(bid="bucket1", cid="collection1", expected=123), + json={"metadata": {"id": "collection1"}, "changes": [{"id": "abc"}]}, + ) + responses.add( + responses.GET, + changeset_url.format(bid="bucket2", cid="collection2", expected=456), + json={"metadata": {"id": "collection2"}, "changes": [{"id": "edf"}]}, + ) + + client = KintoClient(server_url="http://example.com/v1") + + changesets = fetch_all_changesets(client) + assert len(changesets) == 2 + assert changesets[0]["bucket"] == "bucket1" + assert changesets[0]["metadata"]["id"] == "collection1" + assert changesets[1]["bucket"] == "bucket2" + assert changesets[1]["metadata"]["id"] == "collection2" + + +@responses.activate +def test_fetch_attachment(): + url = "http://example.com/file" + responses.add(responses.GET, url, body=b"file_content", status=200) + + content = fetch_attachment(url) + assert content == b"file_content" + + +@responses.activate +def test_get_modified_timestamp(): + url = "http://example.com/file" + responses.add( + responses.GET, + url, + body=b"file_content", + headers={"Last-Modified": "Wed, 03 Jul 2024 11:04:48 GMT"}, + ) + timestamp = get_modified_timestamp(url) + assert timestamp == 1720004688000 + + +@responses.activate +def test_get_modified_timestamp_missing(): + url = "http://example.com/file" + responses.add(responses.GET, url, status=404) + timestamp = get_modified_timestamp(url) + assert timestamp is None + + +def test_write_zip(tmpdir): + content = [("file1.txt", "content1"), ("file2.txt", "content2")] + output_path = os.path.join(tmpdir, "test.zip") + write_zip(output_path, content) + + with zipfile.ZipFile(output_path, "r") as zip_file: + assert set(zip_file.namelist()) == {"file1.txt", "file2.txt"} + assert zip_file.read("file1.txt") == b"content1" + assert zip_file.read("file2.txt") == b"content2" + + +@responses.activate +def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage): + server_url = "http://testserver" + event = {"server": server_url} + + responses.add( + responses.GET, + server_url, + json={"capabilities": {"attachments": {"base_url": f"{server_url}/attachments/"}}}, + ) + responses.add(responses.GET, f"{server_url}/attachments/file.jpg", body=b"jpeg_content") + + responses.add( + responses.GET, + f"{server_url}/attachments/bundles/changesets.zip", + headers={ + "Last-Modified": "Wed, 03 Jul 2024 11:04:48 GMT" # 1720004688000 + }, + ) + + mock_fetch_all_changesets.return_value = [ + { # collection hasn't changed since last bundling + "bucket": "bucket0", + "changes": [ + {"id": "record1", "attachment": {"location": "file.jpg", "size": 10}}, + {"id": "record2"}, + ], + "metadata": {"id": "collection0", "attachment": {"bundle": True}}, + "timestamp": 1720004688000 - 10, + }, + { + "bucket": "bucket1", + "changes": [ + {"id": "record1", "attachment": {"location": "file.jpg", "size": 10}}, + {"id": "record2"}, + ], + "metadata": {"id": "collection1", "attachment": {"bundle": True}}, + "timestamp": 1720004688000 + 10, + }, + { # collection without bundle flag + "bucket": "bucket2", + "changes": [{"id": "record2"}], + "metadata": {"id": "collection2"}, + "timestamp": 1720004688000 + 10, + }, + { # collection without attachments + "bucket": "bucket3", + "changes": [{"id": "record3"}], + "metadata": {"id": "collection3", "attachment": {"bundle": True}}, + "timestamp": 1720004688000 + 10, + }, + { # attachments too big + "bucket": "bucket4", + "changes": [ + {"id": "id1", "attachment": {"size": 10_000_000}}, + {"id": "id2", "attachment": {"size": 10_000_000}}, + {"id": "id3", "attachment": {"size": 10_000_000}}, + ], + "metadata": {"id": "collection4", "attachment": {"bundle": True}}, + "timestamp": 1720004688000 + 10, + }, + ] + + build_bundles(event, context={}) + + assert mock_write_zip.call_count == 2 # One for changesets and only one for the attachments + calls = mock_write_zip.call_args_list + + # Assert the first call (changesets.zip) + changesets_zip_path, changesets_zip_files = calls[0][0] + assert changesets_zip_path == "changesets.zip" + assert len(changesets_zip_files) == 5 + assert changesets_zip_files[0][0] == "bucket0--collection0.json" + assert changesets_zip_files[1][0] == "bucket1--collection1.json" + assert changesets_zip_files[2][0] == "bucket2--collection2.json" + assert changesets_zip_files[3][0] == "bucket3--collection3.json" + + # Assert the second call (attachments zip) + attachments_zip_path, attachments_zip_files = calls[1][0] + assert attachments_zip_path == "bucket1--collection1.zip" + assert len(attachments_zip_files) == 2 + assert attachments_zip_files[0][0] == "record1.meta.json" + assert attachments_zip_files[1][0] == "record1" + assert attachments_zip_files[1][1] == b"jpeg_content" + + mock_sync_cloud_storage.assert_called_once_with( + "remote-settings-nonprod-stage-attachments", + "bundles", + [ + "changesets.zip", + "bucket1--collection1.zip", + ], + [ + "bucket2--collection2.zip", + "bucket3--collection3.zip", + ], + ) + + +def test_sync_cloud_storage_upload_and_delete(mock_storage_client, mock_environment): + bucket = mock_storage_client + + mock_blob1 = MagicMock() + mock_blob2 = MagicMock() + bucket.blob.side_effect = [mock_blob1, mock_blob2] + + mock_blob3 = MagicMock() + mock_blob3.name = "remote/file3.txt" + bucket.list_blobs.return_value = [mock_blob1, mock_blob2, mock_blob3] + + sync_cloud_storage( + "remote-bucket", "remote", ["file1.txt", "file2.txt"], ["file3.txt", "file4.txt"] + ) + + # Check uploads + mock_blob1.upload_from_filename.assert_called_once_with("file1.txt") + mock_blob2.upload_from_filename.assert_called_once_with("file2.txt") + + # Check deletions + mock_blob3.delete.assert_called_once()