Improve build_bundles idempotence (#1488)
leplatrem authored Aug 28, 2024
1 parent b584058 commit da7af1c
Showing 2 changed files with 81 additions and 55 deletions.
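
This change replaces the single global freshness check (which returned early before any bundle was built) with one check per bundle: changesets.zip, startup.zip, and each attachments zip are now rebuilt independently whenever the server holds changes newer than the published copy, so one stale or missing bundle no longer blocks or forces the others. A minimal sketch of the per-bundle skip pattern the diff applies, using names from the code below (illustrative only, not a verbatim excerpt):

    # Sketch of the skip-if-fresh check repeated for each bundle in this diff.
    # get_modified_timestamp, BUILD_ALL and the timestamps are as in the code below.
    def needs_rebuild(bundle_url: str, latest_change: int) -> bool:
        published = get_modified_timestamp(bundle_url)
        if published is None:  # Bundle never published yet.
            published = -1
        return BUILD_ALL or published < latest_change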
93 changes: 59 additions & 34 deletions commands/build_bundles.py
@@ -127,19 +127,7 @@ def build_bundles(event, context):

base_url = client.server_info()["capabilities"]["attachments"]["base_url"]

existing_bundle_timestamp = get_modified_timestamp(
f"{base_url}{DESTINATION_FOLDER}/changesets.zip"
)
if existing_bundle_timestamp is None:
print("No previous bundle found") # Should only happen once.
existing_bundle_timestamp = -1

all_changesets = fetch_all_changesets(client)
highest_timestamp = max(c["timestamp"] for c in all_changesets)

if existing_bundle_timestamp >= highest_timestamp:
print("Existing bundle up-to-date. Nothing to do.")
return

# Build all archives in temp directory.
tmp_dir = tempfile.mkdtemp()
@@ -148,26 +136,6 @@ def build_bundles(event, context):
bundles_to_upload = []
bundles_to_delete = []

write_zip(
"changesets.zip",
[
("{metadata[bucket]}--{metadata[id]}.json".format(**changeset), json.dumps(changeset))
for changeset in all_changesets
],
)
bundles_to_upload.append("changesets.zip")

# Build a bundle for collections that are marked with "startup" flag.
write_zip(
"startup.zip",
[
("{metadata[bucket]}--{metadata[id]}.json".format(**changeset), json.dumps(changeset))
for changeset in all_changesets
if "startup" in changeset["metadata"].get("flags", [])
],
)
bundles_to_upload.append("startup.zip")

# Build attachments bundle for collections which have the option set.
for changeset in all_changesets:
bid = changeset["metadata"]["bucket"]
@@ -179,19 +147,27 @@
bundles_to_delete.append(attachments_bundle_filename)
if not BUILD_ALL:
continue
else:
print(f"{bid}/{cid} has attachments bundles enabled")

existing_bundle_timestamp = get_modified_timestamp(
f"{base_url}{DESTINATION_FOLDER}/{bid}--{cid}.zip"
)
print(f"'{bid}--{cid}.zip' was modified at {existing_bundle_timestamp}")
print(f"Latest change on {bid}/{cid} was at {changeset["timestamp"]}")
if not BUILD_ALL and changeset["timestamp"] < existing_bundle_timestamp:
# Collection hasn't changed since last bundling.
print(f"{bid}/{cid} hasn't changed since last bundle.")
continue

# Skip bundle if no attachments found.
records = [r for r in changeset["changes"] if "attachment" in r]
if not records:
print("%s/%s has no attachments" % (bid, cid))
print(f"{bid}/{cid} has no attachments")
bundles_to_delete.append(attachments_bundle_filename)
continue

print("%s/%s: %s records with attachments" % (bid, cid, len(records)))
print(f"{bid}/{cid} {len(records)} records with attachments")

# Skip bundle if total size is too big.
total_size_bytes = sum(r["attachment"]["size"] for r in records)
@@ -211,6 +187,55 @@
)
bundles_to_upload.append(attachments_bundle_filename)

highest_timestamp = max(c["timestamp"] for c in all_changesets)
print(f"Latest server change was at {highest_timestamp}")

existing_bundle_timestamp = get_modified_timestamp(
f"{base_url}{DESTINATION_FOLDER}/changesets.zip"
)
if existing_bundle_timestamp is None:
print("No previous 'changesets.zip' bundle found") # Should only happen once.
existing_bundle_timestamp = -1
print(f"'changesets.zip' was published at {existing_bundle_timestamp}")
if BUILD_ALL or (existing_bundle_timestamp < highest_timestamp):
write_zip(
"changesets.zip",
[
(
"{metadata[bucket]}--{metadata[id]}.json".format(**changeset),
json.dumps(changeset),
)
for changeset in all_changesets
],
)
bundles_to_upload.append("changesets.zip")
else:
print("Existing 'changesets.zip' bundle up-to-date. Nothing to do.")

# Build a bundle for collections that are marked with "startup" flag.
existing_bundle_timestamp = get_modified_timestamp(
f"{base_url}{DESTINATION_FOLDER}/startup.zip"
)
if existing_bundle_timestamp is None:
print("No previous 'startup.zip' bundle found") # Should only happen once.
existing_bundle_timestamp = -1
print(f"'startup.zip' was published at {existing_bundle_timestamp}")
if BUILD_ALL or existing_bundle_timestamp < highest_timestamp:
write_zip(
"startup.zip",
[
(
"{metadata[bucket]}--{metadata[id]}.json".format(**changeset),
json.dumps(changeset),
)
for changeset in all_changesets
if "startup" in changeset["metadata"].get("flags", [])
],
)
bundles_to_upload.append("startup.zip")
else:
print("Existing 'startup.zip' bundle up-to-date. Nothing to do.")

if not SKIP_UPLOAD:
sync_cloud_storage(
STORAGE_BUCKET_NAME, DESTINATION_FOLDER, bundles_to_upload, bundles_to_delete
    )
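
The rebuilt checks above rely on get_modified_timestamp, which is not part of this diff. Judging from the None handling above and the Last-Modified header mocked in the test below (annotated with its epoch value 1720004688000), a plausible sketch of such a helper, assuming it is implemented over HTTP with requests:

    # Hypothetical reconstruction of get_modified_timestamp; the real helper lives
    # elsewhere in the repository. It fetches the published bundle and converts its
    # Last-Modified header to epoch milliseconds, or returns None if unpublished.
    from email.utils import parsedate_to_datetime

    import requests

    def get_modified_timestamp(url: str) -> int | None:
        resp = requests.get(url)
        if not resp.ok:
            return None
        modified = parsedate_to_datetime(resp.headers["Last-Modified"])
        return int(modified.timestamp() * 1000)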
43 changes: 22 additions & 21 deletions tests/test_build_bundles.py
@@ -148,13 +148,14 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage):
)
responses.add(responses.GET, f"{server_url}/attachments/file.jpg", body=b"jpeg_content")

responses.add(
responses.GET,
f"{server_url}/attachments/bundles/changesets.zip",
headers={
"Last-Modified": "Wed, 03 Jul 2024 11:04:48 GMT" # 1720004688000
},
)
for bundle in ["changesets", "startup"] + [f"bucket{i}--collection{i}" for i in range(5)]:
responses.add(
responses.GET,
f"{server_url}/attachments/bundles/{bundle}.zip",
headers={
"Last-Modified": "Wed, 03 Jul 2024 11:04:48 GMT" # 1720004688000
},
)

mock_fetch_all_changesets.return_value = [
{ # collection hasn't changed since last bundling
@@ -167,7 +168,7 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage):
},
{
"changes": [
{"id": "record1", "attachment": {"location": "file.jpg", "size": 10}},
{"id": "record1", "attachment": {"location": "file.jpg", "size": 10000000}},
{"id": "record2"},
],
"metadata": {"id": "collection1", "bucket": "bucket1", "attachment": {"bundle": True}},
@@ -209,8 +210,16 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage):
) # changesets.zip, startup.zip, and only one for the attachments
calls = mock_write_zip.call_args_list

# Assert the first call (changesets.zip)
changesets_zip_path, changesets_zip_files = calls[0][0]
# Assert the first call (attachments zip)
attachments_zip_path, attachments_zip_files = calls[0][0]
assert attachments_zip_path == "bucket1--collection1.zip"
assert len(attachments_zip_files) == 2
assert attachments_zip_files[0][0] == "record1.meta.json"
assert attachments_zip_files[1][0] == "record1"
assert attachments_zip_files[1][1] == b"jpeg_content"

# Assert the second call (changesets.zip)
changesets_zip_path, changesets_zip_files = calls[1][0]
assert changesets_zip_path == "changesets.zip"
assert len(changesets_zip_files) == 6
assert changesets_zip_files[0][0] == "bucket0--collection0.json"
@@ -220,27 +229,19 @@ def test_build_bundles(mock_fetch_all_changesets, mock_write_zip, mock_sync_cloud_storage):
assert changesets_zip_files[4][0] == "bucket4--collection4.json"
assert changesets_zip_files[5][0] == "bucket5--collection5.json"

# Assert the second call (startup.zip)
startup_zip_path, startup_zip_files = calls[1][0]
# Assert the third call (startup.zip)
startup_zip_path, startup_zip_files = calls[2][0]
assert startup_zip_path == "startup.zip"
assert len(startup_zip_files) == 1
assert startup_zip_files[0][0] == "bucket5--collection5.json"

# Assert the third call (attachments zip)
attachments_zip_path, attachments_zip_files = calls[2][0]
assert attachments_zip_path == "bucket1--collection1.zip"
assert len(attachments_zip_files) == 2
assert attachments_zip_files[0][0] == "record1.meta.json"
assert attachments_zip_files[1][0] == "record1"
assert attachments_zip_files[1][1] == b"jpeg_content"

mock_sync_cloud_storage.assert_called_once_with(
"remote-settings-test-local-attachments",
"bundles",
[
"bucket1--collection1.zip",
"changesets.zip",
"startup.zip",
"bucket1--collection1.zip",
],
[
"bucket2--collection2.zip",
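
For context, sync_cloud_storage is also defined outside this diff; the assertion above pins down its call shape: a bucket name, a destination folder, the zips to upload, and the bundles to delete. A rough sketch of the behaviour those arguments imply, assuming Google Cloud Storage (the google-cloud-storage client is an assumption, not confirmed by this commit):

    # Assumed shape of sync_cloud_storage, inferred from its call sites only:
    # upload freshly built zips under the folder prefix, remove obsolete ones.
    from google.cloud import storage

    def sync_cloud_storage(bucket_name, folder, to_upload, to_delete):
        bucket = storage.Client().bucket(bucket_name)
        for filename in to_upload:
            bucket.blob(f"{folder}/{filename}").upload_from_filename(filename)
        for filename in to_delete:
            blob = bucket.blob(f"{folder}/{filename}")
            if blob.exists():
                blob.delete()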
