From 7f6768e8b2ab26c7333d3bd430ee6da20831df6d Mon Sep 17 00:00:00 2001
From: Michael Bianco
Date: Fri, 20 Aug 2021 09:09:07 -0600
Subject: [PATCH 1/4] Adding deduped filesystem persistence

---
 vcr/cassette.py                           |  3 +
 vcr/persisters/deduplicated_filesystem.py | 67 +++++++++++++++++++++++
 2 files changed, 70 insertions(+)
 create mode 100644 vcr/persisters/deduplicated_filesystem.py

diff --git a/vcr/cassette.py b/vcr/cassette.py
index 5822afac..5863976e 100644
--- a/vcr/cassette.py
+++ b/vcr/cassette.py
@@ -12,6 +12,9 @@
 from .matchers import get_matchers_results, method, requests_match, uri
 from .patch import CassettePatcherBuilder
 from .persisters.filesystem import FilesystemPersister
+from .persisters.deduplicated_filesystem import DeduplicatedFilesystemPersister
+from .util import partition_dict
+from ._handle_coroutine import handle_coroutine
 from .record_mode import RecordMode
 from .serializers import yamlserializer
 from .util import partition_dict
diff --git a/vcr/persisters/deduplicated_filesystem.py b/vcr/persisters/deduplicated_filesystem.py
new file mode 100644
index 00000000..2ba92b8d
--- /dev/null
+++ b/vcr/persisters/deduplicated_filesystem.py
@@ -0,0 +1,67 @@
+# https://github.com/Azure/azure-sdk-for-python/pull/17973/files
+
+import copy
+import os
+
+from vcr.serialize import serialize, deserialize
+from .filesystem import FilesystemPersister
+
+
+ATTRIBUTES_TO_COMPARE = [
+    "body",
+    "headers",
+    "host",
+    "method",
+    "path",
+    "protocol",
+    "query",
+    "scheme",
+    "uri",
+    "url",
+]
+
+
+def trim_duplicates(cassette_dict):
+    """Return a new cassette dict without recently repeated requests.
+
+    Each request is compared with up to three requests recorded directly
+    before it. When an earlier one matches, the earlier request/response
+    pair is dropped, so the most recent recording is the one kept.
+    """
+    # Dict[str] -> Dict[str]
+    requests = cassette_dict["requests"]
+    responses = cassette_dict["responses"]
+    pairs_to_remove = set()
+    for i in range(1, len(requests)):
+        # Look back at most three entries, but never before index 0.
+        for j in range(1, min(i, 3) + 1):
+            if same_requests(requests[i - j], requests[i]):
+                pairs_to_remove.add(i - j)
+    # Always keep the last one
+    ret = {"requests": [], "responses": []}
+
+    for i in range(len(requests)):
+        if i not in pairs_to_remove:
+            ret["requests"].append(requests[i])
+            ret["responses"].append(responses[i])
+
+    return ret
+
+
+def same_requests(request1, request2):
+    """Return True when both requests agree on every compared attribute."""
+    # (vcr.Request, vcr.Request) -> bool
+    for attr in ATTRIBUTES_TO_COMPARE:
+        if getattr(request1, attr) != getattr(request2, attr):
+            return False
+
+    return True
+
+
+class DeduplicatedFilesystemPersister(FilesystemPersister):
+    """Filesystem persister that trims duplicate requests before saving."""
+
+    @staticmethod
+    def save_cassette(cassette_path, cassette_dict, serializer):
+        cassette_dict = trim_duplicates(cassette_dict)
+        FilesystemPersister.save_cassette(cassette_path, cassette_dict, serializer)

From 8321904bfa62b9dd0dd11da35da905e821d8c486 Mon Sep 17 00:00:00 2001
From: Michael Bianco
Date: Fri, 20 Aug 2021 09:13:35 -0600
Subject: [PATCH 2/4] Attempt at testing new dedup filesystem persistence

---
 tests/unit/test_persist.py | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/tests/unit/test_persist.py b/tests/unit/test_persist.py
index 025ad968..fafe6dc5 100644
--- a/tests/unit/test_persist.py
+++ b/tests/unit/test_persist.py
@@ -1,8 +1,10 @@
 import pytest
+from unittest.mock import patch
 
 from vcr.persisters.filesystem import FilesystemPersister
+from vcr.persisters.deduplicated_filesystem import DeduplicatedFilesystemPersister
 from vcr.serializers import jsonserializer, yamlserializer
-
+import vcr
 
 @pytest.mark.parametrize(
     "cassette_path, serializer",
@@ -28,3 +30,20 @@ def test_load_cassette_with_invalid_cassettes(cassette_path, serializer):
     with pytest.raises(Exception) as excinfo:
         FilesystemPersister.load_cassette(cassette_path, serializer)
     assert "run the migration script" not in excinfo.exconly()
+
+@pytest.mark.parametrize(
+    "cassette_path, serializer",
+    [
+        ("tests/fixtures/migration/cassette_with_duplicate_requests.yaml", yamlserializer),
+    ],
+)
+def test_load_cassette_with_duplicate_requests_cassettes(cassette_path, serializer):
+    cassette_dict = DeduplicatedFilesystemPersister.load_cassette(cassette_path, serializer)
+    with patch.object(FilesystemPersister, "save_cassette") as mock:
+        with vcr.use_cassette(cassette_path, serializer=serializer, persister=DeduplicatedFilesystemPersister):
+            pass
+
+    # it's deduped when it is saved
+    # DeduplicatedFilesystemPersister.save_cassette(cassette_path, cassette_dict, serializer)
+    assert mock.call_count == 1
+

From 5ebc0e7f331d91561f511259c165baa5be6edc37 Mon Sep 17 00:00:00 2001
From: Michael Bianco
Date: Fri, 20 Aug 2021 09:19:25 -0600
Subject: [PATCH 3/4] Adding duplicate request cassette

---
 .../cassette_with_duplicate_requests.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 tests/fixtures/migration/cassette_with_duplicate_requests.yaml

diff --git a/tests/fixtures/migration/cassette_with_duplicate_requests.yaml b/tests/fixtures/migration/cassette_with_duplicate_requests.yaml
new file mode 100644
index 00000000..121289ab
--- /dev/null
+++ b/tests/fixtures/migration/cassette_with_duplicate_requests.yaml
@@ -0,0 +1,19 @@
+interactions:
+- request:
+    body: null
+    headers:
+      accept: ['*/*']
+      accept-encoding: ['gzip, deflate, compress']
+      user-agent: ['python-requests/2.2.1 CPython/2.6.1 Darwin/10.8.0']
+    method: GET
+    uri: http://httpbin.org/ip
+  response:
+    body: {string: "{\n \"origin\": \"217.122.164.194\"\n}"}
+    headers:
+      access-control-allow-origin: ['*']
+      content-type: [application/json]
+      date: ['Mon, 21 Apr 2014 23:06:09 GMT']
+      server: [gunicorn/0.17.4]
+      content-length: ['32']
+      connection: [keep-alive]
+    status: {code: 200, message: OK}

From 2c7878ea2b02e79b035c6ef10ce081d26873865e Mon Sep 17 00:00:00 2001
From: Michael Bianco
Date: Mon, 14 Nov 2022 15:14:58 -0700
Subject: [PATCH 4/4] removing unneeded imports

---
 vcr/cassette.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/vcr/cassette.py b/vcr/cassette.py
index 5863976e..c943bd07 100644
--- a/vcr/cassette.py
+++ b/vcr/cassette.py
@@ -13,8 +13,6 @@
 from .patch import CassettePatcherBuilder
 from .persisters.filesystem import FilesystemPersister
 from .persisters.deduplicated_filesystem import DeduplicatedFilesystemPersister
-from .util import partition_dict
-from ._handle_coroutine import handle_coroutine
 from .record_mode import RecordMode
 from .serializers import yamlserializer
 from .util import partition_dict