Skip to content

Commit

Permalink
Merge pull request #63 from RyouMon/fix-file-download-exception
Browse files (browse the repository at this point in the history)
Fix FileException when downloading media (caused by OffsiteMiddleware): media requests are now created with dont_filter=True so they are not dropped as offsite.
  • Loading branch information
RyouMon authored Dec 2, 2024
2 parents 6e828df + bc5ed0f commit ecd5f2d
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 7 deletions.
5 changes: 4 additions & 1 deletion src/favorites_crawler/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ class BasePipeline(FilesPipeline):
def get_media_requests(self, item, info):
item_dict = ItemAdapter(item).asdict()
referer = item_dict.get('referer')
return (Request(url, headers={'referer': referer}) for url in item_dict.get(self.files_urls_field, ()))
return (
Request(url, headers={'referer': referer}, dont_filter=True)
for url in item_dict.get(self.files_urls_field, ())
)

def file_path(self, request, response=None, info=None, *, item=None):
return item.get_filepath(request.url, info.spider)
Expand Down
12 changes: 6 additions & 6 deletions tests/test_pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class TestBasePipeline:
def test_should_set_referer_when_get_media_requests(self, mock_request):
mock_item = {'referer': sentinel.referer, 'file_urls': [getattr(sentinel, f'url-{i}') for i in range(1, 10)]}

list(BasePipeline('mock_path').get_media_requests(mock_item, None))
list(BasePipeline('mock_path', crawler=None).get_media_requests(mock_item, None))

calls = [
call(getattr(sentinel, f'url-{i}'), headers={'referer': sentinel.referer})
call(getattr(sentinel, f'url-{i}'), headers={'referer': sentinel.referer}, dont_filter=True)
for i in range(1, 10)
]
mock_request.assert_has_calls(calls, any_order=True)
Expand All @@ -24,7 +24,7 @@ def test_file_path_should_call_item_get_filepath(self):
mock_info = MagicMock()
mock_item = MagicMock()

BasePipeline('mock_path').file_path(mock_request, None, mock_info, item=mock_item)
BasePipeline('mock_path', crawler=None).file_path(mock_request, None, mock_info, item=mock_item)

mock_item.get_filepath.assert_called_once_with(mock_request.url, mock_info.spider)

Expand All @@ -36,11 +36,11 @@ def test_process_item_should_drop_item_when_cbz_file_already_exist(self, tmp_pat
mock_item.get_folder_name.return_value = 'abc'

with pytest.raises(DropItem):
ComicPipeline(str(tmp_path)).process_item(mock_item, None)
ComicPipeline(str(tmp_path), crawler=None).process_item(mock_item, None)

@patch('favorites_crawler.pipelines.create_comic_archive')
def test_should_create_comic_archive_when_close_spider(self, mock_create_comic_archive, tmp_path):
pipeline = ComicPipeline('mock_path')
pipeline = ComicPipeline('mock_path', crawler=None)
pipeline.files_path = tmp_path
(tmp_path / 'comic').mkdir()
pipeline.comic_comments = {'comic': b'comment'}
Expand All @@ -51,7 +51,7 @@ def test_should_create_comic_archive_when_close_spider(self, mock_create_comic_a

@patch('favorites_crawler.pipelines.create_comic_archive')
def test_should_not_create_comic_archive_when_comic_comments_is_empty(self, mock_create_comic_archive, tmp_path):
pipeline = ComicPipeline('mock_path')
pipeline = ComicPipeline('mock_path', crawler=None)
pipeline.comic_comments = {}

pipeline.close_spider(None)
Expand Down

0 comments on commit ecd5f2d

Please sign in to comment.