From 1ac07a645c994bd8debe99f07847c212fff9b641 Mon Sep 17 00:00:00 2001
From: wen
Date: Fri, 5 Jan 2024 01:23:45 +0800
Subject: [PATCH 1/4] fix: fix typo

---
 src/favorites_crawler/__init__.py | 2 +-
 tests/test_init.py                | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/favorites_crawler/__init__.py b/src/favorites_crawler/__init__.py
index e43cc51..de31354 100644
--- a/src/favorites_crawler/__init__.py
+++ b/src/favorites_crawler/__init__.py
@@ -35,7 +35,7 @@ def crawl(name, **kwargs):
 
 def spider_closed(spider):
     stats = spider.crawler.stats.get_stats()
-    if not (stats.get('item_scrapped_count', 0) + stats.get('item_dropped_count', 0)):
+    if not (stats.get('item_scraped_count', 0) + stats.get('item_dropped_count', 0)):
         logger.warning('Your cookies or token may have expired.')
 
 
diff --git a/tests/test_init.py b/tests/test_init.py
index 384898a..07fc46d 100644
--- a/tests/test_init.py
+++ b/tests/test_init.py
@@ -19,7 +19,7 @@ def test_spider_closed_should_call_warn(mock_logger):
 @pytest.mark.parametrize('item_scrapped_count,item_dropped_count', ((1, 0), (0, 1), (1, 1)))
 def test_spider_closed_should_not_call_warn(mock_logger, item_scrapped_count, item_dropped_count):
     mock_spider = MagicMock()
-    stats = {'item_scrapped_count': item_scrapped_count, 'item_dropped_count': item_dropped_count}
+    stats = {'item_scraped_count': item_scrapped_count, 'item_dropped_count': item_dropped_count}
     mock_spider.crawler.stats.get_stats.return_value = {k: v for k, v in stats.items() if v}
 
     spider_closed(mock_spider)

From db1e7779a2be37a1f0d8d388f0d17e4bcfcb964a Mon Sep 17 00:00:00 2001
From: wen
Date: Fri, 5 Jan 2024 01:35:15 +0800
Subject: [PATCH 2/4] feat: add tags to IPTC/Keywords of pixiv and yandere images

---
 requirements.txt                              |  3 +-
 src/favorites_crawler/constants/headers.py    |  2 +-
 src/favorites_crawler/itemloaders.py          |  8 +-
 src/favorites_crawler/items.py                |  3 -
 src/favorites_crawler/pipelines.py            | 80 +++++++++++++++----
 src/favorites_crawler/processors.py           | 20 ++++-
 src/favorites_crawler/settings.py             |  6 +-
 src/favorites_crawler/spiders/lemon.py        |  3 +
 src/favorites_crawler/spiders/nhentai.py      |  1 +
 src/favorites_crawler/spiders/pixiv.py        |  1 +
 src/favorites_crawler/spiders/yandere.py      | 23 ++++--
 src/favorites_crawler/spiders/yandere_vote.py | 10 +--
 src/favorites_crawler/utils/files.py          |  8 +-
 tests/test_processors.py                      |  2 +-
 tests/test_utils/test_files.py                | 23 ++++--
 15 files changed, 142 insertions(+), 51 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 1cd0298..984721a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ requests==2.27.1
 pyyaml==6.0
 itemadapter==0.4.0
 itemloaders==1.0.4
-Twisted==22.10.0
\ No newline at end of file
+Twisted==22.10.0
+pyexiftool==0.5.6
\ No newline at end of file
diff --git a/src/favorites_crawler/constants/headers.py b/src/favorites_crawler/constants/headers.py
index b4b73f3..f834c84 100644
--- a/src/favorites_crawler/constants/headers.py
+++ b/src/favorites_crawler/constants/headers.py
@@ -4,5 +4,5 @@
 PIXIV_REQUEST_HEADERS = {
     'APP-OS': 'ios',
     'APP-OS-Version': '12.6',
-    'Accept-Language': 'zh-CH;zh;q=0.9;en;q=0.8',
+    'Accept-Language': 'en',
 }
diff --git a/src/favorites_crawler/itemloaders.py b/src/favorites_crawler/itemloaders.py
index ac181a1..29b9472 100644
--- a/src/favorites_crawler/itemloaders.py
+++ b/src/favorites_crawler/itemloaders.py
@@ -1,11 +1,11 @@
 from itemloaders import ItemLoader
-from itemloaders.processors import Join, Compose, MapCompose
+from itemloaders.processors import Compose, MapCompose
 
 from favorites_crawler import items
 from favorites_crawler.processors import take_first, identity, get_nhentai_id, wrap_credits, \
     original_url_from_nhentai_thumb_url, select_best_nhentai_title, clean_nhentai_title, \
     get_year_from_iso_format, get_month_from_iso_format, get_series_from_title, get_volume_from_title, \
-    clean_parodies, get_page
+    clean_parodies, get_lemon_page, get_pixiv_tags, get_yandere_tags
 
 
 class PixivIllustItemLoader(ItemLoader):
@@ -15,6 +15,7 @@ class PixivIllustItemLoader(ItemLoader):
 
     file_urls_out = identity
     user_id_out = Compose(take_first, str)
+    tags_out = get_pixiv_tags
 
 
 class YanderePostItemLoader(ItemLoader):
@@ -24,6 +25,7 @@ class YanderePostItemLoader(ItemLoader):
 
     file_urls_out = identity
     artist_out = Compose(take_first, lambda s: s.strip())
+    tags_out = Compose(take_first, get_yandere_tags)
 
 
 class NHentaiGalleryItemLoader(ItemLoader):
@@ -51,4 +53,4 @@ class LemonPicPostItemLoader(ItemLoader):
 
     file_urls_out = identity
     tags_out = identity
-    page_out = Compose(take_first, get_page)
+    page_out = Compose(take_first, get_lemon_page)
diff --git a/src/favorites_crawler/items.py b/src/favorites_crawler/items.py
index 1dfd5ed..ed2c860 100644
--- a/src/favorites_crawler/items.py
+++ b/src/favorites_crawler/items.py
@@ -34,7 +34,6 @@ def get_folder_name(self, spider):
 
 @dataclass
 class ComicBookInfoItem:
-
     title: str = field(default=None, metadata={'is_comic_info': True})
     series: str = field(default=None, metadata={'is_comic_info': True})
     publisher: str = field(default=None, metadata={'is_comic_info': True})
@@ -71,7 +70,6 @@ def get_comic_info(self):
 
 @dataclass
 class PixivIllustItem(BaseItem):
-
     user_id: str = field(default=None)
 
     def get_folder_name(self, spider):
@@ -83,7 +81,6 @@ def get_folder_name(self, spider):
 
 @dataclass
 class YanderePostItem(BaseItem):
-
     artist: str = field(default=None)
 
     def get_folder_name(self, spider):
diff --git a/src/favorites_crawler/pipelines.py b/src/favorites_crawler/pipelines.py
index 1933734..3d3c3b2 100644
--- a/src/favorites_crawler/pipelines.py
+++ b/src/favorites_crawler/pipelines.py
@@ -11,6 +11,7 @@
 from scrapy.pipelines.files import FilesPipeline
 from itemadapter import ItemAdapter
 from twisted.python.failure import Failure
+from exiftool import ExifToolHelper
 
 from favorites_crawler.utils.files import create_comic_archive
 
@@ -18,8 +19,71 @@
 logger = logging.getLogger(__name__)
 
 
-class FavoritesFilePipeline(FilesPipeline):
+class BasePipeline(FilesPipeline):
+    def get_media_requests(self, item, info):
+        item_dict = ItemAdapter(item).asdict()
+        referer = item_dict.get('referer')
+        return (Request(url, headers={'referer': referer}) for url in item_dict.get(self.files_urls_field, ()))
+
+    def file_path(self, request, response=None, info=None, *, item=None):
+        return item.get_filepath(request.url, info.spider)
+
+    def item_completed(self, results, item, info):
+        for result in info.downloaded.values():
+            if isinstance(result, Failure):
+                logger.error('Error when downloading file: %s', result.value)
+        return super().item_completed(results, item, info)
+
+
+class PicturePipeline(BasePipeline):
+    """Save image and add iptc/keywords to it."""
+    def __init__(self, store_uri, download_func=None, settings=None):
+        super().__init__(store_uri, download_func=download_func, settings=settings)
+        self.write_iptc_keywords = settings.getbool('ENABLE_WRITE_IPTC_KEYWORDS', False)
+        if self.write_iptc_keywords:
+            try:
+                self.exif_tool = ExifToolHelper(executable=settings.get('EXIF_TOOL_EXECUTABLE', None))
+                self.exif_tool.run()
+            except Exception as e:
+                logger.error('Failed to load exiftool; consider installing it or setting EXIF_TOOL_EXECUTABLE.'
+                             '\nException: %r', e)
+                self.exif_tool = None
+        else:
+            self.exif_tool = None
+
+    def close_spider(self, _):
+        if self.exif_tool and self.exif_tool.running:
+            self.exif_tool.terminate()
+
+    def item_completed(self, results, item, info):
+        item = super().item_completed(results, item, info)
+        if not self.exif_tool:
+            return item
+        for success, result in results:
+            if not (success and item.tags):
+                continue
+            path = item.get_filepath(result['url'], info.spider)
+            try:
+                msg = self.exif_tool.set_tags(
+                    Path(self.store.basedir) / path,
+                    {'Keywords': item.tags},
+                    ['-overwrite_original'],
+                ).rstrip()
+            except Exception as e:
+                logger.error('Failed to write tags: %r to "%s", result: %r', item.tags, path, e)
+            else:
+                if msg == '1 image files updated':
+                    info.spider.crawler.stats.inc_value('iptc_status_count/updated')
+                    logger.debug('Successfully wrote tags: %r to "%s", result: %s', item.tags, path, msg)
+                else:
+                    logger.error('Failed to write tags: %r to "%s", result: %s', item.tags, path, msg)
+
+        return item
+
+
+class ComicPipeline(BasePipeline):
+    """Archive comic as cbz and add ComicBookInfo to it."""
     def __init__(self, store_uri, **kwargs):
         super().__init__(store_uri, **kwargs)
         self.files_path = Path(store_uri).resolve()
@@ -44,17 +108,3 @@ def process_item(self, item, spider):
 
         self.comic_comments[title] = bytes(comment, encoding='utf-8')
         return super().process_item(item, spider)
-
-    def get_media_requests(self, item, info):
-        item_dict = ItemAdapter(item).asdict()
-        referer = item_dict.get('referer')
-        return (Request(url, headers={'referer': referer}) for url in item_dict.get(self.files_urls_field, ()))
-
-    def file_path(self, request, response=None, info=None, *, item=None):
-        return item.get_filepath(request.url, info.spider)
-
-    def item_completed(self, results, item, info):
-        for result in info.downloaded.values():
-            if isinstance(result, Failure):
-                logger.error('Error when downloading file: %s', result.value)
-        return super().item_completed(results, item, info)
diff --git a/src/favorites_crawler/processors.py b/src/favorites_crawler/processors.py
index 0fed0c6..da8009e 100644
--- a/src/favorites_crawler/processors.py
+++ b/src/favorites_crawler/processors.py
@@ -120,8 +120,26 @@ def clean_parodies(parodies):
     return parodies.strip()
 
 
-def get_page(url):
+def get_lemon_page(url):
     match = re.match(r'https://www\..+html/(\d+)', url)
     if not match:
         return 1
     return int(match.group(1))
+
+
+def get_pixiv_tags(tags):
+    """Return en-us tags."""
+    results = set()
+    for tag in tags:
+        if tag.get('name'):
+            results.add(tag['name'].strip().replace(' ', '_').lower())
+        if tag.get('translated_name'):
+            results.add(tag['translated_name'].strip().replace(' ', '_').lower())
+    return list(filter(
+        lambda x: re.match(r'^[ -~]+$', x),  # ascii only
+        results,
+    ))
+
+
+def get_yandere_tags(tags):
+    return tags.split(' ')
diff --git a/src/favorites_crawler/settings.py b/src/favorites_crawler/settings.py
index bff1639..1bc2994 100644
--- a/src/favorites_crawler/settings.py
+++ b/src/favorites_crawler/settings.py
@@ -27,6 +27,8 @@
 TELNETCONSOLE_ENABLED = False
 
-ITEM_PIPELINES = {'favorites_crawler.pipelines.FavoritesFilePipeline': 0}
-
 FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER = False
+
+# ExifTool settings
+ENABLE_WRITE_IPTC_KEYWORDS = True
+EXIF_TOOL_EXECUTABLE = None
diff --git a/src/favorites_crawler/spiders/lemon.py b/src/favorites_crawler/spiders/lemon.py
index c761748..4fc1923 100644
--- a/src/favorites_crawler/spiders/lemon.py
+++ b/src/favorites_crawler/spiders/lemon.py
@@ -21,6 +21,9 @@ class LemonSpider(BaseSpider):
             callback='parse',
         ),
     ]
+    custom_settings = {
+        'ITEM_PIPELINES': {'favorites_crawler.pipelines.ComicPipeline': 0},
+    }
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/src/favorites_crawler/spiders/nhentai.py b/src/favorites_crawler/spiders/nhentai.py
index 2fa7b27..ee2e181 100644
--- a/src/favorites_crawler/spiders/nhentai.py
+++ b/src/favorites_crawler/spiders/nhentai.py
@@ -24,6 +24,7 @@ class NHentaiSpider(BaseSpider):
     )
     custom_settings = {
         'CONCURRENT_REQUESTS': 5,
+        'ITEM_PIPELINES': {'favorites_crawler.pipelines.ComicPipeline': 0},
     }
 
     def __init__(self, *args, **kwargs):
diff --git a/src/favorites_crawler/spiders/pixiv.py b/src/favorites_crawler/spiders/pixiv.py
index 39c08d5..785d437 100644
--- a/src/favorites_crawler/spiders/pixiv.py
+++ b/src/favorites_crawler/spiders/pixiv.py
@@ -20,6 +20,7 @@ class PixivSpider(BaseSpider):
         'DEFAULT_REQUEST_HEADERS': PIXIV_REQUEST_HEADERS,
         # Add PixivAuthorizationMiddleware after DefaultHeadersMiddleware
         'DOWNLOADER_MIDDLEWARES': {'favorites_crawler.middlewares.PixivAuthorizationMiddleware': 450},
+        'ITEM_PIPELINES': {'favorites_crawler.pipelines.PicturePipeline': 0},
     }
 
     def start_requests(self):
diff --git a/src/favorites_crawler/spiders/yandere.py b/src/favorites_crawler/spiders/yandere.py
index 3311573..dfa0600 100644
--- a/src/favorites_crawler/spiders/yandere.py
+++ b/src/favorites_crawler/spiders/yandere.py
@@ -8,7 +8,7 @@
 from favorites_crawler.constants.domains import YANDERE_DOMAIN
 from favorites_crawler.itemloaders import YanderePostItemLoader
 from favorites_crawler.constants.endpoints import YANDERE_LIST_POST_URL, YANDERE_SHOW_POST_URL
-from favorites_crawler.utils.files import list_yandere_id
+from favorites_crawler.utils.files import list_yandere_post
 
 
 class YandereSpider(BaseSpider):
@@ -17,18 +17,17 @@ class YandereSpider(BaseSpider):
     allowed_domains = (YANDERE_DOMAIN, )
     custom_settings = {
         'CONCURRENT_REQUESTS': 5,
+        'ITEM_PIPELINES': {'favorites_crawler.pipelines.PicturePipeline': 0},
     }
 
     def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
         self.limit = 100
         self.params = {'page': 1, 'limit': self.limit}
-        self.posts = set()
+        self.posts = {}
 
     def start_requests(self):
-        self.posts = set(list_yandere_id(pathlib.Path(self.settings.get('FILES_STORE')), include_subdir=True))
-        self.logger.debug(f'{len(self.posts)} posts will skip download.')
-
+        self.posts = list_yandere_post(pathlib.Path(self.settings.get('FILES_STORE')), include_subdir=True)
         username = self.custom_settings.get('USERNAME')
         if not username:
             raise CloseSpider('Did you run "favors login yandere"?')
@@ -45,12 +44,20 @@ def parse_start_url(self, response, **kwargs):
             yield Request(f'{YANDERE_LIST_POST_URL}?{urlencode(self.params)}', callback=self.parse_start_url)
 
         for post in posts:
-            if str(post['id']) in self.posts:
-                continue
+            post_id = str(post['id'])
+            if post_id in self.posts:
+                path = self.posts[post_id]  # type: pathlib.Path
+                if (path.name ==
+                        YanderePostItemLoader.default_item_class().get_filename(post['file_url'], self)):
+                    continue
+                path.unlink(missing_ok=True)
+
             loader = YanderePostItemLoader()
             loader.add_value('file_urls', post['file_url'])
+            loader.add_value('tags', post['tags'])
+
             if self.settings.getbool('ENABLE_ORGANIZE_BY_ARTIST'):
-                yield Request(YANDERE_SHOW_POST_URL.format(id=post['id']),
+                yield Request(YANDERE_SHOW_POST_URL.format(id=post_id),
                               callback=self.parse, cb_kwargs={'loader': loader})
             else:
                 yield loader.load_item()
diff --git a/src/favorites_crawler/spiders/yandere_vote.py b/src/favorites_crawler/spiders/yandere_vote.py
index 5587044..41c2e7f 100644
--- a/src/favorites_crawler/spiders/yandere_vote.py
+++ b/src/favorites_crawler/spiders/yandere_vote.py
@@ -5,7 +5,7 @@
 from favorites_crawler.spiders import BaseSpider
 from favorites_crawler.constants.domains import YANDERE_DOMAIN
 from favorites_crawler.constants.endpoints import YANDERE_VOTE_POST_URL
-from favorites_crawler.utils.files import list_yandere_id
+from favorites_crawler.utils.files import list_yandere_post
 
 
 class YandereVoteSpider(BaseSpider):
@@ -27,12 +27,10 @@ def __init__(self, csrf_token, cookie, score, path, *args, **kwargs):
         self.path = Path(path)
 
     def start_requests(self):
-        yandere_id_list = list_yandere_id(self.path)
-        self.crawler.stats.set_value('file_count', len(yandere_id_list))
-        yandere_id_set = set(yandere_id_list)
-        self.crawler.stats.set_value('voted/expected', len(yandere_id_set))
+        yandere_id_list = list(list_yandere_post(self.path).keys())
+        self.crawler.stats.set_value('voted/expected', len(yandere_id_list))
 
-        for i in yandere_id_set:
+        for i in yandere_id_list:
             yield FormRequest(YANDERE_VOTE_POST_URL,
                               formdata={'id': str(i), 'score': str(self.score)},
                               cookies=self.cookies, headers=self.headers,
diff --git a/src/favorites_crawler/utils/files.py b/src/favorites_crawler/utils/files.py
index 4fed1fd..45b96a7 100644
--- a/src/favorites_crawler/utils/files.py
+++ b/src/favorites_crawler/utils/files.py
@@ -19,13 +19,13 @@ def create_comic_archive(path: Path, comment=b''):
     return archive_name
 
 
-def list_yandere_id(path=Path('.'), include_subdir=False, result=None):
-    result = [] if result is None else result
+def list_yandere_post(path=Path('.'), include_subdir=False, result=None):
+    result = {} if result is None else result
     for file_or_dir in path.iterdir():
         if file_or_dir.is_file():
             id_ = get_yandere_post_id(file_or_dir.name)
             if id_:
-                result.append(id_)
+                result[id_] = file_or_dir
         elif include_subdir:
-            list_yandere_id(file_or_dir, include_subdir, result)
+            list_yandere_post(file_or_dir, include_subdir, result)
     return result
diff --git a/tests/test_processors.py b/tests/test_processors.py
index 1bab48f..5efdc5e 100644
--- a/tests/test_processors.py
+++ b/tests/test_processors.py
@@ -103,6 +103,6 @@ def test_clean_parodies(parodies, expected):
     ('https://www.xxx.com/811102.html/3', 3),
 ))
 def test_get_page(url, expected):
-    actual = processors.get_page(url)
+    actual = processors.get_lemon_page(url)
 
     assert actual == expected
diff --git a/tests/test_utils/test_files.py b/tests/test_utils/test_files.py
index 008a246..6204f48 100644
--- a/tests/test_utils/test_files.py
+++ b/tests/test_utils/test_files.py
@@ -3,7 +3,7 @@
 
 import pytest
 
-from favorites_crawler.utils.files import create_comic_archive, list_yandere_id
+from favorites_crawler.utils.files import create_comic_archive, list_yandere_post
 
 
 @pytest.fixture
@@ -43,7 +43,7 @@ def test_should_write_comment_to_archive(self, comic_path):
             assert zf.comment == b"I'm a comic."
 
 
-class TestListYandereId:
+class TestListYanderePost:
 
     def test_list_yandere_id(self, tmp_path: Path):
         pictures = [
@@ -57,9 +57,15 @@ def test_list_yandere_id(self, tmp_path: Path):
         (tmp_path / 'sub').mkdir()
         (tmp_path / 'sub' / 'yande.re 2 b c m.jpeg').touch()
 
-        actual = list_yandere_id(tmp_path)
+        actual = list_yandere_post(tmp_path)
+        actual = {k: v.name for k, v in actual.items()}
 
-        assert sorted(actual) == ['1', '10', '2', '20']
+        assert actual == {
+            '1': 'yande.re 1 b c m.jpg',
+            '2': 'yande.re 2 b c m.png',
+            '10': 'yande.re 10 b c m.jpg',
+            '20': 'yande.re 20 b c m.jpeg'
+        }
 
     def test_list_yandere_id_include_subdir(self, tmp_path: Path):
         (tmp_path / 'yande.re 1 b c m.jpg').touch()
@@ -68,6 +74,11 @@ def test_list_yandere_id_include_subdir(self, tmp_path: Path):
         (tmp_path / 'sub2').mkdir()
         (tmp_path / 'sub2' / 'yande.re 3 b c m.jpeg').touch()
 
-        actual = list_yandere_id(tmp_path, include_subdir=True)
+        actual = list_yandere_post(tmp_path, include_subdir=True)
+        actual = {k: v.name for k, v in actual.items()}
 
-        assert sorted(actual) == ['1', '2', '3']
+        assert actual == {
+            '1': 'yande.re 1 b c m.jpg',
+            '2': 'yande.re 2 b c m.jpeg',
+            '3': 'yande.re 3 b c m.jpeg',
+        }

From 60c6df92bd98a1eea3e0807f9d511aa95c0c7218 Mon Sep 17 00:00:00 2001
From: wen
Date: Sun, 7 Jan 2024 10:17:31 +0800
Subject: [PATCH 3/4] doc: update README.md

---
 README.md | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index 89911f6..9af87eb 100644
--- a/README.md
+++ b/README.md
@@ -133,20 +133,25 @@ yandere:
 ## Organize file by artist
 if you want to organize pixiv illust by user, add this line to your config:
 ```yaml
-...
 pixiv:
   # FAVORS_PIXIV_ENABLE_ORGANIZE_BY_USER: true  # (Deprecation)
   ENABLE_ORGANIZE_BY_ARTIST: true  # add this line to your yandere config
-  ...
-...
 ```
 if you want to organize yandere post by artist, add this line to your config:
 ```yaml
-...
 yandere:
   ENABLE_ORGANIZE_BY_ARTIST: true  # add this line to your yandere config
-  ...
-...
+```
+
+## Store tags to IPTC/Keywords
+Only pixiv and yandere are supported.
+```yaml
+yandere:
+  ENABLE_WRITE_IPTC_KEYWORDS: true  # default: true
+  EXIF_TOOL_EXECUTABLE: ''  # default None
+pixiv:
+  ENABLE_WRITE_IPTC_KEYWORDS: true  # default: true
+  EXIF_TOOL_EXECUTABLE: ''  # default None
 ```
 
 # Restore your favorites

From ae3f7f2c64dded0db172c2c55c11f7a4a2f1cd6f Mon Sep 17 00:00:00 2001
From: wen
Date: Sun, 7 Jan 2024 10:27:23 +0800
Subject: [PATCH 4/4] test: fix pipeline tests

---
 tests/test_pipelines.py | 31 ++++++++++++++++---------------
 1 file changed, 16 insertions(+), 15 deletions(-)

diff --git a/tests/test_pipelines.py b/tests/test_pipelines.py
index c82fb0d..8822d2a 100644
--- a/tests/test_pipelines.py
+++ b/tests/test_pipelines.py
@@ -3,24 +3,15 @@
 import pytest
 from scrapy.exceptions import DropItem
 
-from favorites_crawler.pipelines import FavoritesFilePipeline
+from favorites_crawler.pipelines import BasePipeline, ComicPipeline
 
 
-class TestFavoritesFilePipeline:
-
-    def test_process_item_should_drop_item_when_cbz_file_already_exist(self, tmp_path):
-        (tmp_path / 'abc.cbz').touch()
-        mock_item = MagicMock()
-        mock_item.get_folder_name.return_value = 'abc'
-
-        with pytest.raises(DropItem):
-            FavoritesFilePipeline(str(tmp_path)).process_item(mock_item, None)
-
+class TestBasePipeline:
     @patch('favorites_crawler.pipelines.Request')
     def test_should_set_referer_when_get_media_requests(self, mock_request):
         mock_item = {'referer': sentinel.referer, 'file_urls': [getattr(sentinel, f'url-{i}') for i in range(1, 10)]}
 
-        list(FavoritesFilePipeline('mock_path').get_media_requests(mock_item, None))
+        list(BasePipeline('mock_path').get_media_requests(mock_item, None))
 
         calls = [
             call(getattr(sentinel, f'url-{i}'), headers={'referer': sentinel.referer})
@@ -33,13 +24,23 @@ def test_file_path_should_call_item_get_filepath(self):
         mock_info = MagicMock()
         mock_item = MagicMock()
 
-        FavoritesFilePipeline('mock_path').file_path(mock_request, None, mock_info, item=mock_item)
+        BasePipeline('mock_path').file_path(mock_request, None, mock_info, item=mock_item)
 
         mock_item.get_filepath.assert_called_once_with(mock_request.url, mock_info.spider)
 
+
+class TestComicPipeline:
+    def test_process_item_should_drop_item_when_cbz_file_already_exist(self, tmp_path):
+        (tmp_path / 'abc.cbz').touch()
+        mock_item = MagicMock()
+        mock_item.get_folder_name.return_value = 'abc'
+
+        with pytest.raises(DropItem):
+            ComicPipeline(str(tmp_path)).process_item(mock_item, None)
+
     @patch('favorites_crawler.pipelines.create_comic_archive')
     def test_should_create_comic_archive_when_close_spider(self, mock_create_comic_archive, tmp_path):
-        pipeline = FavoritesFilePipeline('mock_path')
+        pipeline = ComicPipeline('mock_path')
         pipeline.files_path = tmp_path
         (tmp_path / 'comic').mkdir()
         pipeline.comic_comments = {'comic': b'comment'}
@@ -50,7 +51,7 @@ def test_should_create_comic_archive_when_close_spider(self, mock_create_comic_a
 
     @patch('favorites_crawler.pipelines.create_comic_archive')
     def test_should_not_create_comic_archive_when_comic_comments_is_empty(self, mock_create_comic_archive, tmp_path):
-        pipeline = FavoritesFilePipeline('mock_path')
+        pipeline = ComicPipeline('mock_path')
         pipeline.comic_comments = {}
 
         pipeline.close_spider(None)