Merge pull request #67 from RyouMon/fix-load-cookie
Fix load_cookie
RyouMon authored Dec 6, 2024
2 parents d5fe54c + 19f3462 commit feca3cb
Showing 7 changed files with 61 additions and 21 deletions.
3 changes: 2 additions & 1 deletion requirements.txt
@@ -6,4 +6,5 @@ unidecode==1.3.8
 langdetect==1.0.9
 pykakasi==2.2.1
 gppt==4.1.0
-typer>=0.14.0
+typer>=0.14.0
+loguru>=0.7.2
14 changes: 11 additions & 3 deletions src/favorites_crawler/commands/crawl.py
@@ -8,9 +8,11 @@
 from scrapy.utils.project import get_project_settings
 from scrapy.spiderloader import SpiderLoader
 
+from favorites_crawler.constants.domains import LMMPIC_DOMAIN, NHENTAI_DOMAIN, TWITTER_DOMAIN
 from favorites_crawler.utils.config import load_config, overwrite_spider_settings
 from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME
 from favorites_crawler.utils.auth import refresh_pixiv_token
+from favorites_crawler.utils.cookies import load_cookie
 
 app = typer.Typer(help='Crawl your favorites from websites.', no_args_is_help=True)
 
@@ -36,20 +38,26 @@ def crawl_pixiv():
 @app.command('nhentai')
 def crawl_nhentai():
     """Crawl your favorite comics from nhentai."""
-    crawl('nhentai')
+    favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
+    cookies = load_cookie(NHENTAI_DOMAIN, favors_home)
+    crawl('nhentai', cookies=cookies)
 
 
 @app.command('x')
 @app.command('twitter')
 def crawl_twitter():
     """Crawl your favorite pictures from twitter."""
-    crawl('twitter')
+    favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
+    cookies = load_cookie(TWITTER_DOMAIN, favors_home)
+    crawl('twitter', cookies=cookies)
 
 
 @app.command('lemon')
 def crawl_lemon(id_list: list[str] = typer.Option([], '--id', '-i')):
     """Crawl your favorite photo albums from lemon."""
-    crawl('lemon', id_list=id_list)
+    favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
+    cookies = load_cookie(LMMPIC_DOMAIN, favors_home)
+    crawl('lemon', id_list=id_list, cookies=cookies)
 
 
 def spider_closed(spider):
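
Note on the commands above: each one now resolves FAVORS_HOME itself, loads the matching Netscape cookie file via load_cookie(), and passes the result into crawl() as a cookies keyword, instead of letting each spider read its own cookie file at construction time. The crawl() helper is not part of this diff; the following is only a sketch of how that keyword plausibly reaches the spiders, assuming the helper wraps Scrapy's CrawlerProcess (the function body and names are assumptions, not the project's actual code).

from scrapy.crawler import CrawlerProcess
from scrapy.utils.project import get_project_settings


def crawl(spider_name, **kwargs):
    # Hypothetical sketch: CrawlerProcess.crawl() forwards **kwargs
    # (here cookies=... and id_list=...) to the spider constructor,
    # which is why the spiders can drop their own cookie-loading code.
    process = CrawlerProcess(get_project_settings())
    process.crawl(spider_name, **kwargs)
    process.start()
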
5 changes: 0 additions & 5 deletions src/favorites_crawler/spiders/lemon.py
@@ -5,7 +5,6 @@
 from favorites_crawler.itemloaders import LemonPicPostItemLoader
 from favorites_crawler.constants.endpoints import LEMON_PIC_USER_CENTER_URL, LEMON_PIC_POST_URL_PATTERN
 from favorites_crawler.constants.domains import LMMPIC_DOMAIN
-from favorites_crawler.utils.cookies import load_cookie
 
 
 class LemonSpider(BaseSpider):
@@ -25,10 +24,6 @@ class LemonSpider(BaseSpider):
         'ITEM_PIPELINES': {'favorites_crawler.pipelines.ComicPipeline': 0},
     }
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.cookies = load_cookie(LMMPIC_DOMAIN)
-
     def start_requests(self):
         if hasattr(self, 'id_list') and self.id_list:
             self.logger.debug('GET id_list: %s', self.id_list)
5 changes: 0 additions & 5 deletions src/favorites_crawler/spiders/nhentai.py
@@ -5,7 +5,6 @@
 from favorites_crawler.itemloaders import NHentaiGalleryItemLoader
 from favorites_crawler.constants.endpoints import NHENTAI_USER_FAVORITES_URL
 from favorites_crawler.constants.domains import NHENTAI_DOMAIN
-from favorites_crawler.utils.cookies import load_cookie
 
 
 class NHentaiSpider(BaseSpider):
@@ -27,10 +26,6 @@ class NHentaiSpider(BaseSpider):
         'ITEM_PIPELINES': {'favorites_crawler.pipelines.ComicPipeline': 0},
     }
 
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-        self.cookies = load_cookie(NHENTAI_DOMAIN)
-
     def start_requests(self):
         yield Request(NHENTAI_USER_FAVORITES_URL, cookies=self.cookies)
 
2 changes: 0 additions & 2 deletions src/favorites_crawler/spiders/twitter.py
@@ -7,7 +7,6 @@
 from favorites_crawler.itemloaders import TwitterTweetItemLoader
 from favorites_crawler.constants.domains import TWITTER_DOMAIN
 from favorites_crawler.constants.endpoints import TWITTER_LIKES_URL
-from favorites_crawler.utils.cookies import load_cookie
 from favorites_crawler.utils.common import DictRouter
 
 
@@ -31,7 +30,6 @@ def current_url(self):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
-        self.cookies = load_cookie(TWITTER_DOMAIN)
         self.base_url = TWITTER_LIKES_URL.format(id=self.custom_settings.get('LIKES_ID'))
         self.variables = {
             "userId": str(self.custom_settings.get('USER_ID')),
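
All three spiders drop the __init__ code that read cookies from disk, yet their requests still use self.cookies. This presumably relies on standard Scrapy behaviour: the base Spider copies constructor keyword arguments onto the instance, so the cookies= keyword passed in by the crawl command becomes self.cookies. A minimal illustration (the spider name 'demo' and the cookie value are made up):

from scrapy import Spider

# Spider.__init__ does self.__dict__.update(kwargs), so extra keyword
# arguments become instance attributes.
spider = Spider(name='demo', cookies={'sessionid': 'abc'})
print(spider.cookies)  # {'sessionid': 'abc'}
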
17 changes: 12 additions & 5 deletions src/favorites_crawler/utils/cookies.py
@@ -1,12 +1,19 @@
+from __future__ import annotations
+
 import os
+from pathlib import Path
 from http.cookiejar import MozillaCookieJar
 
-
-cookie_home = os.path.expanduser('~/.favorites_crawler')
+from loguru import logger
 
 
-def load_cookie(domain):
+def load_cookie(domain: str, home: str | Path) -> dict:
     """Load 'Netscape HTTP Cookie File' as dict"""
-    cookiejar = MozillaCookieJar()
-    cookiejar.load(os.path.join(cookie_home, f'{domain}_cookies.txt'))
+    try:
+        cookiejar = MozillaCookieJar()
+        cookie_file = os.path.join(home, f'{domain}_cookies.txt')
+        cookiejar.load(cookie_file)
+    except Exception as e:
+        logger.error('Failed to load cookie {}, {!r}', cookie_file, e)
+        return {}
     return {getattr(c, 'name'): getattr(c, 'value') for c in cookiejar}
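
The rewritten load_cookie() takes the favors home directory explicitly instead of a hard-coded module-level path, and it fails soft: any exception while reading the Netscape cookie file is logged through loguru and an empty dict is returned, so callers can still proceed. A short usage sketch, mirroring how the crawl commands call it (the fallback print is illustrative only):

import os

from favorites_crawler.constants.domains import NHENTAI_DOMAIN
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME
from favorites_crawler.utils.cookies import load_cookie

favors_home = os.path.expanduser(os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME))
cookies = load_cookie(NHENTAI_DOMAIN, favors_home)
if not cookies:
    # Missing or malformed cookie file: the error was already logged and {} returned.
    print('no cookies loaded; crawling without saved cookies')
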
36 changes: 36 additions & 0 deletions tests/test_utils/test_cookies.py
@@ -0,0 +1,36 @@
+from favorites_crawler.utils.cookies import load_cookie
+
+
+class TestLoadCookie:
+    def test_load_cookie_when_file_exists(self, tmp_path):
+        domain = 'localhost'
+        cookie_file = tmp_path / f'{domain}_cookies.txt'
+        cookie_file.touch()
+        cookie_file.write_text(
+            """# Netscape HTTP Cookie File
+# http://curl.haxx.se/rfc/cookie_spec.html
+# This is a generated file! Do not edit.
+localhost FALSE / TRUE 9933144989 User-Agent Test
+"""
+        )
+
+        cookie = load_cookie(domain, tmp_path)
+
+        assert cookie == {'User-Agent': 'Test'}
+
+    def test_load_cookie_when_file_not_exists(self, tmp_path):
+        domain = 'localhost'
+
+        cookie = load_cookie(domain, tmp_path)
+
+        assert cookie == {}
+
+    def test_load_cookie_when_file_invalid(self, tmp_path):
+        domain = 'localhost'
+        cookie_file = tmp_path / f'{domain}_cookies.txt'
+        cookie_file.touch()
+        cookie_file.write_text('')
+        cookie = load_cookie(domain, tmp_path)
+
+        assert cookie == {}
+