Merge pull request #64 from RyouMon/fix-login
Update login method of twitter and nhentai. Add FAVORS_HOME env var.
RyouMon authored Dec 4, 2024
2 parents ecd5f2d + 27e47cf commit 29d2ad9
Showing 10 changed files with 404 additions and 99 deletions.
60 changes: 39 additions & 21 deletions README.md
@@ -64,31 +64,45 @@ favors login --help
```

## Login Yandere
1. Run command:
```
favors login yandere
```
2. Input your username and hit the Enter key.
Run command:
```
favors login yandere -u {username}
```
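
Example (the username below is a placeholder):
```commandline
favors login yandere -u alice
```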

## Login NHentai
1. Open nhentai in a browser and login.
2. Use the "Get cookies.txt" browser extension to download the cookie file.
3. Copy the cookie file to {user_home}/.favorites_crawler.
1. Get the User-Agent and Cookie File:
   1. Open nhentai and login.
   2. Open the dev console (F12) and switch to the Network tab.
   3. Open any comic.
   4. Copy the user-agent from any request.
   5. Use the "Get cookies.txt" browser extension to download the cookie file.
2. Execute command:
```commandline
favors login nhentai -u "{User-Agent}" -c "{Cookie File}"
```
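
Example (the User-Agent value and cookie path below are placeholders; use the ones you copied):
```commandline
favors login nhentai -u "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" -c "C:\Users\xxx\Downloads\nhentai.net_cookies.txt"
```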

## Login Twitter
1. Run command:
```
favors login twitter
```
2. Input your username; after pressing Enter, the likes page will open in the browser.
3. Open the dev console (F12) and switch to the Network tab.
4. Enable persistent logging ("Preserve log").
5. Type into the filter field: Likes?
6. Refresh the page.
7. Copy Authorization, X-Csrf-Token and Request URL from the request (Likes?variables...) and input them on the terminal.
8. Use the "Get cookies.txt" browser extension to download the cookie file.
9. Copy the cookie file to {user_home}/.favorites_crawler.

1. Get the Authorization, X-Csrf-Token, Request URL and Cookie File:
   1. Open [x.com](https://x.com/) and login, then go to your "Likes" page.
   2. Open the dev console (F12) and switch to the Network tab.
   3. Enable persistent logging ("Preserve log").
   4. Type into the filter field: Likes?
   5. Refresh the page.
   6. Copy Authorization, X-Csrf-Token and Request URL from the request (Likes?variables...).
   7. Use the "Get cookies.txt" browser extension to download the cookie file.
2. Execute command:
```commandline
favors login x -at "{Authorization}" -ct "{X-Csrf-Token}" -u "{RequestURL}" -c "{Cookie File}"
```
Example:
```commandline
favors login x -at "Bearer AAAAAAAAAAAAA..." -ct ... -u "https://x.com/i/api/graphql/.../Likes?..." -c "C:\Users\xxx\Downloads\x.com_cookies.txt"
```

Note: the Request URL makes the entire command very long.
If you cannot enter such a long command in the macOS terminal,
you can write the command to a shell script and execute it, as sketched below.
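
For example, a minimal shell script (the file name and all values below are placeholders):
```commandline
#!/bin/sh
favors login x \
  -at "Bearer AAAAAAAAAAAAA..." \
  -ct "0123456789abcdef" \
  -u "https://x.com/i/api/graphql/.../Likes?variables=..." \
  -c "$HOME/Downloads/x.com_cookies.txt"
```
Save it as e.g. `login_x.sh` and run it with `sh login_x.sh`.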

# Crawl

Expand Down Expand Up @@ -121,7 +135,11 @@ favors crawl twitter
```

# Config
Config file is located at `{your_home}/.favorites_crawler/config.yml`.

The config file `config.yml` is located in `FAVORS_HOME`;
by default `FAVORS_HOME` is `{your_home}/.favorites_crawler`.
You can change it by setting the `FAVORS_HOME` environment variable (see the example below).
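
For example (the custom paths below are placeholders):
```commandline
# Linux/macOS
export FAVORS_HOME="$HOME/my-favors-home"

# Windows (PowerShell)
$env:FAVORS_HOME = "D:\my-favors-home"
```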

You can set any [scrapy built-in settings](https://docs.scrapy.org/en/latest/topics/settings.html#built-in-settings-reference) in this file.

By default, the file content looks like this:
8 changes: 3 additions & 5 deletions src/favorites_crawler/commands/crawl.py
@@ -9,6 +9,7 @@
from scrapy.spiderloader import SpiderLoader

from favorites_crawler.utils.config import load_config, overwrite_spider_settings
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME

app = typer.Typer(help='Crawl your favorites from websites.', no_args_is_help=True)

@@ -70,13 +71,10 @@ def crawl(name, **kwargs):
:param kwargs: kwargs passed to spider's __init__ method
"""
spider = spider_loader.load(name)
overwrite_spider_settings(spider, scrapy_settings, load_config())
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
overwrite_spider_settings(spider, scrapy_settings, load_config(favors_home))
process = CrawlerProcess(scrapy_settings)
process.crawl(spider, **kwargs)
for crawler in process.crawlers:
crawler.signals.connect(spider_closed, signal=signals.spider_closed)
process.start()


if __name__ == '__main__':
crawl('pixiv')
106 changes: 73 additions & 33 deletions src/favorites_crawler/commands/login.py
@@ -1,14 +1,12 @@
import re
import os
import shutil
from typing import Optional
from urllib.parse import unquote
from webbrowser import open as open_url

import typer
from selenium.common import NoSuchWindowException

from favorites_crawler.constants.endpoints import TWITTER_PROFILE_LIKES_URL
from favorites_crawler.utils.auth import CustomGetPixivToken
from favorites_crawler.utils.auth import CustomGetPixivToken, parse_twitter_likes_url, parser_twitter_likes_features
from favorites_crawler.utils.config import dump_config, load_config
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME


app = typer.Typer(help='Prepare auth information for crawling.', no_args_is_help=True)
@@ -35,13 +33,14 @@ def login_pixiv(
If you do not provide your username and password, you will login manually on the web page
"""
config = load_config()
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
token_getter = CustomGetPixivToken()
try:
login_info = token_getter.login(username=username, password=password)
except NoSuchWindowException:
print('Failed to login.')
return
except Exception as e:
print(f'Failed to login. {e!r}')
exit(1)

pixiv_config = config.setdefault('pixiv', {})
try:
@@ -50,8 +49,9 @@
pixiv_config['REFRESH_TOKEN'] = login_info['refresh_token']
except KeyError as e:
print(f'Failed to login. {e!r}')
exit(1)
else:
dump_config(config)
dump_config(config, favors_home)
print("Login successful.")


@@ -65,49 +65,89 @@ def login_yandere(
"""
Login to yandere.
"""
config = load_config()
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
yandere_config = config.setdefault('yandere', {})
yandere_config['USERNAME'] = username
dump_config(config)
dump_config(config, favors_home)
print("Login successful.")


@app.command('x')
@app.command('twitter')
def login_twitter(
username: str = typer.Option(
..., '-u', '--username',
help="Your twitter username."
auth_token: str = typer.Option(
..., '-at', '--auth-token',
help='Authorization Token (Copy from Dev console)'
),
csrf_token: str = typer.Option(
..., '-ct', '--csrf-token',
help='Csrf Token (Copy from Dev console)'
),
likes_url: str = typer.Option(
..., '-u', '--likes-url',
help='Request URL of Likes API (Copy from Dev console)'
),
cookie_file: str = typer.Option(
..., '-c', '--cookie-file',
help='Netscape HTTP Cookie File; you can download it with the "Get cookies.txt" browser extension.'
)
):
"""
Login to twitter.
1. After executing this command, the likes page will open in the browser.\n
1. Open twitter and login, then go to your "Likes" page.\n
2. Open dev console (F12) and switch to network tab.\n
3. Enable persistent logging ("Preserve log").\n
4. Type into the filter field: Likes?\n
5. Refresh Page.\n
6. Copy Authorization, X-Csrf-Token and Request URL from the request (Likes?variables...) and input them on the terminal.\n
7. Use the "Get cookies.txt" browser extension to download the cookie file.\n
8. Copy the cookie file to {user_home}/.favorites_crawler.
7. Use the "Get cookies.txt" browser extension to download the cookie file.
"""
open_url(TWITTER_PROFILE_LIKES_URL.format(username=username))
config = load_config()
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
twitter_config = config.setdefault('twitter', {})
try:
twitter_config['AUTHORIZATION'] = input('Authorization: ')
twitter_config['X_CSRF_TOKEN'] = input('X-Csrf-Token: ')
twitter_config['LIKES_ID'], twitter_config['USER_ID'] = parse_twitter_likes_url(input('Request URL: '))
except KeyboardInterrupt:
"Failed to login."
return
dump_config(config)
twitter_config['AUTHORIZATION'] = auth_token
twitter_config['X_CSRF_TOKEN'] = csrf_token
twitter_config['LIKES_ID'], twitter_config['USER_ID'] = parse_twitter_likes_url(likes_url)
twitter_config['FEATURES'] = parser_twitter_likes_features(likes_url)
shutil.copy(cookie_file, favors_home)
except Exception as e:
print(f"Failed to login: {e!r}")
exit(1)
dump_config(config, favors_home)
print("Login successful.")


def parse_twitter_likes_url(url):
"""Parse USER_ID and LIKES_ID from URL"""
url = unquote(url).replace(' ', '')
match = re.match(r'^.+?graphql/(.+?)/.+?userId":"(.+?)".+$', url)
return match.groups()
@app.command("nhentai")
def login_nhentai(
user_agent: str = typer.Option(
..., '-u', '--user-agent',
help='User Agent'
),
cookie_file: str = typer.Option(
..., '-c', '--cookie-file',
help='Netscape HTTP Cookie File; you can download it with the "Get cookies.txt" browser extension.'
)
):
"""
Login to nhentai.
1. Open nhentai and login.\n
2. Open dev console (F12) and switch to network tab.\n
3. Open any comic.\n
4. Copy the user-agent from any request.\n
5. Use the "Get cookies.txt" browser extension to download the cookie file.
"""
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
nhentai_config = config.setdefault('nhentai', {})
try:
nhentai_config['USER_AGENT'] = user_agent
shutil.copy(cookie_file, favors_home)
except Exception as e:
print(f"Failed to login: {e!r}")
exit(1)
dump_config(config, favors_home)
print("Login successful.")
1 change: 0 additions & 1 deletion src/favorites_crawler/constants/endpoints.py
@@ -11,4 +11,3 @@
NHENTAI_USER_FAVORITES_URL = 'https://nhentai.net/favorites/'

TWITTER_LIKES_URL = 'https://x.com/i/api/graphql/{id}/Likes'
TWITTER_PROFILE_LIKES_URL = 'https://x.com/{username}/likes'
1 change: 1 addition & 0 deletions src/favorites_crawler/constants/path.py
@@ -0,0 +1 @@
DEFAULT_FAVORS_HOME = '~/.favorites_crawler'
30 changes: 5 additions & 25 deletions src/favorites_crawler/spiders/twitter.py
@@ -18,6 +18,7 @@ class TwitterSpider(BaseSpider):
custom_settings = {
'CONCURRENT_REQUESTS': 2,
'ITEM_PIPELINES': {'favorites_crawler.pipelines.PicturePipeline': 0},
'HTTPERROR_ALLOWED_CODES': [400],
}

@property
Expand All @@ -41,31 +42,7 @@ def __init__(self, *args, **kwargs):
"withVoice": True,
"withV2Timeline": True
}
self.features = {
"rweb_tipjar_consumption_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False, "creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_timeline_navigation_enabled": True,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
"communities_web_enable_tweet_community_results_fetch": True,
"c9s_tweet_anatomy_moderator_badge_enabled": True,
"articles_preview_enabled": True,
"tweetypie_unmention_optimization_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
"view_counts_everywhere_api_enabled": True,
"longform_notetweets_consumption_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": True,
"tweet_awards_web_tipping_enabled": False,
"creator_subscriptions_quote_tweet_preview_enabled": False,
"freedom_of_speech_not_reach_fetch_enabled": True,
"standardized_nudges_misinfo": True,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
"rweb_video_timestamps_enabled": True,
"longform_notetweets_rich_text_read_enabled": True,
"longform_notetweets_inline_media_enabled": True,
"responsive_web_enhance_cards_enabled": False
}
self.features = self.custom_settings.get('FEATURES', {})
self.headers = {
'Authorization': self.custom_settings.get('AUTHORIZATION'),
'x-csrf-token': self.custom_settings.get('X_CSRF_TOKEN'),
@@ -79,6 +56,9 @@ def parse_start_url(self, response, **kwargs):
yield item_or_request

def parse(self, response, **kwargs):
if response.status == 400:
self.logger.error('Failed to request x API, error message: %s', response.json())

entries = (
entry['content'] for entry in DictRouter(response.json()).route_to(
'data.user.result.timeline_v2.timeline.instructions.0.entries', [],
19 changes: 19 additions & 0 deletions src/favorites_crawler/utils/auth.py
@@ -1,3 +1,7 @@
import json
import re
from urllib.parse import unquote

from gppt import GetPixivToken
from gppt.consts import REDIRECT_URI
from selenium.common import TimeoutException
@@ -31,3 +35,18 @@ def refresh_pixiv():
pixiv_config['ACCESS_TOKEN'] = access_token
dump_config(config)
return access_token


def parse_twitter_likes_url(url):
"""Parse USER_ID and LIKES_ID from URL"""
url = unquote(url).replace(' ', '')
match = re.match(r'^.+?graphql/(.+?)/.+?userId":"(.+?)".+$', url)
return match.groups()


def parser_twitter_likes_features(url):
url = unquote(url).replace(' ', '')
features = re.match(r'^.+features=(\{.+?}).+$', url)
if features:
features = json.loads(features.group(1))
return features
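
A quick sketch of what these two helpers extract, using a fabricated, already URL-decoded Likes request URL (the GraphQL ID, user ID and feature flags below are placeholders):

```python
from favorites_crawler.utils.auth import parse_twitter_likes_url, parser_twitter_likes_features

# A real URL is copied from the dev console and is percent-encoded; unquote() handles that.
url = ('https://x.com/i/api/graphql/AbC123dEf/Likes'
       '?variables={"userId":"4242","count":20}'
       '&features={"articles_preview_enabled":true}'
       '&fieldToggles={"withArticlePlainText":false}')

likes_id, user_id = parse_twitter_likes_url(url)  # ('AbC123dEf', '4242')
features = parser_twitter_likes_features(url)     # {'articles_preview_enabled': True}
```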
12 changes: 8 additions & 4 deletions src/favorites_crawler/utils/config.py
@@ -1,7 +1,8 @@
import os
import yaml
from copy import deepcopy


DEFAULT_FAVORS_HOME = os.path.expanduser('~/.favorites_crawler')
DEFAULT_CONFIG = {
'global': {
'ENABLE_ORGANIZE_BY_ARTIST': True,
@@ -29,24 +30,27 @@
'FILES_STORE': 'favorites_crawler_files/lemon',
},
'nhentai': {
'USER_AGENT': '',
'FILES_STORE': 'favorites_crawler_files/nhentai',
}
}


def load_config(home: str = DEFAULT_FAVORS_HOME) -> dict:
def load_config(home: str) -> dict:
"""Load config from user home"""
home = os.path.expanduser(home)
create_favors_home(home)
config_file = os.path.join(home, 'config.yml')
if not os.path.exists(config_file):
dump_config(DEFAULT_CONFIG, home)
return DEFAULT_CONFIG
return deepcopy(DEFAULT_CONFIG)
with open(config_file, encoding='utf8') as f:
return yaml.safe_load(f)


def dump_config(data: dict, home: str = DEFAULT_FAVORS_HOME):
def dump_config(data: dict, home: str):
"""Dump config data to user home"""
home = os.path.expanduser(home)
create_favors_home(home)
config_file = os.path.join(home, 'config.yml')
with open(config_file, 'w', encoding='utf8') as f:
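A minimal sketch of the new call pattern, mirroring what `crawl.py` and `login.py` now do (the `USER_AGENT` value below is a placeholder):

```python
import os

from favorites_crawler.utils.config import load_config, dump_config
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME

# Resolve the config home from the FAVORS_HOME env var, falling back to the default.
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)

config = load_config(favors_home)  # expands '~', creates the dir and a default config.yml if missing
config.setdefault('nhentai', {})['USER_AGENT'] = 'placeholder-agent'
dump_config(config, favors_home)   # writes back to {FAVORS_HOME}/config.yml
```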
