Merge pull request #64 from RyouMon/fix-login
Update login method of twitter and nhentai. Add FAVORS_HOME env var.
RyouMon authored Dec 4, 2024
2 parents ecd5f2d + 27e47cf commit 29d2ad9
Showing 10 changed files with 404 additions and 99 deletions.
60 changes: 39 additions & 21 deletions README.md
@@ -64,31 +64,45 @@ favors login --help
```

## Login Yandere
1. Run command:
```
favors login yandere
```
2. Input your username and hit the Enter key.
Run command:
```
favors login yandere -u {username}
```
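
Example (the username below is a placeholder):
```commandline
favors login yandere -u alice
```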

## Login NHentai
1. Open nhentai in a browser and login.
2. Use the "Get cookies.txt" browser extension to download the cookie file.
3. Copy the cookie file to {user_home}/.favorites_crawler.
1. Get the User-Agent and Cookie File:
   1. Open nhentai and login.
   2. Open the dev console (F12) and switch to the Network tab.
   3. Open any comic.
   4. Copy the user-agent from any request.
   5. Use the "Get cookies.txt" browser extension to download the cookie file.
2. Execute command:
```commandline
favors login nhentai -u "{User-Agent}" -c "{Cookie File}"
```
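
Example (the User-Agent value and cookie path below are placeholders; use the ones you copied):
```commandline
favors login nhentai -u "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36" -c "C:\Users\xxx\Downloads\nhentai.net_cookies.txt"
```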

## Login Twitter
1. Run command:
```
favors login twitter
```
2. Input your username; after pressing Enter, the likes page will open in the browser.
3. Open the dev console (F12) and switch to the Network tab.
4. Enable persistent logging ("Preserve log").
5. Type into the filter field: Likes?
6. Refresh the page.
7. Copy Authorization, X-Csrf-Token and Request URL from the request (Likes?variables...) and input them on the terminal.
8. Use the "Get cookies.txt" browser extension to download the cookie file.
9. Copy the cookie file to {user_home}/.favorites_crawler.

1. Get the Authorization, X-Csrf-Token, Request URL and Cookie File:
   1. Open [x.com](https://x.com/) and login, then go to your "Likes" page.
   2. Open the dev console (F12) and switch to the Network tab.
   3. Enable persistent logging ("Preserve log").
   4. Type into the filter field: Likes?
   5. Refresh the page.
   6. Copy Authorization, X-Csrf-Token and Request URL from the request (Likes?variables...).
   7. Use the "Get cookies.txt" browser extension to download the cookie file.
2. Execute command:
```commandline
favors login x -at "{Authorization}" -ct "{X-Csrf-Token}" -u "{RequestURL}" -c "{Cookie File}"
```
Example:
```commandline
favors login x -at "Bearer AAAAAAAAAAAAA..." -ct ... -u "https://x.com/i/api/graphql/.../Likes?..." -c "C:\Users\xxx\Downloads\x.com_cookies.txt"
```

Note: the Request URL makes the entire command very long.
If you cannot enter such a long command in the macOS terminal,
you can write the command to a shell script and execute it, as sketched below.
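
For example, a minimal shell script (the file name and all values below are placeholders):
```commandline
#!/bin/sh
favors login x \
  -at "Bearer AAAAAAAAAAAAA..." \
  -ct "0123456789abcdef" \
  -u "https://x.com/i/api/graphql/.../Likes?variables=..." \
  -c "$HOME/Downloads/x.com_cookies.txt"
```
Save it as e.g. `login_x.sh` and run it with `sh login_x.sh`.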

# Crawl

Expand Down Expand Up @@ -121,7 +135,11 @@ favors crawl twitter
```

# Config
Config file is located at `{your_home}/.favorites_crawler/config.yml`.

The config file `config.yml` is located in `FAVORS_HOME`;
by default `FAVORS_HOME` is `{your_home}/.favorites_crawler`.
You can change it by setting the `FAVORS_HOME` environment variable (see the example below).
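
For example (the custom paths below are placeholders):
```commandline
# Linux/macOS
export FAVORS_HOME="$HOME/my-favors-home"

# Windows (PowerShell)
$env:FAVORS_HOME = "D:\my-favors-home"
```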

You can set any [scrapy built-in settings](https://docs.scrapy.org/en/latest/topics/settings.html#built-in-settings-reference) in this file.

By default, the file content looks like this:
8 changes: 3 additions & 5 deletions src/favorites_crawler/commands/crawl.py
@@ -9,6 +9,7 @@
from scrapy.spiderloader import SpiderLoader

from favorites_crawler.utils.config import load_config, overwrite_spider_settings
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME

app = typer.Typer(help='Crawl your favorites from websites.', no_args_is_help=True)

@@ -70,13 +71,10 @@ def crawl(name, **kwargs):
:param kwargs: kwargs passed to spider's __init__ method
"""
spider = spider_loader.load(name)
overwrite_spider_settings(spider, scrapy_settings, load_config())
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
overwrite_spider_settings(spider, scrapy_settings, load_config(favors_home))
process = CrawlerProcess(scrapy_settings)
process.crawl(spider, **kwargs)
for crawler in process.crawlers:
crawler.signals.connect(spider_closed, signal=signals.spider_closed)
process.start()


if __name__ == '__main__':
crawl('pixiv')
106 changes: 73 additions & 33 deletions src/favorites_crawler/commands/login.py
@@ -1,14 +1,12 @@
import re
import os
import shutil
from typing import Optional
from urllib.parse import unquote
from webbrowser import open as open_url

import typer
from selenium.common import NoSuchWindowException

from favorites_crawler.constants.endpoints import TWITTER_PROFILE_LIKES_URL
from favorites_crawler.utils.auth import CustomGetPixivToken
from favorites_crawler.utils.auth import CustomGetPixivToken, parse_twitter_likes_url, parser_twitter_likes_features
from favorites_crawler.utils.config import dump_config, load_config
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME


app = typer.Typer(help='Prepare auth information for crawling.', no_args_is_help=True)
@@ -35,13 +33,14 @@ def login_pixiv(
If you do not provide your username and password, you will login manually on the web page
"""
config = load_config()
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
token_getter = CustomGetPixivToken()
try:
login_info = token_getter.login(username=username, password=password)
except NoSuchWindowException:
print('Failed to login.')
return
except Exception as e:
print(f'Failed to login. {e!r}')
exit(1)

pixiv_config = config.setdefault('pixiv', {})
try:
@@ -50,8 +49,9 @@
pixiv_config['REFRESH_TOKEN'] = login_info['refresh_token']
except KeyError as e:
print(f'Failed to login. {e!r}')
exit(1)
else:
dump_config(config)
dump_config(config, favors_home)
print("Login successful.")


@@ -65,49 +65,89 @@ def login_yandere(
"""
Login to yandere.
"""
config = load_config()
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
yandere_config = config.setdefault('yandere', {})
yandere_config['USERNAME'] = username
dump_config(config)
dump_config(config, favors_home)
print("Login successful.")


@app.command('x')
@app.command('twitter')
def login_twitter(
username: str = typer.Option(
..., '-u', '--username',
help="Your twitter username."
auth_token: str = typer.Option(
..., '-at', '--auth-token',
help='Authorization Token (Copy from Dev console)'
),
csrf_token: str = typer.Option(
..., '-ct', '--csrf-token',
help='Csrf Token (Copy from Dev console)'
),
likes_url: str = typer.Option(
..., '-u', '--likes-url',
help='Request URL of Likes API (Copy from Dev console)'
),
cookie_file: str = typer.Option(
..., '-c', '--cookie-file',
help='Netscape HTTP Cookie File; you can download it with the "Get cookies.txt" browser extension.'
)
):
"""
Login to twitter.
1. After executing this command, the likes page will open in the browser.\n
1. Open twitter and login, then go to your "Likes" page.\n
2. Open dev console (F12) and switch to network tab.\n
3. Enable persistent logging ("Preserve log").\n
4. Type into the filter field: Likes?\n
5. Refresh Page.\n
6. Copy Authorization, X-Csrf-Token and Request URL from the request (Likes?variables...) and input them on the terminal.\n
7. Use the "Get cookies.txt" browser extension to download the cookie file.\n
8. Copy the cookie file to {user_home}/.favorites_crawler.
7. Use the "Get cookies.txt" browser extension to download the cookie file.
"""
open_url(TWITTER_PROFILE_LIKES_URL.format(username=username))
config = load_config()
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
twitter_config = config.setdefault('twitter', {})
try:
twitter_config['AUTHORIZATION'] = input('Authorization: ')
twitter_config['X_CSRF_TOKEN'] = input('X-Csrf-Token: ')
twitter_config['LIKES_ID'], twitter_config['USER_ID'] = parse_twitter_likes_url(input('Request URL: '))
except KeyboardInterrupt:
"Failed to login."
return
dump_config(config)
twitter_config['AUTHORIZATION'] = auth_token
twitter_config['X_CSRF_TOKEN'] = csrf_token
twitter_config['LIKES_ID'], twitter_config['USER_ID'] = parse_twitter_likes_url(likes_url)
twitter_config['FEATURES'] = parser_twitter_likes_features(likes_url)
shutil.copy(cookie_file, favors_home)
except Exception as e:
print(f"Failed to login: {e!r}")
exit(1)
dump_config(config, favors_home)
print("Login successful.")


def parse_twitter_likes_url(url):
"""Parse USER_ID and LIKES_ID from URL"""
url = unquote(url).replace(' ', '')
match = re.match(r'^.+?graphql/(.+?)/.+?userId":"(.+?)".+$', url)
return match.groups()
@app.command("nhentai")
def login_nhentai(
user_agent: str = typer.Option(
..., '-u', '--user-agent',
help='User Agent'
),
cookie_file: str = typer.Option(
..., '-c', '--cookie-file',
help='Netscape HTTP Cookie File; you can download it with the "Get cookies.txt" browser extension.'
)
):
"""
Login to nhentai.
1. Open nhentai and login.\n
2. Open dev console (F12) and switch to network tab.\n
3. Open any comic.\n
4. Copy the user-agent from any request.\n
5. Use the "Get cookies.txt" browser extension to download the cookie file.
"""
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)
config = load_config(favors_home)
nhentai_config = config.setdefault('nhentai', {})
try:
nhentai_config['USER_AGENT'] = user_agent
shutil.copy(cookie_file, favors_home)
except Exception as e:
print(f"Failed to login: {e!r}")
exit(1)
dump_config(config, favors_home)
print("Login successful.")
1 change: 0 additions & 1 deletion src/favorites_crawler/constants/endpoints.py
@@ -11,4 +11,3 @@
NHENTAI_USER_FAVORITES_URL = 'https://nhentai.net/favorites/'

TWITTER_LIKES_URL = 'https://x.com/i/api/graphql/{id}/Likes'
TWITTER_PROFILE_LIKES_URL = 'https://x.com/{username}/likes'
1 change: 1 addition & 0 deletions src/favorites_crawler/constants/path.py
@@ -0,0 +1 @@
DEFAULT_FAVORS_HOME = '~/.favorites_crawler'
30 changes: 5 additions & 25 deletions src/favorites_crawler/spiders/twitter.py
@@ -18,6 +18,7 @@ class TwitterSpider(BaseSpider):
custom_settings = {
'CONCURRENT_REQUESTS': 2,
'ITEM_PIPELINES': {'favorites_crawler.pipelines.PicturePipeline': 0},
'HTTPERROR_ALLOWED_CODES': [400],
}

@property
Expand All @@ -41,31 +42,7 @@ def __init__(self, *args, **kwargs):
"withVoice": True,
"withV2Timeline": True
}
self.features = {
"rweb_tipjar_consumption_enabled": True,
"responsive_web_graphql_exclude_directive_enabled": True,
"verified_phone_label_enabled": False, "creator_subscriptions_tweet_preview_api_enabled": True,
"responsive_web_graphql_timeline_navigation_enabled": True,
"responsive_web_graphql_skip_user_profile_image_extensions_enabled": False,
"communities_web_enable_tweet_community_results_fetch": True,
"c9s_tweet_anatomy_moderator_badge_enabled": True,
"articles_preview_enabled": True,
"tweetypie_unmention_optimization_enabled": True,
"responsive_web_edit_tweet_api_enabled": True,
"graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
"view_counts_everywhere_api_enabled": True,
"longform_notetweets_consumption_enabled": True,
"responsive_web_twitter_article_tweet_consumption_enabled": True,
"tweet_awards_web_tipping_enabled": False,
"creator_subscriptions_quote_tweet_preview_enabled": False,
"freedom_of_speech_not_reach_fetch_enabled": True,
"standardized_nudges_misinfo": True,
"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": True,
"rweb_video_timestamps_enabled": True,
"longform_notetweets_rich_text_read_enabled": True,
"longform_notetweets_inline_media_enabled": True,
"responsive_web_enhance_cards_enabled": False
}
self.features = self.custom_settings.get('FEATURES', {})
self.headers = {
'Authorization': self.custom_settings.get('AUTHORIZATION'),
'x-csrf-token': self.custom_settings.get('X_CSRF_TOKEN'),
@@ -79,6 +56,9 @@ def parse_start_url(self, response, **kwargs):
yield item_or_request

def parse(self, response, **kwargs):
if response.status == 400:
self.logger.error('Failed to request x API, error message: %s', response.json())

entries = (
entry['content'] for entry in DictRouter(response.json()).route_to(
'data.user.result.timeline_v2.timeline.instructions.0.entries', [],
19 changes: 19 additions & 0 deletions src/favorites_crawler/utils/auth.py
@@ -1,3 +1,7 @@
import json
import re
from urllib.parse import unquote

from gppt import GetPixivToken
from gppt.consts import REDIRECT_URI
from selenium.common import TimeoutException
@@ -31,3 +35,18 @@ def refresh_pixiv():
pixiv_config['ACCESS_TOKEN'] = access_token
dump_config(config)
return access_token


def parse_twitter_likes_url(url):
"""Parse USER_ID and LIKES_ID from URL"""
url = unquote(url).replace(' ', '')
match = re.match(r'^.+?graphql/(.+?)/.+?userId":"(.+?)".+$', url)
return match.groups()


def parser_twitter_likes_features(url):
url = unquote(url).replace(' ', '')
features = re.match(r'^.+features=(\{.+?}).+$', url)
if features:
features = json.loads(features.group(1))
return features
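
A quick sketch of what these two helpers extract, using a fabricated, already URL-decoded Likes request URL (the GraphQL ID, user ID and feature flags below are placeholders):

```python
from favorites_crawler.utils.auth import parse_twitter_likes_url, parser_twitter_likes_features

# A real URL is copied from the dev console and is percent-encoded; unquote() handles that.
url = ('https://x.com/i/api/graphql/AbC123dEf/Likes'
       '?variables={"userId":"4242","count":20}'
       '&features={"articles_preview_enabled":true}'
       '&fieldToggles={"withArticlePlainText":false}')

likes_id, user_id = parse_twitter_likes_url(url)  # ('AbC123dEf', '4242')
features = parser_twitter_likes_features(url)     # {'articles_preview_enabled': True}
```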
12 changes: 8 additions & 4 deletions src/favorites_crawler/utils/config.py
@@ -1,7 +1,8 @@
import os
import yaml
from copy import deepcopy


DEFAULT_FAVORS_HOME = os.path.expanduser('~/.favorites_crawler')
DEFAULT_CONFIG = {
'global': {
'ENABLE_ORGANIZE_BY_ARTIST': True,
@@ -29,24 +30,27 @@
'FILES_STORE': 'favorites_crawler_files/lemon',
},
'nhentai': {
'USER_AGENT': '',
'FILES_STORE': 'favorites_crawler_files/nhentai',
}
}


def load_config(home: str = DEFAULT_FAVORS_HOME) -> dict:
def load_config(home: str) -> dict:
"""Load config from user home"""
home = os.path.expanduser(home)
create_favors_home(home)
config_file = os.path.join(home, 'config.yml')
if not os.path.exists(config_file):
dump_config(DEFAULT_CONFIG, home)
return DEFAULT_CONFIG
return deepcopy(DEFAULT_CONFIG)
with open(config_file, encoding='utf8') as f:
return yaml.safe_load(f)


def dump_config(data: dict, home: str = DEFAULT_FAVORS_HOME):
def dump_config(data: dict, home: str):
"""Dump config data to user home"""
home = os.path.expanduser(home)
create_favors_home(home)
config_file = os.path.join(home, 'config.yml')
with open(config_file, 'w', encoding='utf8') as f:
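A minimal sketch of the new call pattern, mirroring what `crawl.py` and `login.py` now do (the `USER_AGENT` value below is a placeholder):

```python
import os

from favorites_crawler.utils.config import load_config, dump_config
from favorites_crawler.constants.path import DEFAULT_FAVORS_HOME

# Resolve the config home from the FAVORS_HOME env var, falling back to the default.
favors_home = os.getenv('FAVORS_HOME', DEFAULT_FAVORS_HOME)

config = load_config(favors_home)  # expands '~', creates the dir and a default config.yml if missing
config.setdefault('nhentai', {})['USER_AGENT'] = 'placeholder-agent'
dump_config(config, favors_home)   # writes back to {FAVORS_HOME}/config.yml
```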
