Skip to content

Commit

Permalink
refactor: add global config
Browse files Browse the repository at this point in the history
  • Loading branch information
RyouMon committed Dec 2, 2024
1 parent 8396e47 commit e421690
Show file tree
Hide file tree
Showing 4 changed files with 193 additions and 36 deletions.
25 changes: 9 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,8 @@ You can set any [scrapy built-in settings](https://docs.scrapy.org/en/latest/top
By default, file content likes this:
```yaml
global:
ENABLE_ORGANIZE_BY_ARTIST: true
pixiv:
ACCESS_TOKEN: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
REFRESH_TOKEN: xxxxxxxxxxxxxxxxxxxxxxxxxxxx
Expand All @@ -150,27 +152,18 @@ yandere:
```
## Organize file by artist
If you want to organize pixiv and yandere files by artist, add these lines to your config:
```yaml
global:
  ENABLE_ORGANIZE_BY_ARTIST: true
```
## Store tags to IPTC/Keywords
Only pixiv, yandere and twitter are supported.
```yaml
global:
  ENABLE_WRITE_IPTC_KEYWORDS: true  # default: true
  EXIF_TOOL_EXECUTABLE: '<Path to your exiftool executable>'  # default: None. If the executable is not on PATH, set it manually.
```
# Restore your favorites
Expand Down
11 changes: 8 additions & 3 deletions src/favorites_crawler/commands/crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
from scrapy.utils.project import get_project_settings
from scrapy.spiderloader import SpiderLoader

from favorites_crawler.utils.config import load_config, overwrite_spider_settings

app = typer.Typer(help='Crawl your favorites from websites.', no_args_is_help=True)

# Make the scrapy project settings resolvable before building the spider loader.
# User config is applied per-spider inside crawl(), not at import time.
os.environ.setdefault('SCRAPY_SETTINGS_MODULE', 'favorites_crawler.settings')
scrapy_settings = get_project_settings()
spider_loader = SpiderLoader(scrapy_settings)


@app.command('yandere')
def crawl(name, **kwargs):
    """Run the named spider in a blocking CrawlerProcess.

    :param name: spider name
    :param kwargs: kwargs passed to spider's __init__ method
    """
    spider = spider_loader.load(name)
    # Layer user config (global section, then per-spider section) on top of
    # the spider's custom settings before the crawler is created.
    overwrite_spider_settings(spider, scrapy_settings, load_config())
    process = CrawlerProcess(scrapy_settings)
    process.crawl(spider, **kwargs)
    for crawler in process.crawlers:
        # Notify when each spider finishes.
        crawler.signals.connect(spider_closed, signal=signals.spider_closed)
    process.start()


if __name__ == '__main__':
    # Manual smoke run: crawl pixiv favorites using the local config.
    crawl('pixiv')
80 changes: 63 additions & 17 deletions src/favorites_crawler/utils/config.py
Original file line number Diff line number Diff line change
@@ -1,34 +1,80 @@
import os

import yaml

config_path = os.path.expanduser('~/.favorites_crawler')
config_file = os.path.join(config_path, 'config.yml')
if not os.path.exists(config_path):
os.mkdir(config_path)
# Directory that holds the favorites-crawler config file (config.yml).
DEFAULT_FAVORS_HOME = os.path.expanduser('~/.favorites_crawler')
# Template seeded into <home>/config.yml on first load_config() call.
# Keys in 'global' apply to every spider; per-spider sections override them.
DEFAULT_CONFIG = {
    'global': {
        'ENABLE_ORGANIZE_BY_ARTIST': True,   # organize downloaded files by artist
        'ENABLE_WRITE_IPTC_KEYWORDS': True,  # store tags into IPTC/Keywords metadata
        'EXIF_TOOL_EXECUTABLE': None,        # None: exiftool expected on PATH; set manually otherwise
    },
    'pixiv': {
        'FILES_STORE': 'favorites_crawler_files/pixiv',
        'USER_ID': '',
        'ACCESS_TOKEN': '',
        'REFRESH_TOKEN': '',
    },
    'yandere': {
        'FILES_STORE': 'favorites_crawler_files/yandere',
        'USERNAME': '',
    },
    'twitter': {
        'FILES_STORE': 'favorites_crawler_files/twitter',
        'USER_ID': '',
        'AUTHORIZATION': '',
        'LIKES_ID': '',
        'X_CSRF_TOKEN': '',
    },
    'lemon': {
        'FILES_STORE': 'favorites_crawler_files/lemon',
    },
    'nhentai': {
        'FILES_STORE': 'favorites_crawler_files/nhentai',
    }
}


def load_config(home: str = DEFAULT_FAVORS_HOME) -> dict:
    """Load config from user home.

    On first run (no config file yet) the default config is written to
    <home>/config.yml and returned, giving users a template to edit.

    :param home: favorites crawler home directory
    :return: config data as a dict
    """
    create_favors_home(home)
    config_file = os.path.join(home, 'config.yml')
    if not os.path.exists(config_file):
        dump_config(DEFAULT_CONFIG, home)
        return DEFAULT_CONFIG
    with open(config_file, encoding='utf8') as f:
        return yaml.safe_load(f)


def dump_config(data: dict, home: str = DEFAULT_FAVORS_HOME):
    """Dump config data to <home>/config.yml.

    :param data: config data to serialize
    :param home: favorites crawler home directory
    """
    create_favors_home(home)
    config_file = os.path.join(home, 'config.yml')
    with open(config_file, 'w', encoding='utf8') as f:
        # allow_unicode keeps non-ASCII values human-readable in the file.
        yaml.safe_dump(data, f, allow_unicode=True)


def overwrite_settings(spider_loader, settings, user_config):
spider_names = spider_loader.list()
for name in spider_names:
cls = spider_loader.load(name)
spider_config = user_config.get(cls.name, {})
if spider_config:
cls.custom_settings.update(spider_config)
def create_favors_home(path: str):
    """Create favors home if not exists.

    :param path: favorites crawler home directory
    """
    # makedirs with exist_ok=True is already a no-op for an existing directory,
    # so no prior exists() check is needed (which would also be a TOCTOU race).
    os.makedirs(path, exist_ok=True)


def overwrite_spider_settings(spider, default_settings, user_config):
    """
    Overwrite spider settings by user config.
    Priority: favors spider config > favors global config > spider custom settings > scrapy settings

    :param spider: Spider class
    :param default_settings: :class:`scrapy.settings.Settings`
    :param user_config: favorites crawler config
    """
    # Apply the global section first so per-spider values can override it.
    global_config = user_config.get('global')
    if global_config:
        spider.custom_settings.update(global_config)

    spider_config = user_config.get(spider.name)
    if spider_config:
        spider.custom_settings.update(spider_config)

    # Fall back to <scrapy FILES_STORE>/<spider name> when the user set nothing.
    default_files_store = os.path.join(default_settings.get('FILES_STORE', ''), spider.name)
    spider.custom_settings.setdefault('FILES_STORE', default_files_store)
113 changes: 113 additions & 0 deletions tests/test_utils/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import os
import yaml

from scrapy.utils.project import get_project_settings
from scrapy.spiderloader import SpiderLoader

from favorites_crawler.utils.config import (
create_favors_home, load_config, dump_config, DEFAULT_CONFIG, overwrite_spider_settings)

scrapy_settings = get_project_settings()
spider_loader = SpiderLoader(scrapy_settings)


class TestCreateFavorsHome:
    """Tests for create_favors_home. Original test names described the
    opposite of the scenario they exercised; renamed to match behavior."""

    def test_should_keep_path_when_path_exists(self, tmp_path):
        # Calling on an already-existing directory is a no-op and must not raise.
        test_path = tmp_path / "existing_dir"
        test_path.mkdir()

        create_favors_home(str(test_path))

        assert test_path.exists()
        assert test_path.is_dir()

    def test_should_create_path_when_path_not_exists(self, tmp_path):
        # A missing directory is created.
        test_path = tmp_path / "non_existing_dir"

        create_favors_home(str(test_path))

        assert test_path.exists()
        assert test_path.is_dir()


class TestLoadConfig:
    """Tests for load_config."""

    def test_load_config_when_config_not_exists(self, tmp_path):
        # First load seeds the default config and persists it to disk.
        home = str(tmp_path)

        loaded = load_config(home)

        assert loaded == DEFAULT_CONFIG
        config_path = os.path.join(home, 'config.yml')
        assert os.path.exists(config_path)
        with open(config_path, encoding='utf8') as fp:
            assert yaml.safe_load(fp) == DEFAULT_CONFIG

    def test_load_config_when_config_exists(self, tmp_path):
        # An existing config file is returned as-is, not overwritten.
        home = str(tmp_path)
        expected = {'global': {'ENABLE_ORGANIZE_BY_ARTIST': False}}
        with open(os.path.join(home, 'config.yml'), 'w', encoding='utf8') as fp:
            yaml.safe_dump(expected, fp)

        assert load_config(home) == expected


class TestDumpConfig:
    """Tests for dump_config."""

    def test_dump_config_to_favors_home(self, tmp_path):
        # dump_config serializes the mapping into <home>/config.yml.
        home = str(tmp_path)
        payload = {'global': {'ENABLE_ORGANIZE_BY_ARTIST': False}}

        dump_config(payload, home)

        target = os.path.join(home, 'config.yml')
        assert os.path.exists(target)
        with open(target, encoding='utf8') as fp:
            assert yaml.safe_load(fp) == payload


class TestOverwriteSpiderSettings:
    """Tests for overwrite_spider_settings."""

    def test_overwrite_spider_settings(self):
        # Both the global section and the per-spider section land in custom_settings.
        config = {
            'global': {'ENABLE_ORGANIZE_BY_ARTIST': True},
            'pixiv': {'FILES_STORE': '/pixiv'},
        }
        spider = spider_loader.load('pixiv')

        overwrite_spider_settings(spider, scrapy_settings, config)

        assert spider.custom_settings['FILES_STORE'] == '/pixiv'
        assert spider.custom_settings['ENABLE_ORGANIZE_BY_ARTIST'] is True

    def test_spider_config_priority_should_gt_global_config(self):
        # A per-spider value wins over the global value for the same key.
        config = {
            'global': {'ENABLE_ORGANIZE_BY_ARTIST': True},
            'yandere': {'ENABLE_ORGANIZE_BY_ARTIST': False},
        }
        spider = spider_loader.load('yandere')

        overwrite_spider_settings(spider, scrapy_settings, config)

        assert spider.custom_settings['ENABLE_ORGANIZE_BY_ARTIST'] is False

    def test_should_set_default_file_store_when_user_doesnt_config_it(self):
        # Without a user value, FILES_STORE defaults to <scrapy FILES_STORE>/<spider name>.
        spider = spider_loader.load('nhentai')

        overwrite_spider_settings(spider, scrapy_settings, {})

        expected = os.path.join(scrapy_settings.get('FILES_STORE', ''), 'nhentai')
        assert spider.custom_settings['FILES_STORE'] == expected

0 comments on commit e421690

Please sign in to comment.