From 2793fd6bc8c9f93af3b310a56abb10c4f24b363f Mon Sep 17 00:00:00 2001
From: mxdev88
Date: Fri, 14 May 2021 12:20:49 +0200
Subject: [PATCH] add option for using a single database

---
 scrapydweb/default_settings.py     |  5 +++
 scrapydweb/utils/setup_database.py | 64 +++++++++++++++++++++++++-----
 scrapydweb/vars.py                 |  6 +++-
 3 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/scrapydweb/default_settings.py b/scrapydweb/default_settings.py
index 8e215a7..1e5067c 100644
--- a/scrapydweb/default_settings.py
+++ b/scrapydweb/default_settings.py
@@ -366,3 +366,8 @@
 # 'sqlite:///C:/Users/username'
 # 'sqlite:////home/username'
 DATABASE_URL = os.environ.get('DATABASE_URL', '')
+
+# The default is False, which means ScrapydWeb uses multiple databases internally to save its data.
+# When set to True, ScrapydWeb uses only the single database defined by DATABASE_URL.
+# In this mode, a PostgreSQL or MySQL database is assumed to already exist and be ready to use.
+DATABASE_USE_SINGLE = os.environ.get('DATABASE_USE_SINGLE', 'False').lower() == 'true'
diff --git a/scrapydweb/utils/setup_database.py b/scrapydweb/utils/setup_database.py
index 0d2523e..868dd09 100644
--- a/scrapydweb/utils/setup_database.py
+++ b/scrapydweb/utils/setup_database.py
@@ -16,6 +16,12 @@
 
 SCRAPYDWEB_TESTMODE = os.environ.get('SCRAPYDWEB_TESTMODE', 'False').lower() == 'true'
 
+
+def clean_path(path):
+    """Return path with backslashes replaced by '/' and one trailing '/' removed."""
+    path = re.sub(r'\\', '/', path)
+    return re.sub(r'/$', '', path)
+
 
 def test_database_url_pattern(database_url):
     m_mysql = PATTERN_MYSQL.match(database_url)
@@ -24,11 +30,54 @@ def test_database_url_pattern(database_url):
     return m_mysql, m_postgres, m_sqlite
 
 
-def setup_database(database_url, database_path):
-    database_url = re.sub(r'\\', '/', database_url)
-    database_url = re.sub(r'/$', '', database_url)
-    database_path = re.sub(r'\\', '/', database_path)
-    database_path = re.sub(r'/$', '', database_path)
+def setup_database(database_url, database_path, database_use_single):
+    """Build the database settings for single- or multi-database mode.
+
+    Returns a tuple of (APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI,
+    SQLALCHEMY_BINDS, DATABASE_PATH).
+    """
+    if database_use_single:
+        databases = setup_single_database(database_url, database_path)
+    else:
+        databases = setup_multi_database(database_url, database_path)
+
+    if SCRAPYDWEB_TESTMODE:
+        print("APSCHEDULER_DATABASE_URI: %s" % databases[0])
+        print("SQLALCHEMY_DATABASE_URI: %s" % databases[1])
+        print("SQLALCHEMY_BINDS: %s" % databases[2])
+        print("DATABASE_PATH: %s" % databases[3])
+
+    return databases
+
+
+def setup_single_database(database_url, database_path):
+    """Point APScheduler and every SQLAlchemy bind at one database.
+
+    A MySQL or PostgreSQL database_url is used as is; for anything else a
+    single SQLite file 'scrapydweb.db' under database_path is used.
+    """
+    database_url = clean_path(database_url)
+    database_path = clean_path(database_path)
+    m_mysql, m_postgres, m_sqlite = test_database_url_pattern(database_url)
+
+    if m_mysql or m_postgres:
+        database_uri = database_url
+    else:
+        database_uri = 'sqlite:///' + '/'.join([database_path, 'scrapydweb.db'])
+
+    apscheduler_database_uri, sqlalchemy_database_uri, database_path = database_uri, database_uri, database_uri
+    sqlalchemy_binds = {
+        'metadata': database_uri,
+        'jobs': database_uri
+    }
+
+    return apscheduler_database_uri, sqlalchemy_database_uri, sqlalchemy_binds, database_path
+
+
+def setup_multi_database(database_url, database_path):
+    """Set up the default layout in which ScrapydWeb uses multiple databases."""
+    database_url = clean_path(database_url)
+    database_path = clean_path(database_path)
 
     m_mysql, m_postgres, m_sqlite = test_database_url_pattern(database_url)
     if m_mysql:
@@ -60,11 +109,6 @@ def setup_database(database_url, database_path):
             'jobs': 'sqlite:///' + '/'.join([database_path, 'jobs.db'])
         }
 
-    if SCRAPYDWEB_TESTMODE:
-        print("DATABASE_PATH: %s" % database_path)
-        print("APSCHEDULER_DATABASE_URI: %s" % APSCHEDULER_DATABASE_URI)
-        print("SQLALCHEMY_DATABASE_URI: %s" % SQLALCHEMY_DATABASE_URI)
-        print("SQLALCHEMY_BINDS: %s" % SQLALCHEMY_BINDS)
     return APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_BINDS, database_path
 
 
diff --git a/scrapydweb/vars.py b/scrapydweb/vars.py
index 66b4633..f7d2aa2 100644
--- a/scrapydweb/vars.py
+++ b/scrapydweb/vars.py
@@ -10,6 +10,7 @@
 
 from .default_settings import DATA_PATH as default_data_path
 from .default_settings import DATABASE_URL as default_database_url
+from .default_settings import DATABASE_USE_SINGLE as default_database_use_single
 from .utils.setup_database import setup_database
 
 
@@ -22,11 +23,13 @@
 except ImportError:
     custom_data_path = ''
     custom_database_url = ''
+    custom_database_use_single = False
 else:
     custom_data_path = getattr(custom_settings_module, 'DATA_PATH', '')
     custom_data_path = custom_data_path if isinstance(custom_data_path, str) else ''
     custom_database_url = getattr(custom_settings_module, 'DATABASE_URL', '')
     custom_database_url = custom_database_url if isinstance(custom_database_url, str) else ''
+    custom_database_use_single = getattr(custom_settings_module, 'DATABASE_USE_SINGLE', False)
 
 # For data storage
 ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -59,7 +62,8 @@
 
 # For database
 DATABASE_URL = custom_database_url or default_database_url or 'sqlite:///' + DATABASE_PATH
-results = setup_database(DATABASE_URL, DATABASE_PATH)
+DATABASE_USE_SINGLE = custom_database_use_single or default_database_use_single
+results = setup_database(DATABASE_URL, DATABASE_PATH, DATABASE_USE_SINGLE)
 APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_BINDS, DATABASE_PATH = results
 
 # For check_app_config() and BaseView