Skip to content

Commit

Permalink
add option for using a single database
Browse files Browse the repository at this point in the history
  • Loading branch information
mxdev88 committed May 16, 2021
1 parent 6b9663b commit 2793fd6
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 11 deletions.
5 changes: 5 additions & 0 deletions scrapydweb/default_settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,3 +366,8 @@
# 'sqlite:///C:/Users/username'
# 'sqlite:////home/username'
DATABASE_URL = os.environ.get('DATABASE_URL', '')

# The default is False, which means ScrapydWeb uses multiple databases internally to save its data.
# When set to True, ScrapydWeb will use only one database defined by DATABASE_URL.
# When this mode is used with PostgreSQL or MySQL, the database is assumed to already exist and be ready to use.
# Environment variables are always strings, so the value is compared as text (case-insensitive 'true');
# otherwise DATABASE_USE_SINGLE=False in the environment would be the truthy string 'False'.
DATABASE_USE_SINGLE = os.environ.get('DATABASE_USE_SINGLE', 'False').lower() == 'true'
50 changes: 40 additions & 10 deletions scrapydweb/utils/setup_database.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,10 @@

SCRAPYDWEB_TESTMODE = os.environ.get('SCRAPYDWEB_TESTMODE', 'False').lower() == 'true'

def clean_path(path):
    """Normalize a path or database URL string.

    Every backslash is converted to a forward slash, then at most one
    trailing slash is removed.

    :param path: path or URL string to normalize.
    :return: the normalized string.
    """
    # Plain string methods are enough here: both operations work on literal
    # characters, so no regular expression is needed.
    path = path.replace('\\', '/')
    # Only a single trailing slash is dropped ('a//' -> 'a/').
    return path[:-1] if path.endswith('/') else path


def test_database_url_pattern(database_url):
m_mysql = PATTERN_MYSQL.match(database_url)
Expand All @@ -24,11 +28,42 @@ def test_database_url_pattern(database_url):
return m_mysql, m_postgres, m_sqlite


def setup_database(database_url, database_path):
database_url = re.sub(r'\\', '/', database_url)
database_url = re.sub(r'/$', '', database_url)
database_path = re.sub(r'\\', '/', database_path)
database_path = re.sub(r'/$', '', database_path)
def setup_database(database_url, database_path, database_use_single=False):
    """Resolve the database URIs, in single- or multi-database mode.

    :param database_url: database URL passed through to the selected helper.
    :param database_path: database path passed through to the selected helper.
    :param database_use_single: when truthy, setup_single_database() is used;
        otherwise setup_multi_database(). Defaults to False so that callers
        still passing only two arguments keep the multi-database behaviour.
    :return: the 4-tuple produced by the selected helper, in the order
        (APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_BINDS,
        DATABASE_PATH).
    """
    setup = setup_single_database if database_use_single else setup_multi_database
    databases = setup(database_url, database_path)

    if SCRAPYDWEB_TESTMODE:
        # Name the tuple members instead of indexing so the labels below
        # cannot drift out of step with the positions.
        apscheduler_uri, sqlalchemy_uri, sqlalchemy_binds, resolved_path = databases
        print("APSCHEDULER_DATABASE_URI: %s" % apscheduler_uri)
        print("SQLALCHEMY_DATABASE_URI: %s" % sqlalchemy_uri)
        print("SQLALCHEMY_BINDS: %s" % sqlalchemy_binds)
        print("DATABASE_PATH: %s" % resolved_path)

    return databases


def setup_single_database(database_url, database_path):
    """Build the connection settings for single-database mode.

    A MySQL or PostgreSQL ``database_url`` is used as is (after
    clean_path()). Any other URL falls back to a SQLite file named
    ``scrapydweb.db`` located under ``database_path``.

    :param database_url: database URL to inspect.
    :param database_path: directory used for the SQLite fallback file.
    :return: 4-tuple (apscheduler URI, sqlalchemy URI, sqlalchemy binds,
        database path). All URI slots, both bind entries and the final
        "database path" slot hold the same single URI.
    """
    database_url = clean_path(database_url)
    database_path = clean_path(database_path)

    # Only the MySQL/PostgreSQL matches are consulted; the SQLite match is unused.
    m_mysql, m_postgres, _ = test_database_url_pattern(database_url)

    if not (m_mysql or m_postgres):
        database_uri = 'sqlite:///' + '/'.join([database_path, 'scrapydweb.db'])
    else:
        database_uri = database_url

    # Both binds point at the one shared database.
    sqlalchemy_binds = dict.fromkeys(('metadata', 'jobs'), database_uri)

    # NOTE(review): the last element is the URI, not a filesystem path, while
    # setup_multi_database() returns database_path there - confirm callers
    # of DATABASE_PATH are fine with a URI in single-database mode.
    return database_uri, database_uri, sqlalchemy_binds, database_uri

def setup_multi_database(database_url, database_path):
database_url = clean_path(database_url)
database_path = clean_path(database_path)

m_mysql, m_postgres, m_sqlite = test_database_url_pattern(database_url)
if m_mysql:
Expand Down Expand Up @@ -60,11 +95,6 @@ def setup_database(database_url, database_path):
'jobs': 'sqlite:///' + '/'.join([database_path, 'jobs.db'])
}

if SCRAPYDWEB_TESTMODE:
print("DATABASE_PATH: %s" % database_path)
print("APSCHEDULER_DATABASE_URI: %s" % APSCHEDULER_DATABASE_URI)
print("SQLALCHEMY_DATABASE_URI: %s" % SQLALCHEMY_DATABASE_URI)
print("SQLALCHEMY_BINDS: %s" % SQLALCHEMY_BINDS)
return APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_BINDS, database_path


Expand Down
6 changes: 5 additions & 1 deletion scrapydweb/vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

from .default_settings import DATA_PATH as default_data_path
from .default_settings import DATABASE_URL as default_database_url
from .default_settings import DATABASE_USE_SINGLE as default_database_use_single
from .utils.setup_database import setup_database


Expand All @@ -22,11 +23,13 @@
except ImportError:
custom_data_path = ''
custom_database_url = ''
custom_database_use_single = False
else:
custom_data_path = getattr(custom_settings_module, 'DATA_PATH', '')
custom_data_path = custom_data_path if isinstance(custom_data_path, str) else ''
custom_database_url = getattr(custom_settings_module, 'DATABASE_URL', '')
custom_database_url = custom_database_url if isinstance(custom_database_url, str) else ''
custom_database_use_single = getattr(custom_settings_module, 'DATABASE_USE_SINGLE', False)

# For data storage
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
Expand Down Expand Up @@ -59,7 +62,8 @@

# For database
DATABASE_URL = custom_database_url or default_database_url or 'sqlite:///' + DATABASE_PATH
results = setup_database(DATABASE_URL, DATABASE_PATH)
# Single-database mode is on when either the custom setting or the default
# setting is truthy; with `or`, a falsy custom value does not override a truthy default.
DATABASE_USE_SINGLE = custom_database_use_single or default_database_use_single
# setup_database() returns a 4-tuple; note that DATABASE_PATH is rebound to its last element.
results = setup_database(DATABASE_URL, DATABASE_PATH, DATABASE_USE_SINGLE)
APSCHEDULER_DATABASE_URI, SQLALCHEMY_DATABASE_URI, SQLALCHEMY_BINDS, DATABASE_PATH = results

# For check_app_config() and BaseView
Expand Down

0 comments on commit 2793fd6

Please sign in to comment.