Skip to content

Commit

Permalink
Add throttle option to load_domain_data command
Browse files Browse the repository at this point in the history
Only used by CouchDataLoader for now.

In practice, Couch can easily be overloaded by the high write load encountered
when saving many different documents. The throttle value of 0.25 seconds
has worked with projects that have encountered this issue. If in the
future this value is insufficient, it might be worth allowing the
calling user to specify a throttle value rather than setting a flag.
  • Loading branch information
gherceg committed May 5, 2024
1 parent 8bc308b commit 9f6bd00
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 3 deletions.
3 changes: 2 additions & 1 deletion corehq/apps/dump_reload/couch/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ def _create_db_for_doc_type(self, doc_type):
callback = LoaderCallback(self._success_counter, self.stdout)
large_doc_types = [Application._doc_type, LinkedApplication._doc_type, RemoteApp._doc_type]
chunksize = 1 if doc_type in large_doc_types else self.chunksize
db = IterDB(couch_db, new_edits=False, callback=callback, chunksize=chunksize)
throttle_secs = 0.25 if self.should_throttle else None
db = IterDB(couch_db, new_edits=False, callback=callback, chunksize=chunksize, throttle_secs=throttle_secs)
db.__enter__()
return db

Expand Down
3 changes: 2 additions & 1 deletion corehq/apps/dump_reload/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,11 +37,12 @@ def dump(self, output_stream):


class DataLoader(metaclass=ABCMeta):
def __init__(self, object_filter=None, stdout=None, stderr=None, chunksize=None):
def __init__(self, object_filter=None, stdout=None, stderr=None, chunksize=None, should_throttle=False):
self.stdout = stdout or sys.stdout
self.stderr = stderr or sys.stderr
self.object_filter = re.compile(object_filter, re.IGNORECASE) if object_filter else None
self.chunksize = chunksize
self.should_throttle = should_throttle

@abstractproperty
def slug(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,12 +70,14 @@ def add_arguments(self, parser):
parser.add_argument('--json-output', action="store_true", help="Produce JSON output for use in tests")
parser.add_argument('--chunksize', type=int, default=100,
help="Set custom chunksize in case it runs into large couch documents")
parser.add_argument('--throttle', action="store_true", help="Throttle saves to database")

def handle(self, dump_file_path, **options):
self.force = options.get('force')
self.dry_run = options.get('dry_run')
self.use_extracted = options.get('use_extracted')
self.chunksize = options.get('chunksize')
self.should_throttle = options.get('throttle')

if not os.path.isfile(dump_file_path):
raise CommandError("Dump file not found: {}".format(dump_file_path))
Expand Down Expand Up @@ -125,7 +127,7 @@ def extract_dump_archive(self, dump_file_path):

def _load_data(self, loader_class, extracted_dump_path, object_filter, dump_meta):
    # Instantiate the loader; positional args must match DataLoader.__init__:
    # (object_filter, stdout, stderr, chunksize, should_throttle).
    # NOTE(review): should_throttle comes from the --throttle CLI flag parsed
    # in handle() — only CouchDataLoader acts on it, per the commit message.
    try:
        loader = loader_class(object_filter, self.stdout, self.stderr, self.chunksize, self.should_throttle)
        return loader.load_from_path(extracted_dump_path, dump_meta, force=self.force, dry_run=self.dry_run)
    except DataExistsException as e:
        # Surface pre-existing data as a management-command error with a
        # remediation hint rather than a raw traceback.
        raise CommandError('Some data already exists. Use --force to load anyway: {}'.format(str(e)))
Expand Down

0 comments on commit 9f6bd00

Please sign in to comment.