Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve complex forms performance #740

Merged
merged 15 commits into from
Dec 4, 2024
Merged
Changes from 1 commit
Commits
Show all changes
15 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
fix: poor performance with large and complex forms with many references
- builder.py
  - replace "copy_json_dict" with copy.deepcopy since that's what it
    seems to be doing (probably less efficiently)
- question.py
  - for each SurveyElement subclass that overrides __init__, ensure
    that it calls the parent / super().__init__ (and calls it last)
  - to allow the above, rearrange the choices/children/tags into just
    "children" so that the base __init__ can add them.
  - generally the goal is to avoid use cases where the dict info from
    the XLSForm spec is modified after __init__ to help with the below
- survey_element.py
  - avoid recursive calls through __getattr__ by using the base class
    dict.get when those keys are known to exist and aren't in FIELDS
  - get the default info from the question_type_dict once at init time,
    instead of potentially looking it up on every call to __getattr__
  - avoid many calls to get_lineage by caching the xpath after it
    is calculated once, and invalidate the cache if the parent changes
    (although this doesn't seem to ever happen during test runs).
  - avoid many calls to iter_descendants by caching the result of
    "any_repeat" (dict can be quite large since it's every element).
    - This does not seem like the optimal solution yet since there are
      many related recursive calls that could be avoided
    - Also this function is only used for the survey class so it would
      make sense to move it there.
- for the form described in central/#171, the form conversion total time
  is reduced from about 5 minutes to about 45 seconds. Resident memory
  usage about 43MB baseline and 150MB after conversion.
  • Loading branch information
lindsay-stevens committed Nov 14, 2024

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 42bb2d6733c1fd47694ede0e842dfb55084a7779
25 changes: 1 addition & 24 deletions pyxform/builder.py
Original file line number Diff line number Diff line change
@@ -52,29 +52,6 @@
}


def copy_json_dict(json_dict):
"""
Returns a deep copy of the input json_dict
"""
json_dict_copy = None
items = None

if isinstance(json_dict, list):
json_dict_copy = [None] * len(json_dict)
items = enumerate(json_dict)
elif isinstance(json_dict, dict):
json_dict_copy = {}
items = json_dict.items()

for key, value in items:
if isinstance(value, dict | list):
json_dict_copy[key] = copy_json_dict(value)
else:
json_dict_copy[key] = value

return json_dict_copy


class SurveyElementBuilder:
def __init__(self, **kwargs):
# I don't know why we would need an explicit none option for
@@ -143,7 +120,7 @@ def create_survey_element_from_dict(
else:
self._save_trigger(d=d)
return self._create_question_from_dict(
d, copy_json_dict(QUESTION_TYPE_DICT), self._add_none_option
d, copy.deepcopy(QUESTION_TYPE_DICT), self._add_none_option
)

def _save_trigger(self, d: dict) -> None:
60 changes: 23 additions & 37 deletions pyxform/question.py
Original file line number Diff line number Diff line change
@@ -17,6 +17,7 @@
from pyxform.utils import (
PYXFORM_REFERENCE_REGEX,
DetachableElement,
combine_lists,
default_is_dynamic,
node,
)
@@ -189,21 +190,17 @@ def _translation_path(self, display_element):

class MultipleChoiceQuestion(Question):
def __init__(self, **kwargs):
kwargs_copy = kwargs.copy()
# Notice that choices can be specified under choices or children.
# I'm going to try to stick to just choices.
# Aliases in the json format will make it more difficult
# to use going forward.
choices = list(kwargs_copy.pop("choices", [])) + list(
kwargs_copy.pop("children", [])
)
Question.__init__(self, **kwargs_copy)
for choice in choices:
self.add_choice(**choice)

def add_choice(self, **kwargs):
option = Option(**kwargs)
self.add_child(option)
kwargs["children"] = [
Option(**c)
for c in combine_lists(
a=kwargs.pop("choices", None), b=kwargs.pop("children", None)
)
]
super().__init__(**kwargs)

def validate(self):
Question.validate(self)
@@ -320,23 +317,19 @@ def build_xml(self):

class SelectOneQuestion(MultipleChoiceQuestion):
def __init__(self, **kwargs):
super().__init__(**kwargs)
self._dict[self.TYPE] = "select one"
super().__init__(**kwargs)


class Tag(SurveyElement):
def __init__(self, **kwargs):
kwargs_copy = kwargs.copy()
choices = kwargs_copy.pop("choices", []) + kwargs_copy.pop("children", [])

super().__init__(**kwargs_copy)

if choices:
self.children = []

for choice in choices:
option = Option(**choice)
self.add_child(option)
kwargs["children"] = [
Option(**c)
for c in combine_lists(
a=kwargs.pop("choices", None), b=kwargs.pop("children", None)
)
]
super().__init__(**kwargs)

def xml(self):
result = node("tag", key=self.name)
@@ -356,20 +349,13 @@ def xml_control(self):

class OsmUploadQuestion(UploadQuestion):
def __init__(self, **kwargs):
kwargs_copy = kwargs.copy()
tags = kwargs_copy.pop("tags", []) + kwargs_copy.pop("children", [])

super().__init__(**kwargs_copy)

if tags:
self.children = []

for tag in tags:
self.add_tag(**tag)

def add_tag(self, **kwargs):
tag = Tag(**kwargs)
self.add_child(tag)
kwargs["children"] = [
Tag(**c)
for c in combine_lists(
a=kwargs.pop("tags", None), b=kwargs.pop("children", None)
)
]
super().__init__(**kwargs)

def build_xml(self):
control_dict = self.control
56 changes: 44 additions & 12 deletions pyxform/survey_element.py
Original file line number Diff line number Diff line change
@@ -75,6 +75,13 @@ def any_repeat(survey_element: "SurveyElement", parent_xpath: str) -> bool:
return False


SURVEY_ELEMENT_LOCAL_KEYS = {
"_survey_element_default_type",
"_survey_element_repeats",
"_survey_element_xpath",
}


class SurveyElement(dict):
"""
SurveyElement is the base class we'll looks for the following keys
@@ -85,31 +92,40 @@ class SurveyElement(dict):
__name__ = "SurveyElement"
FIELDS: ClassVar[dict[str, Any]] = FIELDS.copy()

def _default(self):
# TODO: need way to override question type dictionary
defaults = QUESTION_TYPE_DICT
return defaults.get(self.get("type"), {})
def _dict_get(self, key):
"""Built-in dict.get to bypass __getattr__"""
return dict.get(self, key)

def __getattr__(self, key):
"""
Get attributes from FIELDS rather than the class.
"""
if key in self.FIELDS:
question_type_dict = self._default()
under = question_type_dict.get(key, None)
over = self.get(key)
under = None
default = self._dict_get("_survey_element_default_type")
if default is not None:
under = default.get(key, None)
if not under:
return over
return _overlay(over, under)
return self._dict_get(key)
return _overlay(self._dict_get(key), under)
raise AttributeError(key)

def __hash__(self):
return hash(id(self))

def __setattr__(self, key, value):
if key == "parent":
# Object graph local changes invalidate these cached facts.
self._survey_element_repeats = {}
self._survey_element_xpath = None
self[key] = value

def __init__(self, **kwargs):
self._survey_element_default_type: dict = kwargs.get(
"question_type_dict", QUESTION_TYPE_DICT
).get(kwargs.get("type"), {})
self._survey_element_xpath: str | None = None
self._survey_element_repeats: dict = {}
for key, default in self.FIELDS.items():
self[key] = kwargs.get(key, default())
self._link_children()
@@ -120,6 +136,7 @@ def __init__(self, **kwargs):
# themselves.
if self.control.get("appearance") == "label" and not self.label:
self["label"] = " "
super().__init__()

def _link_children(self):
for child in self.children:
@@ -161,7 +178,12 @@ def iter_descendants(self):

def any_repeat(self, parent_xpath: str) -> bool:
"""Return True if there ia any repeat in `parent_xpath`."""
return any_repeat(survey_element=self, parent_xpath=parent_xpath)
current_value = self._dict_get("_survey_element_repeats")
if parent_xpath not in current_value:
new_value = any_repeat(survey_element=self, parent_xpath=parent_xpath)
current_value[parent_xpath] = new_value
return new_value
return current_value[parent_xpath]

def get_lineage(self):
"""
@@ -184,7 +206,12 @@ def get_xpath(self):
"""
Return the xpath of this survey element.
"""
return "/".join([""] + [n.name for n in self.get_lineage()])
current_value = self._dict_get("_survey_element_xpath")
if current_value is None:
new_value = f'/{"/".join(n.name for n in self.get_lineage())}'
self._survey_element_xpath = new_value
return new_value
return current_value

def get_abbreviated_xpath(self):
lineage = self.get_lineage()
@@ -213,7 +240,12 @@ def to_json_dict(self):
"""
self.validate()
result = self.copy()
to_delete = ["parent", "question_type_dictionary", "_created"]
to_delete = [
"parent",
"question_type_dictionary",
"_created",
*SURVEY_ELEMENT_LOCAL_KEYS,
]
# Delete all keys that may cause a "Circular Reference"
# error while converting the result to JSON
self._delete_keys_from_dict(result, to_delete)
16 changes: 16 additions & 0 deletions pyxform/utils.py
Original file line number Diff line number Diff line change
@@ -335,3 +335,19 @@ def levenshtein_distance(a: str, b: str) -> int:

def coalesce(*args):
return next((a for a in args if a is not None), None)


def combine_lists(
a: list[dict] | None = None,
b: list[dict] | None = None,
) -> list[dict]:
"""Get the list that is not None, or both lists combined, or an empty list."""
if a is None and b is None:
combined = []
elif a is None:
combined = b
elif b is None:
combined = a
else:
combined = a + b
return combined
3 changes: 3 additions & 0 deletions tests/parsing/test_expression.py
Original file line number Diff line number Diff line change
@@ -31,6 +31,9 @@
("name with space", "Invalid character (space)"),
("na@me", "Invalid character (@)"),
("na#me", "Invalid character (#)"),
("name:.name", "Invalid character (in local name)"),
("-name:name", "Invalid character (in namespace)"),
("$name:@name", "Invalid character (in both names)"),
("name:name:name", "Invalid structure (multiple colons)"),
]

24 changes: 0 additions & 24 deletions tests/test_j2x_creation.py
Original file line number Diff line number Diff line change
@@ -10,30 +10,6 @@


class Json2XformVerboseSurveyCreationTests(TestCase):
def test_survey_can_be_created_in_a_verbose_manner(self):
s = Survey()
s.name = "simple_survey"

q = MultipleChoiceQuestion()
q.name = "cow_color"
q.type = "select one"

q.add_choice(label="Green", name="green")
s.add_child(q)

expected_dict = {
"name": "simple_survey",
"children": [
{
"name": "cow_color",
"type": "select one",
"children": [{"label": "Green", "name": "green"}],
}
],
}
self.maxDiff = None
self.assertEqual(s.to_json_dict(), expected_dict)

def test_survey_can_be_created_in_a_slightly_less_verbose_manner(self):
option_dict_array = [
{"name": "red", "label": "Red"},