Skip to content

Commit

Permalink
fix (html upload): strip span tags
Browse files (browse the repository at this point in the history)
  • Loading branch information
brownben committed Dec 19, 2024
1 parent 0903426 commit 4944f8a
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions backend/src/utils/import_file/import_html.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,6 +17,7 @@
)
NBSP_REGEX = re.compile(" ", flags=re.IGNORECASE)
SOCIAL_LINK_REGEX = re.compile("<a href=.*?></a>", flags=re.IGNORECASE)
SPAN_REGEX = re.compile("<span.*?>|<\\/span>", flags=re.IGNORECASE)


def parse_sitiming_script(script_tag_text: str) -> list[str]:
Expand All @@ -38,8 +39,9 @@ def parse_sitiming_script(script_tag_text: str) -> list[str]:
# replace &nbsp; with real spaces
script_text = NBSP_REGEX.sub("", script_text)

# remove the links to social media
# remove the links to social media, and span tags
script_text = SOCIAL_LINK_REGEX.sub("", script_text)
script_text = SPAN_REGEX.sub("", script_text)

# split into the blocks of JSON
IF_RETURN = r";\n*\s*if\s*\(tableNumber == [0-9]+\)\n*\s*return\s*\n*"
Expand Down Expand Up @@ -103,7 +105,7 @@ def process_html_file(file: str) -> Iterable[ImportedRecord]:
header_row, result_rows = parse_sitiming_file(file)
else:
raise ImportException(
"Unknown HTML format, currently only able to import from British Orienteering and SITiming results."
"Unknown HTML format, currently only able to import from SITiming results."
)

if len(result_rows) == 0:
Expand Down

0 comments on commit 4944f8a

Please sign in to comment.