Skip to content

Commit

Permalink
improve additional info generation
Browse files Browse the repository at this point in the history
  • Loading branch information
Belissimo-T committed Sep 26, 2023
1 parent fda9d22 commit 174c4ec
Show file tree
Hide file tree
Showing 3 changed files with 24 additions and 14 deletions.
26 changes: 14 additions & 12 deletions backend/lesson_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import typing

from .vplan_utils import periods_to_block_label, parse_periods, _parse_form_pattern, ParsedForm, parsed_forms_to_str, \
MajorMinorParsedForm, AlphanumParsedForm, forms_to_str
MajorMinorParsedForm, AlphanumParsedForm, forms_to_str, _loose_parse_form_pattern
from . import models


Expand Down Expand Up @@ -751,7 +751,7 @@ def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedFo
text = re.sub(r"(?<=\w)/ ", "/", text.strip())

funcs = (
lambda s: add_fuzzy_teachers(s, teacher_abbreviation_by_surname, date),
lambda s: add_fuzzy_teacher_links(s, teacher_abbreviation_by_surname, date),
lambda s: add_fuzzy_form_links(s, parsed_existing_forms, date)
)

Expand All @@ -772,10 +772,13 @@ def add_fuzzy_form_links(text: str, parsed_existing_forms: list[ParsedForm], dat
) -> list[LessonInfoTextSegment]:
segments = []

prev = 0
for match in re.finditer(_parse_form_pattern, text):
segments.append(LessonInfoTextSegment(text[prev:match.start()]))
prev = match.end()
i = 0
while i < len(text):
match = re.search(_loose_parse_form_pattern, text[i:])

if match is None:
segments.append(LessonInfoTextSegment(text=text[i:]))
break

parsed_forms = ParsedForm.from_form_match(match)

Expand All @@ -798,23 +801,22 @@ def add_fuzzy_form_links(text: str, parsed_existing_forms: list[ParsedForm], dat
matched_forms.append(form_match)

if matched_forms:
segments.append(LessonInfoTextSegment(text[i:i+match.start()]))
segments.append(
LessonInfoTextSegment(
parsed_forms_to_str(matched_forms),
link=LessonInfoTextSegmentLink("forms", [f.to_str() for f in matched_forms], date, None)
)
)
i += match.end()
else:
segments[-1].text += match.group()

segments.append(LessonInfoTextSegment(
text=text[prev:]
))
segments.append(LessonInfoTextSegment(text[i]))
i += 1

return segments


def add_fuzzy_teachers(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date):
def add_fuzzy_teacher_links(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date):
abbreviations = set(teacher_abbreviation_by_surname.values())

segments = []
Expand Down
2 changes: 1 addition & 1 deletion backend/plan_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@


class PlanProcessor:
VERSION = "79"
VERSION = "81"

def __init__(self, cache: Cache, school_number: str, *, logger: logging.Logger):
self._logger = logger
Expand Down
10 changes: 9 additions & 1 deletion backend/vplan_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,15 @@
r"(?P<sep>[/.] |[^A-Za-zÄÖÜäöüß0-9() \n]?)"
r"(?P<minor>(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?)(?:,(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?))*)"
r"|(?P<alpha>\d{1,2})"
r")(?![^\s,])"
r")(?![^\s,:])"
)
# Regex that recognises a form (class) label inside free text.
#
# A match must start at the beginning of the string or directly after
# whitespace, and must be followed by whitespace, a comma, a colon, or the
# end of the string. Two shapes are accepted:
#   * major + sep + minor  (named groups "major", "sep", "minor")
#   * a bare one- or two-digit number (named group "alpha")
#
# NOTE(review): "loose" appears to refer to the separator. Here the "/" or
# "." before the space is optional, so a plain space between major and minor
# also matches, whereas the "sep" group of the pattern defined just above
# (presumably `_parse_form_pattern`) requires it. Confirm against that
# pattern's full definition.
_loose_parse_form_pattern = re.compile(
# Lookbehind: the previous character, if any, must be whitespace.
r"(?<!\S)(?:"
# major: 1-2 digits not followed by another digit, or a complete run of
# letters (the lookahead forbids stopping in the middle of a word).
r"(?P<major>\d{1,2}(?!\d)|[A-Za-zÄÖÜäöüß]+(?![A-Za-zÄÖÜäöüß]))"
# sep: an optional "/" or "." followed by one space, or at most one character
# that is not a letter, digit, parenthesis, space or newline (may be empty).
r"(?P<sep>[/.]? |[^A-Za-zÄÖÜäöüß0-9() \n]?)"
# minor: one or more comma-separated items (no space after the comma); each
# item is 1-2 digits with an optional trailing letter, or a lazily matched
# run of letters.
r"(?P<minor>(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?)(?:,(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?))*)"
# Alternative to major/sep/minor: a bare 1-2 digit number.
r"|(?P<alpha>\d{1,2})"
# Lookahead: the next character, if any, must be whitespace, "," or ":".
r")(?![^\s,:])"
)


Expand Down

0 comments on commit 174c4ec

Please sign in to comment.