From 174c4eceabcc55a925f8cc03947834976171fb1d Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Tue, 26 Sep 2023 23:54:24 +0200 Subject: [PATCH] improve additional info generation --- backend/lesson_info.py | 26 ++++++++++++++------------ backend/plan_processor.py | 2 +- backend/vplan_utils.py | 10 +++++++++- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/backend/lesson_info.py b/backend/lesson_info.py index 177ef43..bc5a69b 100644 --- a/backend/lesson_info.py +++ b/backend/lesson_info.py @@ -8,7 +8,7 @@ import typing from .vplan_utils import periods_to_block_label, parse_periods, _parse_form_pattern, ParsedForm, parsed_forms_to_str, \ - MajorMinorParsedForm, AlphanumParsedForm, forms_to_str + MajorMinorParsedForm, AlphanumParsedForm, forms_to_str, _loose_parse_form_pattern from . import models @@ -751,7 +751,7 @@ def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedFo text = re.sub(r"(?<=\w)/ ", "/", text.strip()) funcs = ( - lambda s: add_fuzzy_teachers(s, teacher_abbreviation_by_surname, date), + lambda s: add_fuzzy_teacher_links(s, teacher_abbreviation_by_surname, date), lambda s: add_fuzzy_form_links(s, parsed_existing_forms, date) ) @@ -772,10 +772,13 @@ def add_fuzzy_form_links(text: str, parsed_existing_forms: list[ParsedForm], dat ) -> list[LessonInfoTextSegment]: segments = [] - prev = 0 - for match in re.finditer(_parse_form_pattern, text): - segments.append(LessonInfoTextSegment(text[prev:match.start()])) - prev = match.end() + i = 0 + while i < len(text): + match = re.search(_loose_parse_form_pattern, text[i:]) + + if match is None: + segments.append(LessonInfoTextSegment(text=text[i:])) + break parsed_forms = ParsedForm.from_form_match(match) @@ -798,23 +801,22 @@ def add_fuzzy_form_links(text: str, parsed_existing_forms: list[ParsedForm], dat matched_forms.append(form_match) if matched_forms: + segments.append(LessonInfoTextSegment(text[i:i+match.start()])) segments.append( LessonInfoTextSegment( parsed_forms_to_str(matched_forms), link=LessonInfoTextSegmentLink("forms", [f.to_str() for f in matched_forms], date, None) ) ) + i += match.end() else: - segments[-1].text += match.group() - - segments.append(LessonInfoTextSegment( - text=text[prev:] - )) + segments.append(LessonInfoTextSegment(text[i])) + i += 1 return segments -def add_fuzzy_teachers(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date): +def add_fuzzy_teacher_links(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date): abbreviations = set(teacher_abbreviation_by_surname.values()) segments = [] diff --git a/backend/plan_processor.py b/backend/plan_processor.py index 67fb5d9..57349fd 100644 --- a/backend/plan_processor.py +++ b/backend/plan_processor.py @@ -14,7 +14,7 @@ class PlanProcessor: - VERSION = "79" + VERSION = "81" def __init__(self, cache: Cache, school_number: str, *, logger: logging.Logger): self._logger = logger diff --git a/backend/vplan_utils.py b/backend/vplan_utils.py index 555bc95..2131969 100644 --- a/backend/vplan_utils.py +++ b/backend/vplan_utils.py @@ -14,7 +14,15 @@ r"(?P[/.] |[^A-Za-zÄÖÜäöüß0-9() \n]?)" r"(?P(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?)(?:,(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?))*)" r"|(?P\d{1,2})" - r")(?![^\s,])" + r")(?![^\s,:])" +) +_loose_parse_form_pattern = re.compile( + r"(?\d{1,2}(?!\d)|[A-Za-zÄÖÜäöüß]+(?![A-Za-zÄÖÜäöüß]))" + r"(?P[/.]? |[^A-Za-zÄÖÜäöüß0-9() \n]?)" + r"(?P(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?)(?:,(?:\d{1,2}[A-Za-zÄÖÜäöüß]?|[A-Za-zÄÖÜäöüß]+?))*)" + r"|(?P\d{1,2})" + r")(?![^\s,:])" )