Merge pull request #225 from ArtrenH/main
2023-11-04
Belissimo-T authored Nov 4, 2023
2 parents a7f5ba7 + 2b05f10 commit 056e116
Showing 16 changed files with 462 additions and 275 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
@@ -22,6 +22,7 @@ jobs:
git checkout production
git pull
source venv/bin/activate.fish
pip install --upgrade pip
pip install -r requirements.txt
cd client
4 changes: 2 additions & 2 deletions backend/import_files.py
@@ -3,11 +3,11 @@
import sys
from pathlib import Path

from .load_plans import get_clients
from .load_plans import get_crawlers


async def main():
clients = await get_clients()
clients = await get_crawlers()

directory = Path(sys.argv[1])

62 changes: 31 additions & 31 deletions backend/lesson_info.py
@@ -417,10 +417,6 @@ def create_literal_parsed_info(msg: str) -> ParsedLessonInfo:
)


def resolve_teacher_abbreviations(surnames: list[str], abbreviation_by_surname: dict[str, str]) -> list[str]:
return [abbreviation_by_surname.get(surname, surname) for surname in surnames]


def _parse_form_plan_message(info: str, lesson: models.Lesson) -> tuple[ParsedLessonInfoMessage, re.Match | None]:
if match := _InfoParsers.substitution.match(info):
return InsteadOfCourse(
@@ -649,14 +645,16 @@ def sorted_canonical(self) -> ParsedLessonInfo:
sorted(paragraphs, key=lambda p: [i.parsed.original_messages for i in p.messages])
)

def resolve_teachers(self, teacher_abbreviation_by_surname: dict[str, str]):
def resolve_teachers(self, teachers: teacher_model.Teachers):
for paragraph in self.paragraphs:
for message in paragraph.messages:
if hasattr(message.parsed, "_teachers"):
message.parsed.other_info_value = resolve_teacher_abbreviations(
message.parsed._teachers,
teacher_abbreviation_by_surname
)
try:
message.parsed.other_info_value = [
teachers.query_plan_teacher(teacher_str).plan_short for teacher_str in message.parsed._teachers
]
except LookupError:
message.parsed.other_info_value = None

def lesson_group_sort_key(self) -> list[list[list[str]]]:
return [
@@ -682,14 +680,14 @@ def __add__(self, other: ParsedLessonInfo):


def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *,
logger: logging.Logger) -> dict[str, teacher_model.Teacher]:
logger: logging.Logger) -> typing.Iterable[teacher_model.Teacher]:
out: dict[str, teacher_model.Teacher] = {}

for teacher_abbreviation in lesson.teachers or ():
out[teacher_abbreviation] = teacher_model.Teacher(teacher_abbreviation)
for plan_short in lesson.teachers or ():
out[plan_short] = teacher_model.Teacher(plan_short, last_seen=lesson._lesson_date)

if lesson._is_scheduled:
return out
return ()

for paragraph in lesson.parsed_info.paragraphs:
for message in paragraph.messages:
@@ -740,28 +738,34 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *,
continue

abbreviation = list(_class.values())[0].teacher
teacher = teacher_model.Teacher(abbreviation, None, surname, None, [])

out[teacher.abbreviation] = teacher
if not abbreviation:
continue

return out
out[abbreviation] = teacher_model.Teacher(
plan_short=abbreviation,
plan_long=surname,
last_seen=lesson._lesson_date
)

return out.values()


def process_additional_info(info: list[str], parsed_existing_forms: list[ParsedForm],
teacher_abbreviation_by_surname: dict[str, str], date: datetime.date
teachers: teacher_model.Teachers, date: datetime.date
) -> list[list[LessonInfoTextSegment]]:
info = info.copy()
while info and not info[-1]:
info.pop()

return [
process_additional_info_line(line, parsed_existing_forms, teacher_abbreviation_by_surname, date)
process_additional_info_line(line, parsed_existing_forms, teachers, date)
for line in info
]


def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedForm],
teacher_abbreviation_by_surname: dict[str, str], date: datetime.date
teachers: teacher_model.Teachers, date: datetime.date
) -> list[LessonInfoTextSegment]:
if text is None:
return []
@@ -771,7 +775,7 @@ def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedFo
text = re.sub(r"\b {1,3}\b", " ", text.strip())

funcs = (
lambda s: add_fuzzy_teacher_links(s, teacher_abbreviation_by_surname, date),
lambda s: add_fuzzy_teacher_links(s, teachers, date),
lambda s: add_fuzzy_form_links(s, parsed_existing_forms, date)
)

@@ -857,24 +861,20 @@ def validator(match: re.Match) -> list[LessonInfoTextSegment] | None:
return add_fuzzy_with_validator(text, [_loose_parse_form_pattern], validator)


def add_fuzzy_teacher_links(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date):
abbreviations = set(teacher_abbreviation_by_surname.values())

def add_fuzzy_teacher_links(text: str, teachers: teacher_model.Teachers, date: datetime.date):
def validator(match: re.Match) -> list[LessonInfoTextSegment] | None:
surname_or_abbreviation = match.group()

if surname_or_abbreviation not in abbreviations and surname_or_abbreviation in teacher_abbreviation_by_surname:
abbreviation = teacher_abbreviation_by_surname[surname_or_abbreviation]
elif surname_or_abbreviation in abbreviations:
abbreviation = surname_or_abbreviation
else:
abbreviation = None
try:
plan_short = teachers.query_plan_teacher(surname_or_abbreviation).plan_short
except LookupError:
plan_short = None

if abbreviation is not None:
if plan_short is not None:
return [
LessonInfoTextSegment(
surname_or_abbreviation,
link=LessonInfoTextSegmentLink("teachers", [abbreviation], date, None)
link=LessonInfoTextSegmentLink("teachers", [plan_short], date, None)
)
]
else:
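Note on the lesson_info.py change: the surname→abbreviation dict (`resolve_teacher_abbreviations`) is replaced by queries against a `teacher_model.Teachers` object whose `query_plan_teacher()` raises `LookupError` on a miss. A minimal, self-contained sketch of that lookup pattern — the `Teacher`/`Teachers` classes below are simplified stand-ins, not the real backend models; only the `query_plan_teacher()`/`plan_short` names and the `LookupError` handling are taken from the diff:

```python
import dataclasses


@dataclasses.dataclass
class Teacher:
    plan_short: str                # abbreviation as it appears in the plan, e.g. "Mül"
    plan_long: str | None = None   # surname as it appears in the plan, e.g. "Müller"


class Teachers:
    def __init__(self, teachers: list[Teacher]):
        self._teachers = teachers

    def query_plan_teacher(self, surname_or_abbreviation: str) -> Teacher:
        # Accept either the abbreviation or the surname; raise LookupError when
        # nothing matches, mirroring the except LookupError branches in the diff.
        for teacher in self._teachers:
            if surname_or_abbreviation in (teacher.plan_short, teacher.plan_long):
                return teacher
        raise LookupError(surname_or_abbreviation)


def resolve(teachers: Teachers, names: list[str]) -> list[str] | None:
    # Same shape as the new resolve_teachers() body: all-or-nothing resolution.
    try:
        return [teachers.query_plan_teacher(name).plan_short for name in names]
    except LookupError:
        return None


if __name__ == "__main__":
    teachers = Teachers([Teacher("Mül", "Müller"), Teacher("Schm", "Schmidt")])
    print(resolve(teachers, ["Müller", "Schm"]))   # -> ['Mül', 'Schm']
    print(resolve(teachers, ["Unbekannt"]))        # -> None
```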
34 changes: 19 additions & 15 deletions backend/load_plans.py
@@ -54,39 +54,43 @@ async def check_infinite(self, interval: int = 60, *, once: bool = False, ignore
await asyncio.sleep(interval)


async def get_clients(session: aiohttp.ClientSession | None = None,
proxy_provider: proxies.ProxyProvider | None = None) -> dict[str, PlanCrawler]:
async def get_crawlers(session: aiohttp.ClientSession | None = None,
proxy_provider: proxies.ProxyProvider | None = None,
create_clients: bool = True) -> dict[str, PlanCrawler]:
creds_provider = creds_provider_factory(Path("creds.json"))
_creds = creds_provider.get_creds()

clients = {}
crawlers = {}

for school_name, data in _creds.items():
specifier = data['school_number'] if 'school_number' in data else school_name
logger = logging.getLogger(specifier)
cache = Cache(Path(f".cache/{specifier}").absolute())

data["hosting"]["creds"] = data["hosting"]["creds"].get("teachers", data["hosting"]["creds"].get("students"))
hosting = Hosting.deserialize(data["hosting"])
client = IndiwareStundenplanerClient(hosting, session)
if create_clients:
data["hosting"]["creds"] = data["hosting"]["creds"].get("teachers", data["hosting"]["creds"].get("students"))
hosting = Hosting.deserialize(data["hosting"])
client = IndiwareStundenplanerClient(hosting, session)

for plan_client in client.substitution_plan_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True
for plan_client in client.substitution_plan_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True

for plan_client in client.indiware_mobil_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True
for plan_client in client.indiware_mobil_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True
else:
client = None

plan_downloader = PlanDownloader(client, cache, logger=logger)
plan_processor = PlanProcessor(cache, specifier, logger=logger)

# create crawler
p = PlanCrawler(plan_downloader, plan_processor)

clients[school_name] = p
crawlers[school_name] = p

return clients
return crawlers


async def main():
@@ -119,7 +123,7 @@ async def main():
never_raise_out_of_proxies=args.never_raise_out_of_proxies)
# list(proxy_provider.fetch_proxies())

clients = await get_clients(proxy_provider=proxy_provider)
clients = await get_crawlers(proxy_provider=proxy_provider, create_clients=not args.only_process)
try:
if args.only_process:
for client in clients.values():
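Note on the load_plans.py change: `get_clients` is renamed to `get_crawlers`, and the new `create_clients` flag lets crawlers be built without an `IndiwareStundenplanerClient` (the downloader then gets `client=None`), so `--only-process` runs can skip credential/client setup entirely. A hedged usage sketch under those assumptions — the `--only-process` branch body is truncated in the diff above, and `check_infinite()` being a `PlanCrawler` method is inferred from the hunk context, not verified:

```python
import argparse
import asyncio

from backend.load_plans import get_crawlers  # renamed from get_clients in this PR


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--only-process", action="store_true")
    args = parser.parse_args()

    # create_clients=False builds crawlers without an IndiwareStundenplanerClient,
    # so nothing is downloaded and only cached plans can be (re)processed.
    crawlers = await get_crawlers(create_clients=not args.only_process)

    if args.only_process:
        for crawler in crawlers.values():
            ...  # re-process cached plans only; the real branch is truncated in the diff above
    else:
        # continuous crawling, as in the existing main()
        await asyncio.gather(*(crawler.check_infinite() for crawler in crawlers.values()))


if __name__ == "__main__":
    asyncio.run(main())
```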
43 changes: 18 additions & 25 deletions backend/meta_extractor.py
@@ -19,22 +19,27 @@ class DailyMetaExtractor:
def __init__(self, plankl_file: str):
self.form_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plankl_file))

def teachers(self) -> dict[str, list[str]]:
excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA"]
def teachers(self) -> list[Teacher]:
excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA", "EU4"]

all_teachers = set()
out = []
for form in self.form_plan.forms:
for lesson in form.lessons:
if lesson.teacher():
all_teachers.add(lesson.teacher())
for teacher in (lesson.teacher() or "").split():
if not teacher:
continue
out.append(Teacher(plan_short=teacher, last_seen=self.form_plan.date))

teachers = defaultdict(list, {teacher: [] for teacher in all_teachers})
for form in self.form_plan.forms:
for class_ in form.classes.values():
if class_.teacher and class_.subject not in excluded_subjects:
teachers[class_.teacher].append(class_.subject)
subjects = set(s for s in class_.subject.split() if s not in excluded_subjects)

return teachers
for teacher in class_.teacher.split():
if not teacher:
continue

out.append(Teacher(plan_short=teacher, subjects=subjects, last_seen=self.form_plan.date))

return out

def forms(self) -> list[str]:
return [form.short_name for form in self.form_plan.forms]
@@ -73,7 +78,7 @@ def free_days(self) -> list[datetime.date]:


class MetaExtractor:
def __init__(self, cache: Cache, num_last_days: int = 10, *, logger: logging.Logger):
def __init__(self, cache: Cache, num_last_days: int | None = 10, *, logger: logging.Logger):
self._logger = logger

self.cache = cache
Expand All @@ -85,6 +90,7 @@ def __init__(self, cache: Cache, num_last_days: int = 10, *, logger: logging.Log
def iterate_daily_extractors(self) -> typing.Generator[DailyMetaExtractor, None, None]:
for day in self.cache.get_days()[:self.num_last_days]:
for timestamp in self.cache.get_timestamps(day):
self._logger.log(5, f"Yielding DailyMetaExtractor for {day!s} {timestamp!s}.")
if (day, timestamp) in self._daily_extractors:
yield self._daily_extractors[(day, timestamp)]
else:
@@ -117,20 +123,7 @@ def rooms(self) -> set[str]:
return rooms

def teachers(self) -> list[Teacher]:
teachers: dict[str, list[str]] = defaultdict(list)

for extractor in self.iterate_daily_extractors():
for _teacher, subjects in extractor.teachers().items():
for teacher in _teacher.split(" "):
teachers[teacher].extend(subjects)

for teacher, subjects in teachers.items():
teachers[teacher] = sorted(set(subjects))

return [
Teacher(abbreviation, None, None, None, subjects=subjects)
for abbreviation, subjects in teachers.items()
]
return sum((e.teachers() for e in self.iterate_daily_extractors()), [])

def forms(self) -> list[str]:
forms: set[str] = set()
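Note on the meta_extractor.py change: `DailyMetaExtractor.teachers()` now emits one `Teacher` record per sighting (with `plan_short`, `subjects`, `last_seen`), and `MetaExtractor.teachers()` just concatenates them via `sum(..., [])`, so the same abbreviation can appear many times. The deduplication presumably happens downstream in the teacher store; a minimal sketch of that kind of merge under that assumption — the merge itself is not code from this PR, and the `Teacher` dataclass is a simplified stand-in:

```python
import dataclasses
import datetime


@dataclasses.dataclass
class Teacher:  # simplified stand-in for teacher_model.Teacher
    plan_short: str
    subjects: set[str] = dataclasses.field(default_factory=set)
    last_seen: datetime.date | None = None


def merge_teachers(sightings: list[Teacher]) -> dict[str, Teacher]:
    merged: dict[str, Teacher] = {}
    for t in sightings:
        if t.plan_short not in merged:
            merged[t.plan_short] = Teacher(t.plan_short, set(t.subjects), t.last_seen)
            continue
        m = merged[t.plan_short]
        m.subjects |= t.subjects                      # union of all seen subjects
        if t.last_seen and (m.last_seen is None or t.last_seen > m.last_seen):
            m.last_seen = t.last_seen                 # keep the most recent sighting
    return merged


sightings = [
    Teacher("Mül", {"DE"}, datetime.date(2023, 11, 3)),
    Teacher("Mül", {"GE"}, datetime.date(2023, 11, 4)),
    Teacher("Schm", {"MA"}, datetime.date(2023, 11, 4)),
]
print(merge_teachers(sightings))   # "Mül" ends up with {'DE', 'GE'}, last seen 2023-11-04
```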
