From 85c6afe3eb1d8297a39525c6e360229009d92f92 Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Thu, 2 Nov 2023 23:47:35 +0100 Subject: [PATCH 1/7] add pip install --upgrade pip command to GH workflow --- .github/workflows/main.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index eab53601..703f2cff 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -22,6 +22,7 @@ jobs: git checkout production git pull source venv/bin/activate.fish + pip install --upgrade pip pip install -r requirements.txt cd client From 7c47c0dedcd4b81c1a57f00a3f34d70c57f97304 Mon Sep 17 00:00:00 2001 From: OfficialFreak Date: Fri, 3 Nov 2023 10:52:03 +0100 Subject: [PATCH 2/7] Design Progress for Favorites --- client/src/App.svelte | 4 +- client/src/base_components/Select.svelte | 2 + client/src/components/Favourites.svelte | 267 +++++++++++++++-------- client/src/components/Navbar.svelte | 4 +- 4 files changed, 184 insertions(+), 93 deletions(-) diff --git a/client/src/App.svelte b/client/src/App.svelte index 7e4c3c81..cc6f0d56 100644 --- a/client/src/App.svelte +++ b/client/src/App.svelte @@ -375,8 +375,6 @@ {:else if $current_page === "impressum"} - {:else if $current_page === "favourites"} - {:else if $logged_in} {#if $current_page.substring(0, 4) === "plan" || $current_page === "weekplan"}

{emoji} {greeting}

@@ -438,6 +436,8 @@ {/if} {:else if $current_page === "school_manager"} + {:else if $current_page === "favorites"} + {:else if $current_page === "pwa_install"} {:else} diff --git a/client/src/base_components/Select.svelte b/client/src/base_components/Select.svelte index ffec8a19..6722f95d 100644 --- a/client/src/base_components/Select.svelte +++ b/client/src/base_components/Select.svelte @@ -9,6 +9,7 @@ export let grouped = false; export let data_name = "Elemente"; export let preselect = null; + export let onchange = () => {}; let toggle_button; let selected_index = (preselect !== null) ? preselect : null; let grouped_length = null; @@ -85,6 +86,7 @@ selected_elem = data[selected_index]; } selected_id = selected_elem.id; + onchange(); } function unselect() { diff --git a/client/src/components/Favourites.svelte b/client/src/components/Favourites.svelte index 91fd773a..6d1278c4 100644 --- a/client/src/components/Favourites.svelte +++ b/client/src/components/Favourites.svelte @@ -2,10 +2,12 @@ import {customFetch, get_favourites, load_meta} from "../utils.js"; import Select from "../base_components/Select.svelte"; import Button from "../base_components/Button.svelte"; - import {onMount} from "svelte"; import {notifications} from "../notifications.js"; - import {favourites} from "../stores.js"; + import {favourites, title} from "../stores.js"; + import CollapsibleWrapper from "../base_components/CollapsibleWrapper.svelte"; + import Collapsible from "../base_components/Collapsible.svelte"; + import { onMount } from "svelte"; let cur_favourites = []; let all_schools = {}; @@ -14,18 +16,31 @@ $: school_nums = [...new Set(cur_favourites.map(obj => obj.school_num).filter(school_num => school_num !== ""))]; let all_meta = {}; let duplicated_courses_match = {}; + let plan_types = [ + {"display_name": "Klassenplan", "id": "forms"}, + {"display_name": "Lehrerplan", "id": "teachers"}, + {"display_name": "Raumplan", "id": "rooms"}, + {"display_name": "Freie Räume", "id": "room_overview"} + ]; + let plan_type_map = { + "forms": "Klasse", + "teachers": "Lehrer", + "rooms": "Raum" + } $: update_meta(school_nums); + get_schools(); + get_authorized_schools(); + get_favourites() + .then(data => { + load_favourites() + }); + onMount(() => { - get_schools(); - get_authorized_schools(); - get_favourites() - .then(data => { - load_favourites() - }); + location.hash = "#favorites"; + title.set("Favoriten"); }); - // duplicate from school manager but hard to simplify function get_schools() { customFetch("/api/v69.420/schools") @@ -100,7 +115,8 @@ } // clear everything except for the school num of a favourite function clear_favourite(favourite) { - cur_favourites[favourite] = {"school_num": cur_favourites[favourite].school_num, "name": cur_favourites[favourite].name, "priority": 0, "plan_type": "", "plan_value": "", "preferences": {}} + cur_favourites[favourite] = {"school_num": cur_favourites[favourite].school_num, "name": cur_favourites[favourite].name, "priority": 0, "plan_type":cur_favourites[favourite].plan_type, "plan_value": "", "preferences": {}} + cur_favourites = cur_favourites; } // delete a favourite function delete_favourite(favourite) { @@ -109,7 +125,6 @@ cur_favourites = new_array; } - // load all metadata for all schools needed function update_meta(school_nums) { for (let i = 0; i < school_nums.length; i++) { @@ -205,92 +220,166 @@ return courses_by_subject; } + function update_authorized_schools() { + authorized_schools = []; + for(const school_id of authorized_school_ids) { + 
if(all_schools.hasOwnProperty(school_id)) { + authorized_schools.push({ + "display_name": all_schools[school_id].display_name, + "id": school_id + }); + } + } + } + + let authorized_schools = []; + $: authorized_school_ids, all_schools, update_authorized_schools(); - - -




-{#each cur_favourites as _, favourite} -

- - - - - - {#if cur_favourites[favourite].plan_type === "forms"} - - - {#each Object.entries( - get_subjects(favourite, all_meta) - ).sort(([subj1, _], [subj2, __]) => subj1.localeCompare(subj2)).sort(([_, courses1], [__, courses2]) => courses2.length - courses1.length) as [subject, courses]} - {#if courses.length === 1} -

  • {subject}: - {courses[0].class_number} - {courses[0].teacher} | - {courses[0].subject} - {#if courses[0].group != null} - ({courses[0].group}) - {/if} -
  • - {:else} -
  • - {subject} - {#if courses.length > 2} - - - {/if} -
  • -
      - {#each courses as course} -
    • - - {course.class_number} - {course.teacher} | - {course.subject} - {#if course.group != null} - ({course.group}) + + {#each cur_favourites as _, favourite} + + +
      + + + + + + + {#if cur_favourites[favourite].plan_type} + {#if cur_favourites[favourite].plan_type === "forms"} + + +
      + {#each Object.entries( + get_subjects(favourite, all_meta) + ).sort(([subj1, _], [subj2, __]) => subj1.localeCompare(subj2)).sort(([_, courses1], [__, courses2]) => courses2.length - courses1.length) as [subject, courses]} + {#if courses.length === 1} +
    • {subject}: + {courses[0].class_number} + {courses[0].teacher} | + {courses[0].subject} + {#if courses[0].group != null} + ({courses[0].group}) + {/if} +
    • + {:else} +
    • + {subject} + {#if courses.length > 2} + + + {/if} +
    • +
        + {#each courses as course} +
      • + + {course.class_number} + {course.teacher} | + {course.subject} + {#if course.group != null} + ({course.group}) + {/if} +
      • + {/each} +
      {/if} - - {/each} -
    + {:else} + Wähle eine Klasse um die Kurse für sie zu wählen + {/each} + + {:else if cur_favourites[favourite].plan_type !== "room_overview"} + + {/if} {/if} - {/each} + + + + {/each} + + + + + + \ No newline at end of file diff --git a/client/src/components/Navbar.svelte b/client/src/components/Navbar.svelte index 85046b13..4746d4f6 100644 --- a/client/src/components/Navbar.svelte +++ b/client/src/components/Navbar.svelte @@ -43,7 +43,7 @@ {/if} {favourite.name} {/each} - +
  • @@ -53,7 +53,7 @@ - + From 774681a47b2cbe25d75ed9fef6dae467be1717b5 Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Fri, 3 Nov 2023 19:38:24 +0100 Subject: [PATCH 3/7] teacher refactor #209 --- backend/lesson_info.py | 59 ++++++++++++-------------- backend/meta_extractor.py | 13 +++--- backend/plan_extractor.py | 51 +++++++++++----------- backend/plan_processor.py | 51 ++++++++-------------- backend/schools/ostwald.py | 14 +++--- backend/schools/taro.py | 9 ++-- backend/teacher.py | 87 +++++++++++++++++++++++--------------- 7 files changed, 142 insertions(+), 142 deletions(-) diff --git a/backend/lesson_info.py b/backend/lesson_info.py index fbfd1abd..bae4e0d3 100644 --- a/backend/lesson_info.py +++ b/backend/lesson_info.py @@ -417,10 +417,6 @@ def create_literal_parsed_info(msg: str) -> ParsedLessonInfo: ) -def resolve_teacher_abbreviations(surnames: list[str], abbreviation_by_surname: dict[str, str]) -> list[str]: - return [abbreviation_by_surname.get(surname, surname) for surname in surnames] - - def _parse_form_plan_message(info: str, lesson: models.Lesson) -> tuple[ParsedLessonInfoMessage, re.Match | None]: if match := _InfoParsers.substitution.match(info): return InsteadOfCourse( @@ -649,14 +645,16 @@ def sorted_canonical(self) -> ParsedLessonInfo: sorted(paragraphs, key=lambda p: [i.parsed.original_messages for i in p.messages]) ) - def resolve_teachers(self, teacher_abbreviation_by_surname: dict[str, str]): + def resolve_teachers(self, teachers: teacher_model.Teachers): for paragraph in self.paragraphs: for message in paragraph.messages: if hasattr(message.parsed, "_teachers"): - message.parsed.other_info_value = resolve_teacher_abbreviations( - message.parsed._teachers, - teacher_abbreviation_by_surname - ) + try: + message.parsed.other_info_value = [ + teachers.query_plan_teacher(teacher_str).plan_short for teacher_str in message.parsed._teachers + ] + except LookupError: + message.parsed.other_info_value = None def lesson_group_sort_key(self) -> list[list[list[str]]]: return [ @@ -682,14 +680,14 @@ def __add__(self, other: ParsedLessonInfo): def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *, - logger: logging.Logger) -> dict[str, teacher_model.Teacher]: + logger: logging.Logger) -> typing.Iterable[teacher_model.Teacher]: out: dict[str, teacher_model.Teacher] = {} - for teacher_abbreviation in lesson.teachers or (): - out[teacher_abbreviation] = teacher_model.Teacher(teacher_abbreviation) + for plan_short in lesson.teachers or (): + out[plan_short] = teacher_model.Teacher(plan_short) if lesson._is_scheduled: - return out + return () for paragraph in lesson.parsed_info.paragraphs: for message in paragraph.messages: @@ -740,28 +738,31 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *, continue abbreviation = list(_class.values())[0].teacher - teacher = teacher_model.Teacher(abbreviation, None, surname, None, []) - out[teacher.abbreviation] = teacher + out[abbreviation] = teacher_model.Teacher( + plan_short=abbreviation, + plan_long=surname, + last_seen=lesson._lesson_date + ) - return out + return out.values() def process_additional_info(info: list[str], parsed_existing_forms: list[ParsedForm], - teacher_abbreviation_by_surname: dict[str, str], date: datetime.date + teachers: teacher_model.Teachers, date: datetime.date ) -> list[list[LessonInfoTextSegment]]: info = info.copy() while info and not info[-1]: info.pop() return [ - process_additional_info_line(line, parsed_existing_forms, 
teacher_abbreviation_by_surname, date) + process_additional_info_line(line, parsed_existing_forms, teachers, date) for line in info ] def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedForm], - teacher_abbreviation_by_surname: dict[str, str], date: datetime.date + teachers: teacher_model.Teachers, date: datetime.date ) -> list[LessonInfoTextSegment]: if text is None: return [] @@ -771,7 +772,7 @@ def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedFo text = re.sub(r"\b {1,3}\b", " ", text.strip()) funcs = ( - lambda s: add_fuzzy_teacher_links(s, teacher_abbreviation_by_surname, date), + lambda s: add_fuzzy_teacher_links(s, teachers, date), lambda s: add_fuzzy_form_links(s, parsed_existing_forms, date) ) @@ -857,24 +858,20 @@ def validator(match: re.Match) -> list[LessonInfoTextSegment] | None: return add_fuzzy_with_validator(text, [_loose_parse_form_pattern], validator) -def add_fuzzy_teacher_links(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date): - abbreviations = set(teacher_abbreviation_by_surname.values()) - +def add_fuzzy_teacher_links(text: str, teachers: teacher_model.Teachers, date: datetime.date): def validator(match: re.Match) -> list[LessonInfoTextSegment] | None: surname_or_abbreviation = match.group() - if surname_or_abbreviation not in abbreviations and surname_or_abbreviation in teacher_abbreviation_by_surname: - abbreviation = teacher_abbreviation_by_surname[surname_or_abbreviation] - elif surname_or_abbreviation in abbreviations: - abbreviation = surname_or_abbreviation - else: - abbreviation = None + try: + plan_short = teachers.query_plan_teacher(surname_or_abbreviation).plan_short + except LookupError: + plan_short = None - if abbreviation is not None: + if plan_short is not None: return [ LessonInfoTextSegment( surname_or_abbreviation, - link=LessonInfoTextSegmentLink("teachers", [abbreviation], date, None) + link=LessonInfoTextSegmentLink("teachers", [plan_short], date, None) ) ] else: diff --git a/backend/meta_extractor.py b/backend/meta_extractor.py index 0cb9d920..d93f1b25 100644 --- a/backend/meta_extractor.py +++ b/backend/meta_extractor.py @@ -19,7 +19,7 @@ class DailyMetaExtractor: def __init__(self, plankl_file: str): self.form_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plankl_file)) - def teachers(self) -> dict[str, list[str]]: + def teachers(self) -> dict[str, set[str]]: excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA"] all_teachers = set() @@ -28,11 +28,11 @@ def teachers(self) -> dict[str, list[str]]: if lesson.teacher(): all_teachers.add(lesson.teacher()) - teachers = defaultdict(list, {teacher: [] for teacher in all_teachers}) + teachers = defaultdict(set, {teacher: set() for teacher in all_teachers}) for form in self.form_plan.forms: for class_ in form.classes.values(): if class_.teacher and class_.subject not in excluded_subjects: - teachers[class_.teacher].append(class_.subject) + teachers[class_.teacher].add(class_.subject) return teachers @@ -117,15 +117,12 @@ def rooms(self) -> set[str]: return rooms def teachers(self) -> list[Teacher]: - teachers: dict[str, list[str]] = defaultdict(list) + teachers: dict[str, set[str]] = defaultdict(set) for extractor in self.iterate_daily_extractors(): for _teacher, subjects in extractor.teachers().items(): for teacher in _teacher.split(" "): - teachers[teacher].extend(subjects) - - for teacher, subjects in teachers.items(): - teachers[teacher] = sorted(set(subjects)) + teachers[teacher] |= subjects return 
[ Teacher(abbreviation, None, None, None, subjects=subjects) diff --git a/backend/plan_extractor.py b/backend/plan_extractor.py index 31a00b33..b4131a9c 100644 --- a/backend/plan_extractor.py +++ b/backend/plan_extractor.py @@ -18,7 +18,7 @@ class PlanExtractor: _logger: logging.Logger plan: Plan - teacher_abbreviation_by_surname: dict[str, str] + teachers: Teachers def fill_in_lesson_times(self): forms: dict[str, indiware_mobil.Form] = {form.short_name: form for form in self.plan.indiware_plan.forms} @@ -77,7 +77,7 @@ def info_data(self, parsed_forms: list[ParsedForm]) -> dict[str, typing.Any]: [i.serialize() for i in line] for line in process_additional_info( self.plan.additional_info, parsed_forms, - self.teacher_abbreviation_by_surname, + self.teachers, self.plan.indiware_plan.date ) ], @@ -87,19 +87,15 @@ def info_data(self, parsed_forms: list[ParsedForm]) -> dict[str, typing.Any]: class StudentsPlanExtractor(PlanExtractor): - def __init__(self, plan_kl: str, vplan_kl: str | None, teacher_abbreviation_by_surname: dict[str, str], *, + def __init__(self, plan_kl: str, vplan_kl: str | None, teachers: Teachers, *, logger: logging.Logger): self._logger = logger + self.teachers = teachers form_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plan_kl)) self.plan = Plan.from_form_plan(form_plan) - self.extracted_teachers = self._extract_teachers() - - self.teacher_abbreviation_by_surname = ( - teacher_abbreviation_by_surname.copy() - | Teachers(list(self.extracted_teachers.values())).abbreviation_by_surname() - ) + self._extract_teachers() if vplan_kl is None: self.substitution_plan = None @@ -112,36 +108,39 @@ def __init__(self, plan_kl: str, vplan_kl: str | None, teacher_abbreviation_by_s self.form_plan_extractor = SubPlanExtractor( self.plan, "forms", - self.teacher_abbreviation_by_surname, + self.teachers, logger=self._logger ) self.room_plan_extractor = SubPlanExtractor( self.plan, "rooms", - self.teacher_abbreviation_by_surname, + self.teachers, logger=self._logger ) self.teacher_plan_extractor = SubPlanExtractor( self.plan, "teachers", - self.teacher_abbreviation_by_surname, + self.teachers, logger=self._logger ) - def _extract_teachers(self) -> dict[str, Teacher]: + def _extract_teachers(self): all_classes = self.plan.get_all_classes() - out: dict[str, Teacher] = {} for lesson in self.plan.lessons: - out |= lesson_info.extract_teachers(lesson, all_classes, logger=self._logger) - - return out + self.teachers.add_teachers( + *lesson_info.extract_teachers(lesson, all_classes, logger=self._logger) + ) def add_lessons_for_unavailable_from_subst_plan(self): for teacher_str in self.substitution_plan.absent_teachers: teacher_name, periods = parse_absent_element(teacher_str) - teacher_abbreviation = self.teacher_abbreviation_by_surname.get(teacher_name, teacher_name) + try: + teacher_abbreviation = self.teachers.query_plan_teacher(teacher_name).plan_short + except LookupError: + self._logger.warning(f" --> Unknown teacher: {teacher_name!r}.") + continue for period in periods or range(1, 11): info = f"{teacher_name}{' den ganzen Tag' if not periods else ''} abwesend laut Vertretungsplan" @@ -184,7 +183,7 @@ def default_plan(self) -> default_plan.DefaultPlanInfo: class SubPlanExtractor: def __init__(self, forms_plan: Plan, plan_type: typing.Literal["forms", "rooms", "teachers"], - teacher_abbreviation_by_surname: dict[str, str], *, logger: logging.Logger): + teachers: Teachers, *, logger: logging.Logger): self._logger = logger self.plan_type = plan_type self.forms_lessons_grouped = 
( @@ -196,12 +195,10 @@ def __init__(self, forms_plan: Plan, plan_type: typing.Literal["forms", "rooms", if self.plan_type in ("rooms", "teachers"): self.forms_lessons_grouped = self.forms_lessons_grouped.filter(lambda l: not l.is_internal) - self.resolve_teachers_in_lesson_info(teacher_abbreviation_by_surname) - self.extrapolate_lesson_times(self.forms_lessons_grouped) - - def resolve_teachers_in_lesson_info(self, teacher_abbreviation_by_surname: dict[str, str]): for lesson in self.forms_lessons_grouped: - lesson.parsed_info.resolve_teachers(teacher_abbreviation_by_surname) + lesson.parsed_info.resolve_teachers(teachers) + + self.extrapolate_lesson_times(self.forms_lessons_grouped) @staticmethod def extrapolate_lesson_times(lessons: Lessons): @@ -243,14 +240,14 @@ def grouped_form_plans(self) -> dict[str, dict[str, list]]: class TeachersPlanExtractor: - def __init__(self, plan_le: str, plan_ra: str | None, teacher_abbreviation_by_surname: dict[str, str], *, + def __init__(self, plan_le: str, plan_ra: str | None, teachers: Teachers, *, logger: logging.Logger): self._logger = logger teacher_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plan_le)) self.teacher_plan_extractor = PlanExtractor() self.teacher_plan_extractor.plan = Plan.from_teacher_plan(teacher_plan) - self.teacher_plan_extractor.teacher_abbreviation_by_surname = teacher_abbreviation_by_surname + self.teacher_plan_extractor.teachers = teachers self.teacher_plan_extractor._logger = logger self.teacher_plan_extractor.fill_in_lesson_times() @@ -258,7 +255,7 @@ def __init__(self, plan_le: str, plan_ra: str | None, teacher_abbreviation_by_su room_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plan_ra)) self.room_plan_extractor = PlanExtractor() self.room_plan_extractor.plan = Plan.from_room_plan(room_plan) - self.room_plan_extractor.teacher_abbreviation_by_surname = teacher_abbreviation_by_surname + self.room_plan_extractor.teachers = teachers self.room_plan_extractor._logger = logger self.room_plan_extractor.fill_in_lesson_times() diff --git a/backend/plan_processor.py b/backend/plan_processor.py index b76b6eda..682abb04 100644 --- a/backend/plan_processor.py +++ b/backend/plan_processor.py @@ -7,7 +7,7 @@ from . 
import schools, default_plan from .cache import Cache from .meta_extractor import MetaExtractor -from .teacher import Teacher, Teachers +from .teacher import Teachers from .models import PlanLesson, Exam from .vplan_utils import group_forms, ParsedForm from .stats import LessonsStatistics @@ -15,7 +15,7 @@ class PlanProcessor: - VERSION = "99" + VERSION = "100" def __init__(self, cache: Cache, school_number: str, *, logger: logging.Logger): self._logger = logger @@ -35,7 +35,12 @@ def load_teachers(self): self._logger.warning("=> Could not load any cached teachers.") return - self.teachers = Teachers.deserialize(data) + try: + self.teachers = Teachers.deserialize(data) + except Exception as e: + self._logger.error("=> Could not deserialize cached teachers.", exc_info=e) + self.teachers = Teachers() + return self._logger.info(f"=> Loaded {len(self.teachers.teachers)} cached teachers.") @@ -73,7 +78,7 @@ def compute_plans(self, date: datetime.date, timestamp: datetime.datetime): vplan_kl = None students_plan_extractor = StudentsPlanExtractor( - plan_kl, vplan_kl, self.teachers.abbreviation_by_surname(), logger=self._logger + plan_kl, vplan_kl, self.teachers, logger=self._logger ) self.cache.store_plan_file( @@ -139,7 +144,6 @@ def compute_plans(self, date: datetime.date, timestamp: datetime.datetime): all_forms = self.meta_extractor.forms() all_forms_parsed = [ParsedForm.from_str(f) for f in all_forms] - students_plan_extractor.teacher_abbreviation_by_surname = self.teachers.abbreviation_by_surname() self.cache.store_plan_file( date, timestamp, @@ -158,7 +162,7 @@ def compute_plans(self, date: datetime.date, timestamp: datetime.datetime): plan_ra = None teachers_plan_extractor = TeachersPlanExtractor( - plan_le, plan_ra, self.teachers.abbreviation_by_surname(), logger=self._logger + plan_le, plan_ra, self.teachers, logger=self._logger ) teachers_plans = { @@ -196,7 +200,6 @@ def compute_plans(self, date: datetime.date, timestamp: datetime.datetime): "rooms.teachers.json" ) - self.add_teachers(students_plan_extractor.extracted_teachers) self.cache.update_newest(date) self.cache.store_plan_file(date, timestamp, str(self.VERSION), ".processed") @@ -214,7 +217,7 @@ def update_meta(self): self.cache.store_meta_file(json.dumps(data), "meta.json") self.cache.store_meta_file(json.dumps(self.meta_extractor.dates_data()), "dates.json") - self.add_teachers({t.abbreviation: t for t in self.meta_extractor.teachers()}) + self.teachers.add_teachers(*self.meta_extractor.teachers()) self.scrape_teachers() self.update_forms() self.update_rooms() @@ -228,41 +231,21 @@ def scrape_teachers(self): if self.school_number not in schools.teacher_scrapers: self._logger.debug("=> No teacher scraper available for this school.") - scraped_teachers = {} + scraped_teachers = [] else: self._logger.info("=> Scraping teachers...") try: - _scraped_teachers = schools.teacher_scrapers[str(self.school_number)]() + scraped_teachers = schools.teacher_scrapers[str(self.school_number)]() except Exception as e: self._logger.error(" -> Exception while scraping teachers.", exc_info=e) - scraped_teachers = {} - else: - scraped_teachers = {teacher.abbreviation: teacher for teacher in _scraped_teachers} + scraped_teachers = [] self._logger.debug(f" -> Found {len(scraped_teachers)} teachers.") - self.add_teachers(scraped_teachers, update_timestamp=True) - self.store_teachers() - - def add_teachers(self, new_teachers: dict[str, Teacher], update_timestamp: bool = False): - self._logger.debug(f"=> Updating teachers... 
({len(new_teachers)})") - - old_teachers = self.teachers.to_dict() - all_abbreviations = set(old_teachers.keys()) | set(new_teachers.keys()) + self.teachers.add_teachers(*scraped_teachers) - merged_teachers = [] - for abbreviation in all_abbreviations: - new_teacher = new_teachers.get(abbreviation, Teacher(abbreviation)) - old_teacher = old_teachers.get(abbreviation, Teacher(abbreviation)) - - merged_teachers.append( - Teacher.merge(new_teacher, old_teacher) - ) - - self.teachers = Teachers( - teachers=merged_teachers, - scrape_timestamp=datetime.datetime.now() if update_timestamp else self.teachers.scrape_timestamp - ) + self.teachers.scrape_timestamp = datetime.datetime.now() + self.store_teachers() def store_teachers(self): self._logger.info("* Storing teachers...") diff --git a/backend/schools/ostwald.py b/backend/schools/ostwald.py index 6b201c99..3e227f0d 100644 --- a/backend/schools/ostwald.py +++ b/backend/schools/ostwald.py @@ -34,9 +34,10 @@ def scrape_teachers() -> list[Teacher]: teacher_data.append( Teacher( full_name=None, - surname=name.replace("Madame", "Frau").replace("Monsieur", "Herr"), - abbreviation=kuerzel, - subjects=faecher.replace("G/R/W", "GRW").split(" "), + full_surname=name, + plan_long=Teacher.strip_titles(name.replace("Madame", "Frau").replace("Monsieur", "Herr")), + plan_short=kuerzel, + subjects=set(faecher.replace("G/R/W", "GRW").split(" ")), info=additional_info, contact_link=teacher_link ) @@ -74,9 +75,10 @@ def scrape_teacher(teacher_link: str) -> Teacher: kuerzel = soup.find("span", {"class": "contact-mobile", "itemprop": "telephone"}).text.strip() return Teacher( full_name=None, - surname=name.replace("Madama", "Frau").replace("Monsieur", "Frau"), - subjects=subjects, - abbreviation=kuerzel, + full_surname=name, + plan_long=Teacher.strip_titles(name.replace("Madame", "Frau").replace("Monsieur", "Herr")), + subjects=set(subjects), + plan_short=kuerzel, info=additional_info, contact_link=f"{teacher_link}#display-form" ) diff --git a/backend/schools/taro.py b/backend/schools/taro.py index 4425c0ea..a3a6269e 100644 --- a/backend/schools/taro.py +++ b/backend/schools/taro.py @@ -27,14 +27,17 @@ def get_teachers() -> list[Teacher]: for name in teacher_images: if name[:-4].startswith(teacher_data[2]) and name[:-4].endswith(teacher_data[1]): teacher_lst.append(Teacher( - abbreviation=teacher_data[0], - surname=name[:-4].replace("_", " "), + plan_short=teacher_data[0], + plan_long=name[:-4].replace("_", " "), image_path=name )) break else: teacher_lst.append( - Teacher(abbreviation=teacher_data[0], surname=f"{teacher_data[2]} {teacher_data[1]}") + Teacher( + plan_short=teacher_data[0], + plan_long=f"{teacher_data[2]} {teacher_data[1]}", + ) ) return teacher_lst diff --git a/backend/teacher.py b/backend/teacher.py index cc42d782..5378b60e 100644 --- a/backend/teacher.py +++ b/backend/teacher.py @@ -6,78 +6,99 @@ @dataclasses.dataclass class Teacher: - abbreviation: str | None + plan_short: str full_name: str | None = None - surname: str | None = None - info: str | None = None - subjects: list[str] = dataclasses.field(default_factory=list) + full_surname: str | None = None + plan_long: str | None = None + info: str | None = None # TODO: to set? 
+ subjects: set[str] = dataclasses.field(default_factory=set) contact_link: str | None = None image_path: str | None = None + last_seen: datetime.date = datetime.date.min def serialize(self) -> dict: return { - "abbreviation": self.abbreviation, + "plan_short": self.plan_short, "full_name": self.full_name, - "surname": self.surname, + "full_surname": self.full_surname, + "plan_long": self.plan_long, "info": self.info, - "subjects": self.subjects, + "subjects": list(self.subjects), "contact_link": self.contact_link, - "image_path": self.image_path + "image_path": self.image_path, + "last_seen": self.last_seen.isoformat() } @classmethod def deserialize(cls, data: dict) -> Teacher: return cls( - abbreviation=data["abbreviation"], + plan_short=data["plan_short"], full_name=data["full_name"], - surname=data["surname"], + full_surname=data["full_surname"], + plan_long=data["plan_long"], info=data["info"], - subjects=data["subjects"], + subjects=set(data["subjects"]), contact_link=data.get("contact_link"), image_path=data.get("image_path"), + last_seen=datetime.date.fromisoformat(data["last_seen"]) ) def merge(self, other: Teacher) -> Teacher: return Teacher( - full_name=self.full_name or other.full_name, - surname=self.surname or other.surname, - info=self.info or other.info, - abbreviation=self.abbreviation or other.abbreviation, - subjects=list(set(self.subjects + other.subjects)), - contact_link=self.contact_link or other.contact_link, - image_path=self.image_path or other.image_path, + full_name=other.full_name or self.full_name, + full_surname=other.full_surname or self.full_surname, + plan_long=other.plan_long or self.plan_long, + info=other.info or self.info, + plan_short=other.plan_short or self.plan_short, + subjects=other.subjects | self.subjects, + contact_link=other.contact_link or self.contact_link, + image_path=other.image_path or self.image_path, + last_seen=max(other.last_seen, self.last_seen) ) - def surname_no_titles(self): - """Strip parts of self.surname like "Dr." and return it.""" - if self.surname is not None: - return " ".join(filter(lambda x: "." not in x, self.surname.split(" "))) - else: - return None + @staticmethod + def strip_titles(surname: str) -> str: + return " ".join(filter(lambda x: "." 
not in x, surname.split(" "))) @dataclasses.dataclass class Teachers: - teachers: list[Teacher] = dataclasses.field(default_factory=list) + teachers: dict[str, Teacher] = dataclasses.field(default_factory=dict) scrape_timestamp: datetime.datetime = datetime.datetime.min def serialize(self) -> dict: return { - "teachers": {teacher.abbreviation: teacher.serialize() for teacher in self.teachers}, + "teachers": {teacher.plan_short: teacher.serialize() for teacher in self.teachers.values()}, "timestamp": self.scrape_timestamp.isoformat() } @classmethod def deserialize(cls, data: dict) -> Teachers: return cls( - teachers=[Teacher.deserialize(teacher) for teacher in data["teachers"].values()], + teachers={key: Teacher.deserialize(teacher) for key, teacher in data["teachers"].items()}, scrape_timestamp=datetime.datetime.fromisoformat(data["timestamp"]) ) - def to_dict(self) -> dict[str, Teacher]: - return {teacher.abbreviation: teacher for teacher in self.teachers} + def add_teachers(self, *teachers: Teacher): + for teacher in teachers: + if teacher.plan_short not in self.teachers: + self.teachers[teacher.plan_short] = teacher + else: + self.teachers[teacher.plan_short] = self.teachers[teacher.plan_short].merge(teacher) - def abbreviation_by_surname(self) -> dict[str, str]: - return {teacher.surname_no_titles(): teacher.abbreviation - for teacher in self.teachers - if teacher.surname is not None} + def query(self, **attrs) -> list[Teacher]: + out = [] + for teacher in self.teachers.values(): + if all(getattr(teacher, attr) == value for attr, value in attrs.items()): + out.append(teacher) + + return sorted(out, key=lambda t: t.last_seen, reverse=True) + + def query_one(self, **attrs) -> Teacher: + try: + return next(iter(self.query(**attrs))) + except StopIteration as e: + raise LookupError("No teacher found matching the given attributes.") from e + + def query_plan_teacher(self, long_or_short: str) -> Teacher: + return self.teachers.get(long_or_short, self.query_one(plan_long=long_or_short)) From ca772025cbdde501d2d2d99e0daf47cf2da681e9 Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Fri, 3 Nov 2023 19:51:23 +0100 Subject: [PATCH 4/7] fix empty teacher names --- backend/lesson_info.py | 5 +++++ backend/teacher.py | 3 +++ 2 files changed, 8 insertions(+) diff --git a/backend/lesson_info.py b/backend/lesson_info.py index bae4e0d3..1c72ebe6 100644 --- a/backend/lesson_info.py +++ b/backend/lesson_info.py @@ -684,6 +684,8 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *, out: dict[str, teacher_model.Teacher] = {} for plan_short in lesson.teachers or (): + if not plan_short: + continue out[plan_short] = teacher_model.Teacher(plan_short) if lesson._is_scheduled: @@ -739,6 +741,9 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *, abbreviation = list(_class.values())[0].teacher + if not abbreviation: + continue + out[abbreviation] = teacher_model.Teacher( plan_short=abbreviation, plan_long=surname, diff --git a/backend/teacher.py b/backend/teacher.py index 5378b60e..7565b5f7 100644 --- a/backend/teacher.py +++ b/backend/teacher.py @@ -81,6 +81,9 @@ def deserialize(cls, data: dict) -> Teachers: def add_teachers(self, *teachers: Teacher): for teacher in teachers: + if teacher.plan_short == "": + continue + if teacher.plan_short not in self.teachers: self.teachers[teacher.plan_short] = teacher else: From 8b06c843d95da3799009703bcdce60f0b4d81e25 Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Fri, 3 Nov 2023 19:54:03 +0100 Subject: [PATCH 
5/7] teacher refactor --- backend/lesson_info.py | 2 +- client/src/components/Plan.svelte | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/lesson_info.py b/backend/lesson_info.py index 1c72ebe6..f8e8cd4d 100644 --- a/backend/lesson_info.py +++ b/backend/lesson_info.py @@ -686,7 +686,7 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *, for plan_short in lesson.teachers or (): if not plan_short: continue - out[plan_short] = teacher_model.Teacher(plan_short) + out[plan_short] = teacher_model.Teacher(plan_short, last_seen=lesson._lesson_date) if lesson._is_scheduled: return () diff --git a/client/src/components/Plan.svelte b/client/src/components/Plan.svelte index e3abfee3..bf9a93f8 100644 --- a/client/src/components/Plan.svelte +++ b/client/src/components/Plan.svelte @@ -283,7 +283,7 @@ $: if (plan_type === "teachers") { if (meta.teachers) { - full_teacher_name = meta.teachers[plan_value]?.surname || null; + full_teacher_name = meta.teachers[plan_value]?.full_surname || meta.teachers[plan_value]?.plan_long || null; teacher_contact_link = meta.teachers[plan_value]?.contact_link || null; teacher_image_path = "/public/base_static/images/teachers/" + school_num + "/" + meta.teachers[plan_value]?.image_path || null; teacher_image_path = meta.teachers[plan_value]?.image_path || null; From 01200d238a39150d313d5f7bdf33f09c62580e2b Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Sat, 4 Nov 2023 10:52:05 +0100 Subject: [PATCH 6/7] fix #209, fix #152 --- backend/lesson_info.py | 2 -- backend/meta_extractor.py | 37 +++++++++++------------- backend/plan_processor.py | 2 +- backend/schools/ostwald.py | 5 ++-- backend/teacher.py | 59 ++++++++++++++++++++++++++++++-------- 5 files changed, 67 insertions(+), 38 deletions(-) diff --git a/backend/lesson_info.py b/backend/lesson_info.py index f8e8cd4d..56c17ed1 100644 --- a/backend/lesson_info.py +++ b/backend/lesson_info.py @@ -684,8 +684,6 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *, out: dict[str, teacher_model.Teacher] = {} for plan_short in lesson.teachers or (): - if not plan_short: - continue out[plan_short] = teacher_model.Teacher(plan_short, last_seen=lesson._lesson_date) if lesson._is_scheduled: diff --git a/backend/meta_extractor.py b/backend/meta_extractor.py index d93f1b25..ab32a755 100644 --- a/backend/meta_extractor.py +++ b/backend/meta_extractor.py @@ -19,22 +19,27 @@ class DailyMetaExtractor: def __init__(self, plankl_file: str): self.form_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plankl_file)) - def teachers(self) -> dict[str, set[str]]: - excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA"] + def teachers(self) -> list[Teacher]: + excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA", "EU4"] - all_teachers = set() + out = [] for form in self.form_plan.forms: for lesson in form.lessons: - if lesson.teacher(): - all_teachers.add(lesson.teacher()) + for teacher in (lesson.teacher() or "").split(): + if not teacher: + continue + out.append(Teacher(plan_short=teacher, last_seen=self.form_plan.date)) - teachers = defaultdict(set, {teacher: set() for teacher in all_teachers}) - for form in self.form_plan.forms: for class_ in form.classes.values(): - if class_.teacher and class_.subject not in excluded_subjects: - teachers[class_.teacher].add(class_.subject) + subjects = set(s for s in class_.subject.split() if s not in excluded_subjects) + + for teacher in class_.teacher.split(): + if not teacher: + continue - return teachers + 
out.append(Teacher(plan_short=teacher, subjects=subjects, last_seen=self.form_plan.date)) + + return out def forms(self) -> list[str]: return [form.short_name for form in self.form_plan.forms] @@ -117,17 +122,7 @@ def rooms(self) -> set[str]: return rooms def teachers(self) -> list[Teacher]: - teachers: dict[str, set[str]] = defaultdict(set) - - for extractor in self.iterate_daily_extractors(): - for _teacher, subjects in extractor.teachers().items(): - for teacher in _teacher.split(" "): - teachers[teacher] |= subjects - - return [ - Teacher(abbreviation, None, None, None, subjects=subjects) - for abbreviation, subjects in teachers.items() - ] + return sum((e.teachers() for e in self.iterate_daily_extractors()), []) def forms(self) -> list[str]: forms: set[str] = set() diff --git a/backend/plan_processor.py b/backend/plan_processor.py index 682abb04..3e8f9b03 100644 --- a/backend/plan_processor.py +++ b/backend/plan_processor.py @@ -15,7 +15,7 @@ class PlanProcessor: - VERSION = "100" + VERSION = "101" def __init__(self, cache: Cache, school_number: str, *, logger: logging.Logger): self._logger = logger diff --git a/backend/schools/ostwald.py b/backend/schools/ostwald.py index 3e227f0d..311b2d8b 100644 --- a/backend/schools/ostwald.py +++ b/backend/schools/ostwald.py @@ -37,7 +37,7 @@ def scrape_teachers() -> list[Teacher]: full_surname=name, plan_long=Teacher.strip_titles(name.replace("Madame", "Frau").replace("Monsieur", "Herr")), plan_short=kuerzel, - subjects=set(faecher.replace("G/R/W", "GRW").split(" ")), + subjects=set(faecher.replace("G/R/W", "GRW").split()), info=additional_info, contact_link=teacher_link ) @@ -62,10 +62,11 @@ def scrape_teacher(teacher_link: str) -> Teacher: if "-" in name_field: name, subjects, _ = name_field.split(" -") name, subjects = name.strip(), subjects.strip() - subjects = subjects.split(" ") + subjects = subjects.split() else: name = name_field subjects = soup.find("span", {"class": "contact-misc"}).find("p").text.strip().split("/") + subjects = sum((s.split() for s in subjects), []) additional_info = soup.find("dd", {"itemprop": "jobTitle"}) if additional_info: diff --git a/backend/teacher.py b/backend/teacher.py index 7565b5f7..f82342a3 100644 --- a/backend/teacher.py +++ b/backend/teacher.py @@ -2,25 +2,47 @@ import dataclasses import datetime +import typing -@dataclasses.dataclass class Teacher: - plan_short: str - full_name: str | None = None - full_surname: str | None = None - plan_long: str | None = None - info: str | None = None # TODO: to set? - subjects: set[str] = dataclasses.field(default_factory=set) - contact_link: str | None = None - image_path: str | None = None - last_seen: datetime.date = datetime.date.min + def __init__(self, + plan_short: str, + full_name: str | None = None, + full_surname: str | None = None, + _plan_long: dict[str, int] = None, + info: str | None = None, + subjects: set[str] = None, + contact_link: str | None = None, + image_path: str | None = None, + last_seen: datetime.date = datetime.date.min, + plan_long: str = None): + _plan_long = {} if _plan_long is None else _plan_long + subjects = set() if subjects is None else subjects + + if plan_long is not None: + _plan_long[plan_long] = 1 + + self.plan_short = plan_short + self.full_name = full_name + self.full_surname = full_surname + self._plan_long: dict[str, int] = _plan_long + self.info = info # TODO: to set? 
+ self.subjects = subjects + self.contact_link = contact_link + self.image_path = image_path + self.last_seen = last_seen + + @property + def plan_long(self) -> str: + return max(self._plan_long, key=self._plan_long.get, default=None) def serialize(self) -> dict: return { "plan_short": self.plan_short, "full_name": self.full_name, "full_surname": self.full_surname, + "_plan_long": self._plan_long, "plan_long": self.plan_long, "info": self.info, "subjects": list(self.subjects), @@ -35,7 +57,7 @@ def deserialize(cls, data: dict) -> Teacher: plan_short=data["plan_short"], full_name=data["full_name"], full_surname=data["full_surname"], - plan_long=data["plan_long"], + _plan_long=data["_plan_long"], info=data["info"], subjects=set(data["subjects"]), contact_link=data.get("contact_link"), @@ -47,7 +69,7 @@ def merge(self, other: Teacher) -> Teacher: return Teacher( full_name=other.full_name or self.full_name, full_surname=other.full_surname or self.full_surname, - plan_long=other.plan_long or self.plan_long, + _plan_long={k: v1 + v2 for k, v1, v2 in zip_dicts(self._plan_long, other._plan_long, default=0)}, info=other.info or self.info, plan_short=other.plan_short or self.plan_short, subjects=other.subjects | self.subjects, @@ -105,3 +127,16 @@ def query_one(self, **attrs) -> Teacher: def query_plan_teacher(self, long_or_short: str) -> Teacher: return self.teachers.get(long_or_short, self.query_one(plan_long=long_or_short)) + + +_UNSET = object() + + +def zip_dicts(*dicts: dict, default=_UNSET) -> typing.Generator[tuple, None, None]: + if default is _UNSET: + all_keys = set(dicts[0]).intersection(*dicts) + else: + all_keys = set().union(*dicts) + + for key in all_keys: + yield key, *tuple(d.get(key, default) for d in dicts) From 360c40a7cb2f05d164f5fbcb4ff42573db00b937 Mon Sep 17 00:00:00 2001 From: Belissimo_T Date: Sat, 4 Nov 2023 11:38:14 +0100 Subject: [PATCH 7/7] add utils.py --- backend/import_files.py | 4 +-- backend/load_plans.py | 34 ++++++++++++++----------- backend/meta_extractor.py | 3 ++- backend/utils.py | 53 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 18 deletions(-) create mode 100644 backend/utils.py diff --git a/backend/import_files.py b/backend/import_files.py index 1aee9808..54ad7ce4 100644 --- a/backend/import_files.py +++ b/backend/import_files.py @@ -3,11 +3,11 @@ import sys from pathlib import Path -from .load_plans import get_clients +from .load_plans import get_crawlers async def main(): - clients = await get_clients() + clients = await get_crawlers() directory = Path(sys.argv[1]) diff --git a/backend/load_plans.py b/backend/load_plans.py index 7b194d12..1946901d 100644 --- a/backend/load_plans.py +++ b/backend/load_plans.py @@ -54,29 +54,33 @@ async def check_infinite(self, interval: int = 60, *, once: bool = False, ignore await asyncio.sleep(interval) -async def get_clients(session: aiohttp.ClientSession | None = None, - proxy_provider: proxies.ProxyProvider | None = None) -> dict[str, PlanCrawler]: +async def get_crawlers(session: aiohttp.ClientSession | None = None, + proxy_provider: proxies.ProxyProvider | None = None, + create_clients: bool = True) -> dict[str, PlanCrawler]: creds_provider = creds_provider_factory(Path("creds.json")) _creds = creds_provider.get_creds() - clients = {} + crawlers = {} for school_name, data in _creds.items(): specifier = data['school_number'] if 'school_number' in data else school_name logger = logging.getLogger(specifier) cache = Cache(Path(f".cache/{specifier}").absolute()) - data["hosting"]["creds"] = 
data["hosting"]["creds"].get("teachers", data["hosting"]["creds"].get("students")) - hosting = Hosting.deserialize(data["hosting"]) - client = IndiwareStundenplanerClient(hosting, session) + if create_clients: + data["hosting"]["creds"] = data["hosting"]["creds"].get("teachers", data["hosting"]["creds"].get("students")) + hosting = Hosting.deserialize(data["hosting"]) + client = IndiwareStundenplanerClient(hosting, session) - for plan_client in client.substitution_plan_clients: - plan_client.proxy_provider = proxy_provider - plan_client.no_delay = True + for plan_client in client.substitution_plan_clients: + plan_client.proxy_provider = proxy_provider + plan_client.no_delay = True - for plan_client in client.indiware_mobil_clients: - plan_client.proxy_provider = proxy_provider - plan_client.no_delay = True + for plan_client in client.indiware_mobil_clients: + plan_client.proxy_provider = proxy_provider + plan_client.no_delay = True + else: + client = None plan_downloader = PlanDownloader(client, cache, logger=logger) plan_processor = PlanProcessor(cache, specifier, logger=logger) @@ -84,9 +88,9 @@ async def get_clients(session: aiohttp.ClientSession | None = None, # create crawler p = PlanCrawler(plan_downloader, plan_processor) - clients[school_name] = p + crawlers[school_name] = p - return clients + return crawlers async def main(): @@ -119,7 +123,7 @@ async def main(): never_raise_out_of_proxies=args.never_raise_out_of_proxies) # list(proxy_provider.fetch_proxies()) - clients = await get_clients(proxy_provider=proxy_provider) + clients = await get_crawlers(proxy_provider=proxy_provider, create_clients=not args.only_process) try: if args.only_process: for client in clients.values(): diff --git a/backend/meta_extractor.py b/backend/meta_extractor.py index ab32a755..1caa04f6 100644 --- a/backend/meta_extractor.py +++ b/backend/meta_extractor.py @@ -78,7 +78,7 @@ def free_days(self) -> list[datetime.date]: class MetaExtractor: - def __init__(self, cache: Cache, num_last_days: int = 10, *, logger: logging.Logger): + def __init__(self, cache: Cache, num_last_days: int | None = 10, *, logger: logging.Logger): self._logger = logger self.cache = cache @@ -90,6 +90,7 @@ def __init__(self, cache: Cache, num_last_days: int = 10, *, logger: logging.Log def iterate_daily_extractors(self) -> typing.Generator[DailyMetaExtractor, None, None]: for day in self.cache.get_days()[:self.num_last_days]: for timestamp in self.cache.get_timestamps(day): + self._logger.log(5, f"Yielding DailyMetaExtractor for {day!s} {timestamp!s}.") if (day, timestamp) in self._daily_extractors: yield self._daily_extractors[(day, timestamp)] else: diff --git a/backend/utils.py b/backend/utils.py new file mode 100644 index 00000000..4c11ecf3 --- /dev/null +++ b/backend/utils.py @@ -0,0 +1,53 @@ +import argparse +import asyncio +import logging + +from backend import meta_extractor +from backend.load_plans import get_crawlers + + +async def main(): + logging.basicConfig(level=1, format="[%(asctime)s] [%(levelname)8s] %(name)s: %(message)s", + datefmt="%Y-%m-%d %H:%M:%S") + + argparser = argparse.ArgumentParser() + + subparsers = argparser.add_subparsers(dest="subcommand") + + migrate_all = subparsers.add_parser("migrate-all") + extract_all_teachers = subparsers.add_parser("extract-all-teachers") + + args = argparser.parse_args() + + crawlers = await get_crawlers(create_clients=False) + + if args.subcommand == "migrate-all": + for crawler in crawlers.values(): + for day in crawler.plan_processor.cache.get_days(): + 
crawler.plan_processor.cache.update_newest(day) + + for revision in crawler.plan_processor.cache.get_timestamps(day): + crawler.plan_processor._logger.info(f"Computing plans for {day} {revision}...") + crawler.plan_processor.compute_plans(day, revision) + + crawler.plan_processor.update_meta() + crawler.plan_processor.update_default_plan() + crawler.plan_processor.store_teachers() + + elif args.subcommand == "extract-all-teachers": + for crawler in crawlers.values(): + crawler.plan_processor._logger.info("Extracting teachers...") + + class NullDict(dict): + def __setattr__(self, key, value): + pass + + extractor = meta_extractor.MetaExtractor(crawler.plan_processor.cache, num_last_days=None, + logger=crawler.plan_processor._logger) + extractor._daily_extractors = NullDict() + crawler.plan_processor.teachers.add_teachers(*extractor.teachers()) + crawler.plan_processor.store_teachers() + + +if __name__ == '__main__': + asyncio.run(main())
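
A minimal usage sketch of the refactored Teacher/Teachers API from backend/teacher.py as it stands after patch 6: merging records by plan_short, counting observed plan_long spellings so the most frequent one wins, resolving a long plan name back to its abbreviation, and round-tripping through the JSON cache. The sample names "Mül"/"Müller" and the subject codes are invented for illustration and do not appear in the patches.

import datetime

from backend.teacher import Teacher, Teachers

teachers = Teachers()

# Teachers extracted from plans usually carry only the plan abbreviation and the
# date they were last seen; scraped teachers may add a long name and subjects.
teachers.add_teachers(
    Teacher(plan_short="Mül", last_seen=datetime.date(2023, 11, 3)),
    Teacher(plan_short="Mül", plan_long="Müller", subjects={"DE", "GE"}),
)

# add_teachers() merges records sharing a plan_short: subjects are unioned,
# last_seen keeps the newest date, and every observed plan_long spelling is
# counted so the most frequent one wins via the Teacher.plan_long property.
merged = teachers.teachers["Mül"]
assert merged.subjects == {"DE", "GE"}
assert merged.plan_long == "Müller"

# A long plan name resolves back to the Teacher record; the plan extractor uses
# this to map surnames found in lesson infos onto plan abbreviations.
assert teachers.query_plan_teacher("Müller").plan_short == "Mül"

# The whole collection round-trips through plain dicts for the JSON cache.
restored = Teachers.deserialize(teachers.serialize())
assert restored.teachers["Mül"].plan_long == "Müller"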