Merge pull request #225 from ArtrenH/main
2023-11-04
Belissimo-T authored Nov 4, 2023
2 parents a7f5ba7 + 2b05f10 commit 056e116
Showing 16 changed files with 462 additions and 275 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main.yml
@@ -22,6 +22,7 @@ jobs:
git checkout production
git pull
source venv/bin/activate.fish
pip install --upgrade pip
pip install -r requirements.txt
cd client
4 changes: 2 additions & 2 deletions backend/import_files.py
@@ -3,11 +3,11 @@
import sys
from pathlib import Path

from .load_plans import get_clients
from .load_plans import get_crawlers


async def main():
clients = await get_clients()
clients = await get_crawlers()

directory = Path(sys.argv[1])

62 changes: 31 additions & 31 deletions backend/lesson_info.py
@@ -417,10 +417,6 @@ def create_literal_parsed_info(msg: str) -> ParsedLessonInfo:
)


def resolve_teacher_abbreviations(surnames: list[str], abbreviation_by_surname: dict[str, str]) -> list[str]:
return [abbreviation_by_surname.get(surname, surname) for surname in surnames]


def _parse_form_plan_message(info: str, lesson: models.Lesson) -> tuple[ParsedLessonInfoMessage, re.Match | None]:
if match := _InfoParsers.substitution.match(info):
return InsteadOfCourse(
@@ -649,14 +645,16 @@ def sorted_canonical(self) -> ParsedLessonInfo:
sorted(paragraphs, key=lambda p: [i.parsed.original_messages for i in p.messages])
)

def resolve_teachers(self, teacher_abbreviation_by_surname: dict[str, str]):
def resolve_teachers(self, teachers: teacher_model.Teachers):
for paragraph in self.paragraphs:
for message in paragraph.messages:
if hasattr(message.parsed, "_teachers"):
message.parsed.other_info_value = resolve_teacher_abbreviations(
message.parsed._teachers,
teacher_abbreviation_by_surname
)
try:
message.parsed.other_info_value = [
teachers.query_plan_teacher(teacher_str).plan_short for teacher_str in message.parsed._teachers
]
except LookupError:
message.parsed.other_info_value = None

def lesson_group_sort_key(self) -> list[list[list[str]]]:
return [
@@ -682,14 +680,14 @@ def __add__(self, other: ParsedLessonInfo):


def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *,
logger: logging.Logger) -> dict[str, teacher_model.Teacher]:
logger: logging.Logger) -> typing.Iterable[teacher_model.Teacher]:
out: dict[str, teacher_model.Teacher] = {}

for teacher_abbreviation in lesson.teachers or ():
out[teacher_abbreviation] = teacher_model.Teacher(teacher_abbreviation)
for plan_short in lesson.teachers or ():
out[plan_short] = teacher_model.Teacher(plan_short, last_seen=lesson._lesson_date)

if lesson._is_scheduled:
return out
return ()

for paragraph in lesson.parsed_info.paragraphs:
for message in paragraph.messages:
@@ -740,28 +738,34 @@ def extract_teachers(lesson: models.Lesson, classes: dict[str, models.Class], *,
continue

abbreviation = list(_class.values())[0].teacher
teacher = teacher_model.Teacher(abbreviation, None, surname, None, [])

out[teacher.abbreviation] = teacher
if not abbreviation:
continue

return out
out[abbreviation] = teacher_model.Teacher(
plan_short=abbreviation,
plan_long=surname,
last_seen=lesson._lesson_date
)

return out.values()


def process_additional_info(info: list[str], parsed_existing_forms: list[ParsedForm],
teacher_abbreviation_by_surname: dict[str, str], date: datetime.date
teachers: teacher_model.Teachers, date: datetime.date
) -> list[list[LessonInfoTextSegment]]:
info = info.copy()
while info and not info[-1]:
info.pop()

return [
process_additional_info_line(line, parsed_existing_forms, teacher_abbreviation_by_surname, date)
process_additional_info_line(line, parsed_existing_forms, teachers, date)
for line in info
]


def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedForm],
teacher_abbreviation_by_surname: dict[str, str], date: datetime.date
teachers: teacher_model.Teachers, date: datetime.date
) -> list[LessonInfoTextSegment]:
if text is None:
return []
@@ -771,7 +775,7 @@ def process_additional_info_line(text: str, parsed_existing_forms: list[ParsedFo
text = re.sub(r"\b {1,3}\b", " ", text.strip())

funcs = (
lambda s: add_fuzzy_teacher_links(s, teacher_abbreviation_by_surname, date),
lambda s: add_fuzzy_teacher_links(s, teachers, date),
lambda s: add_fuzzy_form_links(s, parsed_existing_forms, date)
)

@@ -857,24 +861,20 @@ def validator(match: re.Match) -> list[LessonInfoTextSegment] | None:
return add_fuzzy_with_validator(text, [_loose_parse_form_pattern], validator)


def add_fuzzy_teacher_links(text: str, teacher_abbreviation_by_surname: dict[str, str], date: datetime.date):
abbreviations = set(teacher_abbreviation_by_surname.values())

def add_fuzzy_teacher_links(text: str, teachers: teacher_model.Teachers, date: datetime.date):
def validator(match: re.Match) -> list[LessonInfoTextSegment] | None:
surname_or_abbreviation = match.group()

if surname_or_abbreviation not in abbreviations and surname_or_abbreviation in teacher_abbreviation_by_surname:
abbreviation = teacher_abbreviation_by_surname[surname_or_abbreviation]
elif surname_or_abbreviation in abbreviations:
abbreviation = surname_or_abbreviation
else:
abbreviation = None
try:
plan_short = teachers.query_plan_teacher(surname_or_abbreviation).plan_short
except LookupError:
plan_short = None

if abbreviation is not None:
if plan_short is not None:
return [
LessonInfoTextSegment(
surname_or_abbreviation,
link=LessonInfoTextSegmentLink("teachers", [abbreviation], date, None)
link=LessonInfoTextSegmentLink("teachers", [plan_short], date, None)
)
]
else:
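Note on the lesson_info.py change: the surname→abbreviation dict (`resolve_teacher_abbreviations`) is replaced by queries against a `teacher_model.Teachers` object whose `query_plan_teacher()` raises `LookupError` on a miss. A minimal, self-contained sketch of that lookup pattern — the `Teacher`/`Teachers` classes below are simplified stand-ins, not the real backend models; only the `query_plan_teacher()`/`plan_short` names and the `LookupError` handling are taken from the diff:

```python
import dataclasses


@dataclasses.dataclass
class Teacher:
    plan_short: str                # abbreviation as it appears in the plan, e.g. "Mül"
    plan_long: str | None = None   # surname as it appears in the plan, e.g. "Müller"


class Teachers:
    def __init__(self, teachers: list[Teacher]):
        self._teachers = teachers

    def query_plan_teacher(self, surname_or_abbreviation: str) -> Teacher:
        # Accept either the abbreviation or the surname; raise LookupError when
        # nothing matches, mirroring the except LookupError branches in the diff.
        for teacher in self._teachers:
            if surname_or_abbreviation in (teacher.plan_short, teacher.plan_long):
                return teacher
        raise LookupError(surname_or_abbreviation)


def resolve(teachers: Teachers, names: list[str]) -> list[str] | None:
    # Same shape as the new resolve_teachers() body: all-or-nothing resolution.
    try:
        return [teachers.query_plan_teacher(name).plan_short for name in names]
    except LookupError:
        return None


if __name__ == "__main__":
    teachers = Teachers([Teacher("Mül", "Müller"), Teacher("Schm", "Schmidt")])
    print(resolve(teachers, ["Müller", "Schm"]))   # -> ['Mül', 'Schm']
    print(resolve(teachers, ["Unbekannt"]))        # -> None
```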
34 changes: 19 additions & 15 deletions backend/load_plans.py
@@ -54,39 +54,43 @@ async def check_infinite(self, interval: int = 60, *, once: bool = False, ignore
await asyncio.sleep(interval)


async def get_clients(session: aiohttp.ClientSession | None = None,
proxy_provider: proxies.ProxyProvider | None = None) -> dict[str, PlanCrawler]:
async def get_crawlers(session: aiohttp.ClientSession | None = None,
proxy_provider: proxies.ProxyProvider | None = None,
create_clients: bool = True) -> dict[str, PlanCrawler]:
creds_provider = creds_provider_factory(Path("creds.json"))
_creds = creds_provider.get_creds()

clients = {}
crawlers = {}

for school_name, data in _creds.items():
specifier = data['school_number'] if 'school_number' in data else school_name
logger = logging.getLogger(specifier)
cache = Cache(Path(f".cache/{specifier}").absolute())

data["hosting"]["creds"] = data["hosting"]["creds"].get("teachers", data["hosting"]["creds"].get("students"))
hosting = Hosting.deserialize(data["hosting"])
client = IndiwareStundenplanerClient(hosting, session)
if create_clients:
data["hosting"]["creds"] = data["hosting"]["creds"].get("teachers", data["hosting"]["creds"].get("students"))
hosting = Hosting.deserialize(data["hosting"])
client = IndiwareStundenplanerClient(hosting, session)

for plan_client in client.substitution_plan_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True
for plan_client in client.substitution_plan_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True

for plan_client in client.indiware_mobil_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True
for plan_client in client.indiware_mobil_clients:
plan_client.proxy_provider = proxy_provider
plan_client.no_delay = True
else:
client = None

plan_downloader = PlanDownloader(client, cache, logger=logger)
plan_processor = PlanProcessor(cache, specifier, logger=logger)

# create crawler
p = PlanCrawler(plan_downloader, plan_processor)

clients[school_name] = p
crawlers[school_name] = p

return clients
return crawlers


async def main():
@@ -119,7 +123,7 @@ async def main():
never_raise_out_of_proxies=args.never_raise_out_of_proxies)
# list(proxy_provider.fetch_proxies())

clients = await get_clients(proxy_provider=proxy_provider)
clients = await get_crawlers(proxy_provider=proxy_provider, create_clients=not args.only_process)
try:
if args.only_process:
for client in clients.values():
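Note on the load_plans.py change: `get_clients` is renamed to `get_crawlers`, and the new `create_clients` flag lets crawlers be built without an `IndiwareStundenplanerClient` (the downloader then gets `client=None`), so `--only-process` runs can skip credential/client setup entirely. A hedged usage sketch under those assumptions — the `--only-process` branch body is truncated in the diff above, and `check_infinite()` being a `PlanCrawler` method is inferred from the hunk context, not verified:

```python
import argparse
import asyncio

from backend.load_plans import get_crawlers  # renamed from get_clients in this PR


async def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--only-process", action="store_true")
    args = parser.parse_args()

    # create_clients=False builds crawlers without an IndiwareStundenplanerClient,
    # so nothing is downloaded and only cached plans can be (re)processed.
    crawlers = await get_crawlers(create_clients=not args.only_process)

    if args.only_process:
        for crawler in crawlers.values():
            ...  # re-process cached plans only; the real branch is truncated in the diff above
    else:
        # continuous crawling, as in the existing main()
        await asyncio.gather(*(crawler.check_infinite() for crawler in crawlers.values()))


if __name__ == "__main__":
    asyncio.run(main())
```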
43 changes: 18 additions & 25 deletions backend/meta_extractor.py
@@ -19,22 +19,27 @@ class DailyMetaExtractor:
def __init__(self, plankl_file: str):
self.form_plan = indiware_mobil.IndiwareMobilPlan.from_xml(ET.fromstring(plankl_file))

def teachers(self) -> dict[str, list[str]]:
excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA"]
def teachers(self) -> list[Teacher]:
excluded_subjects = ["KL", "AnSt", "FÖ", "WB", "GTA", "EU4"]

all_teachers = set()
out = []
for form in self.form_plan.forms:
for lesson in form.lessons:
if lesson.teacher():
all_teachers.add(lesson.teacher())
for teacher in (lesson.teacher() or "").split():
if not teacher:
continue
out.append(Teacher(plan_short=teacher, last_seen=self.form_plan.date))

teachers = defaultdict(list, {teacher: [] for teacher in all_teachers})
for form in self.form_plan.forms:
for class_ in form.classes.values():
if class_.teacher and class_.subject not in excluded_subjects:
teachers[class_.teacher].append(class_.subject)
subjects = set(s for s in class_.subject.split() if s not in excluded_subjects)

return teachers
for teacher in class_.teacher.split():
if not teacher:
continue

out.append(Teacher(plan_short=teacher, subjects=subjects, last_seen=self.form_plan.date))

return out

def forms(self) -> list[str]:
return [form.short_name for form in self.form_plan.forms]
@@ -73,7 +78,7 @@ def free_days(self) -> list[datetime.date]:


class MetaExtractor:
def __init__(self, cache: Cache, num_last_days: int = 10, *, logger: logging.Logger):
def __init__(self, cache: Cache, num_last_days: int | None = 10, *, logger: logging.Logger):
self._logger = logger

self.cache = cache
Expand All @@ -85,6 +90,7 @@ def __init__(self, cache: Cache, num_last_days: int = 10, *, logger: logging.Log
def iterate_daily_extractors(self) -> typing.Generator[DailyMetaExtractor, None, None]:
for day in self.cache.get_days()[:self.num_last_days]:
for timestamp in self.cache.get_timestamps(day):
self._logger.log(5, f"Yielding DailyMetaExtractor for {day!s} {timestamp!s}.")
if (day, timestamp) in self._daily_extractors:
yield self._daily_extractors[(day, timestamp)]
else:
@@ -117,20 +123,7 @@ def rooms(self) -> set[str]:
return rooms

def teachers(self) -> list[Teacher]:
teachers: dict[str, list[str]] = defaultdict(list)

for extractor in self.iterate_daily_extractors():
for _teacher, subjects in extractor.teachers().items():
for teacher in _teacher.split(" "):
teachers[teacher].extend(subjects)

for teacher, subjects in teachers.items():
teachers[teacher] = sorted(set(subjects))

return [
Teacher(abbreviation, None, None, None, subjects=subjects)
for abbreviation, subjects in teachers.items()
]
return sum((e.teachers() for e in self.iterate_daily_extractors()), [])

def forms(self) -> list[str]:
forms: set[str] = set()
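Note on the meta_extractor.py change: `DailyMetaExtractor.teachers()` now emits one `Teacher` record per sighting (with `plan_short`, `subjects`, `last_seen`), and `MetaExtractor.teachers()` just concatenates them via `sum(..., [])`, so the same abbreviation can appear many times. The deduplication presumably happens downstream in the teacher store; a minimal sketch of that kind of merge under that assumption — the merge itself is not code from this PR, and the `Teacher` dataclass is a simplified stand-in:

```python
import dataclasses
import datetime


@dataclasses.dataclass
class Teacher:  # simplified stand-in for teacher_model.Teacher
    plan_short: str
    subjects: set[str] = dataclasses.field(default_factory=set)
    last_seen: datetime.date | None = None


def merge_teachers(sightings: list[Teacher]) -> dict[str, Teacher]:
    merged: dict[str, Teacher] = {}
    for t in sightings:
        if t.plan_short not in merged:
            merged[t.plan_short] = Teacher(t.plan_short, set(t.subjects), t.last_seen)
            continue
        m = merged[t.plan_short]
        m.subjects |= t.subjects                      # union of all seen subjects
        if t.last_seen and (m.last_seen is None or t.last_seen > m.last_seen):
            m.last_seen = t.last_seen                 # keep the most recent sighting
    return merged


sightings = [
    Teacher("Mül", {"DE"}, datetime.date(2023, 11, 3)),
    Teacher("Mül", {"GE"}, datetime.date(2023, 11, 4)),
    Teacher("Schm", {"MA"}, datetime.date(2023, 11, 4)),
]
print(merge_teachers(sightings))   # "Mül" ends up with {'DE', 'GE'}, last seen 2023-11-04
```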
