Skip to content

Commit

Permalink
Merge pull request #259 from ArtrenH/main
Browse files Browse the repository at this point in the history
2023-12-03
  • Loading branch information
Belissimo-T authored Dec 3, 2023
2 parents ec59510 + 26ae598 commit 907c4fa
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 45 deletions.
71 changes: 44 additions & 27 deletions backend/_event_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,14 @@ def main():
v["_id"]: v["display_name"] for v in creds.values()
}

cutoff = datetime.datetime(2023, 11, 30, tzinfo=datetime.timezone.utc)

def plot_basic(is_log: bool, data, title: str, unit: typing.Literal["seconds", None] = "seconds"):
fig, ax = plt.subplots(layout='constrained')

for school_number, points in data.items():
data[school_number] = filter(lambda x: x[0] > cutoff, points)

for school_number, points in data.items():
ax.plot(*zip(*points), label=school_number, marker="x", linestyle="solid", alpha=0.5, linewidth=1,
markersize=5)
Expand All @@ -30,7 +35,7 @@ def plot_basic(is_log: bool, data, title: str, unit: typing.Literal["seconds", N
l.set_marker(marker_type)
l.set_linestyle(line_style)

ax.set_ylim(bottom=0, top=max(max(y for x, y in points) for points in data.values()) * 1.1)
# ax.set_ylim(bottom=0, top=max(max(y for x, y in points) for points in data.values()) * 1.1)

if unit == "seconds":
ax.set_ylabel('Sekunden')
Expand All @@ -52,6 +57,20 @@ def plot_basic(is_log: bool, data, title: str, unit: typing.Literal["seconds", N
plt.title(title)
plt.show()

def plan_size(is_log: bool = False, plan_type: str = "PlanKl.xml"):
    """Plot the size of each downloaded plan file over time, per school.

    Iterates all PlanDownload events, keeps only those matching *plan_type*
    that carry a known file size, and hands the (timestamp, size) series to
    plot_basic, grouped by school display name.

    :param is_log: if True, plot the y-axis on a logarithmic scale.
    :param plan_type: plan file name to include (default "PlanKl.xml",
        i.e. the student plan; pass e.g. "PlanLe.xml" for other plan kinds).
    """
    data: dict[str, list[tuple[datetime.datetime, float]]] = defaultdict(list)
    for event in events.iterate_events(events.PlanDownload):
        # Skip events without a recorded size and plans of other types.
        if event.file_length is None or event.plan_type != plan_type:
            continue
        y = event.file_length
        x = event.start_time

        data[creds[event.school_number]].append((x, y))

    title = "Größe des heruntergeladenen Plans"

    # unit=None: file sizes are bytes, not the default "seconds" axis label.
    plot_basic(is_log, data, title, unit=None)

def time_from_upload_till_available(is_log: bool = False):
title = "Zeit von Planupload bis Bereitstellung auf VPlan.fr"

Expand All @@ -60,9 +79,6 @@ def time_from_upload_till_available(is_log: bool = False):
y = (event.start_time - event.revision).total_seconds()
x = event.start_time

if x < datetime.datetime(2023, 11, 26, tzinfo=datetime.timezone.utc):
continue

data[creds[event.school_number]].append((x, y))

plot_basic(is_log, data, title)
Expand All @@ -71,12 +87,11 @@ def time_from_upload_till_download(is_log: bool = False):
data: dict[str, list[tuple[datetime.datetime, float]]] = defaultdict(list)
for event in events.iterate_events(events.PlanDownload):
# TODO: Differentiate PlanKl and VplanKl
if not event.plan_type == "PlanKl.xml":
continue
y = (event.start_time - event.last_modified).total_seconds()
x = event.start_time

if x < datetime.datetime(2023, 11, 26, tzinfo=datetime.timezone.utc):
continue

data[creds[event.school_number]].append((x, y))

title = "Zeit von Planupload bis Download vom Crawler"
Expand All @@ -88,8 +103,7 @@ def duration_of(event_type, is_log: bool = False):
for event in events.iterate_events(event_type):
y = (event.end_time - event.start_time).total_seconds()
x = event.start_time
# if x < datetime.datetime(2023, 11, 26, tzinfo=datetime.timezone.utc):
# continue

data[creds[event.school_number]].append((x, y))

title = f"Dauer von: {event_type.__name__}"
Expand All @@ -103,30 +117,33 @@ def num_proxies(is_log: bool = False):
if y is None:
continue
x = event.start_time
# if x < datetime.datetime(2023, 11, 26, tzinfo=datetime.timezone.utc):
# continue

data[creds[event.school_number]].append((x, y))

title = f"Anzahl benutzer Proxies pro Download"

plot_basic(is_log, data, title, unit=None)

# total = 0
#
# for event in events.iterate_events(events.PlanDownload):
# x = event.start_time
# if x < datetime.datetime(2023, 11, 29, tzinfo=datetime.timezone.utc):
# continue
#
# total += event.file_length
#
# print(total)

time_from_upload_till_available()
time_from_upload_till_download()
duration_of(events.PlanCrawlCycle)
duration_of(events.StudentsRevisionProcessed)
num_proxies()
total = 0

for event in events.iterate_events(events.PlanDownload):
x = event.start_time

if event.file_length is None:
continue

total += event.file_length

print(total)

plan_size(is_log=True)
time_from_upload_till_available(is_log=True)
time_from_upload_till_download(is_log=True)
duration_of(events.PlanCrawlCycle, is_log=True)
duration_of(events.StudentsRevisionProcessed, is_log=True)
duration_of(events.TeacherScrape, is_log=True)
duration_of(events.MetaUpdate, is_log=True)
num_proxies(is_log=True)


if __name__ == '__main__':
Expand Down
4 changes: 2 additions & 2 deletions backend/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,9 @@ def iterate_events(type_: typing.Type[_T2], school_number: str | None = None) ->
if field.name in base_keys:
continue

if field.type is datetime.datetime:
if field.type is datetime.datetime and obj.__dict__[field.name] is not None:
obj.__dict__[field.name] = datetime.datetime.fromisoformat(obj.__dict__[field.name])
elif field.type is datetime.date:
elif field.type is datetime.date and obj.__dict__[field.name] is not None:
obj.__dict__[field.name] = datetime.date.fromisoformat(obj.__dict__[field.name])

obj.__dict__["school_number"] = event["school_number"]
Expand Down
18 changes: 8 additions & 10 deletions backend/load_plans.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ def __init__(self, school_number: str, plan_downloader: PlanDownloader, plan_pro
self.school_number = school_number
self.plan_downloader = plan_downloader
self.plan_processor = plan_processor
self._plan_compute_thread_pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
self._plan_compute_executor = concurrent.futures.ThreadPoolExecutor(max_workers=3)

async def check_infinite(self, interval: int = 60, *, once: bool = False, ignore_exceptions: bool = False):
self.plan_downloader.migrate_all()
Expand All @@ -37,20 +37,18 @@ async def check_infinite(self, interval: int = 60, *, once: bool = False, ignore
try:
updated_dates = await self.plan_downloader.update_fetch()

def _process_plans():
self._plan_compute_executor.map(
self.plan_processor.update_day_plans, updated_dates
)
self.plan_processor.update_after_plan_processing()

if updated_dates:
self.plan_processor._logger.debug("* Processing plans...")
self.plan_processor.meta_extractor.invalidate_cache()
self._plan_compute_executor.submit(_process_plans)
else:
self.plan_processor._logger.debug("* No plans to process.")

for date in updated_dates:
self._plan_compute_thread_pool.submit(
self.plan_processor.update_day_plans, date
)

if updated_dates:
self.plan_processor.update_after_plan_processing()

except Exception as e:
if not ignore_exceptions:
raise
Expand Down
9 changes: 6 additions & 3 deletions backend/meta_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import datetime
import logging
import threading
import typing
from xml.etree import ElementTree as ET

Expand Down Expand Up @@ -83,6 +84,7 @@ def __init__(self, cache: Cache, num_last_days: int | None = 10, *, logger: logg

self._rooms: set[str] | None = None
self._daily_extractors: dict[tuple[datetime.date, datetime.datetime], DailyMetaExtractor] = {}
self._daily_extractors_lock = threading.Lock()
self._max_cached_extractors = 10

def iterate_daily_extractors(self) -> typing.Generator[DailyMetaExtractor, None, None]:
Expand All @@ -103,9 +105,10 @@ def iterate_daily_extractors(self) -> typing.Generator[DailyMetaExtractor, None,
self._logger.error(f"Failed to parse PlanKl.xml for {day!s} {timestamp!s}.")
continue

self._daily_extractors[(day, timestamp)] = extractor
while len(self._daily_extractors) > self._max_cached_extractors:
self._daily_extractors.pop(next(iter(self._daily_extractors)))
with self._daily_extractors_lock:
self._daily_extractors[(day, timestamp)] = extractor
while len(self._daily_extractors) > self._max_cached_extractors:
self._daily_extractors.pop(next(iter(self._daily_extractors)))

yield extractor

Expand Down
4 changes: 2 additions & 2 deletions datascience/user_creation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import datetime
import typing

from utils import users, creds
from datascience.helpers import load_database, download_databases
Expand Down Expand Up @@ -93,13 +94,12 @@ def plot_school_counts():


def get_settings_usage():
excluded = ["favourite"]
settings = users.find({"settings": {"$exists": True}}, {"settings": 1})
settings = [user_settings["settings"] for user_settings in settings]
settings_counts = {}
for setting in settings:
for key in setting:
if key in excluded:
if not isinstance(setting[key], typing.Hashable):
continue
if key not in settings_counts:
settings_counts[key] = {}
Expand Down
5 changes: 4 additions & 1 deletion shared/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,10 @@ def set_newest(self, day: datetime.date, timestamp: datetime.datetime):
target_path = self.get_plan_path(day, timestamp)

newest_path.unlink(missing_ok=True)
newest_path.symlink_to(target_path, target_is_directory=True)
try:
newest_path.symlink_to(target_path, target_is_directory=True)
except FileExistsError:
pass

def update_newest(self, day: datetime.date):
timestamps = self.get_timestamps(day)
Expand Down

0 comments on commit 907c4fa

Please sign in to comment.