From 93967246eae4be7b4aeddf8d786af23973d04c2b Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 24 Jan 2024 23:06:47 +0100 Subject: [PATCH 1/5] Improve robustness of allergen matching --- server/scraper/resto/menu.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/scraper/resto/menu.py b/server/scraper/resto/menu.py index e5c73e56..f3204c22 100755 --- a/server/scraper/resto/menu.py +++ b/server/scraper/resto/menu.py @@ -295,6 +295,11 @@ def find_allergens_for_food(allergens: Dict[str, str], food: str) -> list[str]: found = [] for part in food_parts: found += allergens.get(part, []) + # Also do the reverse search if we didn't find any allergens. + if not found: + for allergen_food, allergens in allergens.items(): + if allergen_food in food: + found += allergens return found From 8c57debab5aec60506b6b6de78ad5de4d2e0b361 Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 24 Jan 2024 23:12:55 +0100 Subject: [PATCH 2/5] Update docs for update API --- api-resto-02.md | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) diff --git a/api-resto-02.md b/api-resto-02.md index 3393a68a..7e361614 100644 --- a/api-resto-02.md +++ b/api-resto-02.md @@ -15,7 +15,7 @@ This document describes the current version of the API, version 2.0. | Version | Endpoint | Status | |------------------------|---------------------------------------|---------| | [1.0](api-resto-01.md) | https://hydra.ugent.be/api/1.0/resto/ | retired | -| 2.4 (this) | https://hydra.ugent.be/api/2.0/resto/ | current | +| 2.5 (this) | https://hydra.ugent.be/api/2.0/resto/ | current | ## Data dump @@ -34,6 +34,7 @@ need all available data, it is probably easier and faster to download or clone t - At some point in 2021 or early 2022, the zeus.ugent.be/hydra endpoint stopped working. We could fix it, but we assume most clients have migrated or are able to. - _October 2022_ - Allergen information was added. 
+- _January 2024_ - Allergen information has been added to vegetables, with the field `vegetables2`. ## Technical description @@ -187,6 +188,15 @@ Returns the menu for each available day in the future, including today. Sample o "Bloemkool", "Prinsessengroenten" ], + "vegetables2": [ + { + "kind": "vegan", + "name": "Bloemkool", + "allergens": [ + "Bloemkool" + ] + } + ], "message": "Alle studenten krijgen op vertoon van Hydra 150% korting." } ] @@ -256,13 +266,14 @@ A sample endpoint is `/menu/nl/2017/5/18.json`. Sample output is: A menu object consists of: -| Field | Description | -|--------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `date` | The date of the menu. The date's format follows ISO 8601:2004's extended format (`YYYY-MM-DD`). | -| `open` | If set to `true`, the resto is open, otherwise not. If set to `false`.

Note that this is no guarantee: some days (like the weekends) are simply not present in the output. | -| `vegetables` | A list of available vegetables. | -| `meals` | A list of meal objects (see below). | -| `message` | Optional field containing a message to be displayed. Used for exceptional closures or changes in the menu. For example, if `open` is `false`, the message could be an explanation for the closure. | +| Field | Description | +|---------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `date` | The date of the menu. The date's format follows ISO 8601:2004's extended format (`YYYY-MM-DD`). | +| `open` | If set to `true`, the resto is open, otherwise not. If set to `false`.

Note that this is no guarantee: some days (like the weekends) are simply not present in the output. | +| `vegetables` | A list of available vegetables. | +| `vegetables2` | A list of available vegetables in object form, with the kind and allergen information present, see below. | +| `meals` | A list of meal objects (see below). | +| `message` | Optional field containing a message to be displayed. Used for exceptional closures or changes in the menu. For example, if `open` is `false`, the message could be an explanation for the closure. | A meal object consists of: @@ -274,8 +285,16 @@ A meal object consists of: | `type` | The meal type. Is currently `main` or `side`, but applications must be able to handle changes to the possible values. | | `allergens` | List of allergens, matched on a best-efforts basis from the [allergen information](#allergen-information). | +A vegetable object consists of: + +| Field | Description | +|-------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `kind` | The kind of the vegetable. A subset of the meal kind, currently `meat`, `vegetarian`, or `vegan`. Applications must be able to handle changes to the possible values. | +| `name` | The name of the vegetable. | +| `allergens` | List of allergens, matched on a best-efforts basis from the [allergen information](#allergen-information). | + > **Warning** -> The allergen information, like all other information in the API, is available on a best-efforts basis. +> The allergen information, like all other information in the API, is available on a best-effort basis. > Particularly, this information IS NOT FIT to replace the legally mandated information about allergens. > When showing these data to users, please inform them of this and link to the web page. 
From 0f10162424b3457c6c8c8f0ea1f847a4f64ab2e9 Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 24 Jan 2024 23:25:34 +0100 Subject: [PATCH 3/5] Improve allergen scraping --- server/scraper/resto/allergens.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/server/scraper/resto/allergens.py b/server/scraper/resto/allergens.py index 7374f008..91bf303b 100755 --- a/server/scraper/resto/allergens.py +++ b/server/scraper/resto/allergens.py @@ -1,5 +1,6 @@ #!/usr/bin/env python3 import argparse +import itertools import os import sys from typing import Union @@ -16,7 +17,10 @@ URL = "https://www.ugent.be/student/nl/meer-dan-studeren/resto/allergenen" SKIPPED_ELEMENTS = [ "vegetarisch", - "vegan" + "vegan", + "veggie", + "msc", + "asc" ] @@ -33,7 +37,7 @@ def parse_section_item(section_item: str) -> Union[dict[str, list[str]], None]: item_name = "Soep van de dag" item_allergen_list = section_item else: - item_name, item_allergen_list = section_item.split(":", maxsplit=1) + item_name, item_allergen_list = section_item.rsplit(":", maxsplit=1) # Sometimes a section will have extra info before the item list, # this should not be parsed @@ -41,10 +45,13 @@ def parse_section_item(section_item: str) -> Union[dict[str, list[str]], None]: return None item_allergens = list(map(lambda a: a.strip(), item_allergen_list.split(","))) + # Split items with "-" + item_allergens = list(itertools.chain.from_iterable(item.split("-") for item in item_allergens)) + item_allergens = [x.strip().strip(".") for x in item_allergens] # Exclude last item, it is not an allergen but a diet name # eg. 
'Vegetarian' or 'Vegan' - return {item_name.lower(): sorted({x.strip(".") for x in item_allergens if x.strip(".") not in SKIPPED_ELEMENTS})} + return {item_name.lower(): sorted({x for x in item_allergens if x not in SKIPPED_ELEMENTS})} def make_sections( From 4f373682e8d5e138262b4584533ad9e460a36459 Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 24 Jan 2024 23:25:44 +0100 Subject: [PATCH 4/5] Implement allergens for vegetables --- server/scraper/resto/menu.py | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/server/scraper/resto/menu.py b/server/scraper/resto/menu.py index f3204c22..59c81893 100755 --- a/server/scraper/resto/menu.py +++ b/server/scraper/resto/menu.py @@ -166,7 +166,8 @@ "warme maaltijden: vlees", "groenten bij warme maaltijden", "zetmeel", - "soep" + "soep", + "groenten bij warme maaltijden" ] @@ -314,6 +315,7 @@ def get_day_menu(which, url, allergens: Dict[str, str]): # system) day_menu = pq(url=url) vegetables = [] + vegetables2 = [] meats = [] soups = [] @@ -399,6 +401,20 @@ def get_day_menu(which, url, allergens: Dict[str, str]): meats.append(dict(price=price, name=name, kind=kind, hot=hot_cold, allergens=food_allergens)) elif HEADING_TO_TYPE[last_heading] == 'vegetables': vegetables.append(meal) + if ":" in meal: + kind, name = meal.split(":") + if kind != 'vegan' and kind != 'vegetarian': + kind = 'meat' + else: + kind = 'meat' + name = meal + vegetable_allergens = find_allergens_for_food(allergens, name) + vegetable = { + 'name': meal, + 'kind': kind, + 'allergens': vegetable_allergens + } + vegetables2.append(vegetable) else: raise ValueError(f"Oops, HEADING_TO_TYPE contains unknown value for {last_heading}.") @@ -406,7 +422,7 @@ def get_day_menu(which, url, allergens: Dict[str, str]): if not vegetables and not soups and not meats: return dict(open=False) - r = dict(open=True, vegetables=vegetables, soup=soups, meat=meats) + r = dict(open=True, vegetables=vegetables, vegetables2=vegetables2, 
soup=soups, meat=meats) return r From 5ca9208c7a039bfd8fbae5360bef04dc528ee3e5 Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Wed, 24 Jan 2024 23:29:21 +0100 Subject: [PATCH 5/5] Do an editorial pass over the docs --- api-resto-02.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/api-resto-02.md b/api-resto-02.md index 7e361614..f41bfb85 100644 --- a/api-resto-02.md +++ b/api-resto-02.md @@ -6,7 +6,7 @@ The resto API provides information about the student restaurants of Ghent Univer These data are scraped from https://www.ugent.be/student/nl/meer-dan-studeren/resto. -The menu data is property of Ghent University. We don't guarantee the correctness or completeness of the data. +The menu data is property of Ghent University. We do not guarantee the correctness or completeness of the data. ## Versioning and status @@ -291,21 +291,21 @@ A vegetable object consists of: |-------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| | `kind` | The kind of the vegetable. A subset of the meal kind, currently `meat`, `vegetarian`, or `vegan`. Applications must be able to handle changes to the possible values. | | `name` | The name of the vegetable. | -| `allergens` | List of allergens, matched on a best-efforts basis from the [allergen information](#allergen-information). | +| `allergens` | List of allergens, matched on a best-effort basis from the [allergen information](#allergen-information). | > **Warning** > The allergen information, like all other information in the API, is available on a best-effort basis. > Particularly, this information IS NOT FIT to replace the legally mandated information about allergens. > When showing these data to users, please inform them of this and link to the web page. -How an application handles changes to possible values (indicated above where this is applicable), is not specified. 
+How an application handles changes to possible values (indicated above where this is applicable) is not specified. The application might simply ignore new values. ### Regular sandwiches **Endpoint**: `GET /sandwiches/static.json` -Lists available regular sandwiches, their price and their ingredients. Sample output: +Lists available regular sandwiches, their price, and their ingredients. Sample output: ```json [ @@ -346,9 +346,9 @@ as [Weekly sandwiches yearly](#weekly-sandwiches-yearly). - _year_ -- Which year you want the sandwiches of. Values must be a positive integer. Currently, the earliest available year is 2019 (but this might change in the future). ISO format: `YYYY`. -Starting in academic year 2020-2021, this is listed as "groentespread". +Starting in academic year 2020–2021, this is listed as "groentespread". -Lists all sandwiches which were or are available in the specified year. Sample output: +Lists all sandwiches that were or are available in the specified year. Sample output: ```json [ @@ -407,7 +407,7 @@ Since that webpage is made manually, it is very possible that the names used her menu. > **Warning** -> This parser, as all other information in the API, is available on a best-efforts basis. +> This parser, like all other information in the API, is available on a best-effort basis. > Particularly, this information IS NOT FIT to replace the legally mandated information about allergens. > When showing these data to users, please inform them of this and link to the web page.