diff --git a/server/scraper/resto/allergens.py b/server/scraper/resto/allergens.py index f6eea428..7374f008 100755 --- a/server/scraper/resto/allergens.py +++ b/server/scraper/resto/allergens.py @@ -28,12 +28,12 @@ def parse_section_item(section_item: str) -> Union[dict[str, list[str]], None]: """ Parses strings of the form `food: allergen, allergen, allergen` """ - + if "soep van de dag" in section_item: item_name = "Soep van de dag" item_allergen_list = section_item else: - item_name, item_allergen_list = section_item.split(":") + item_name, item_allergen_list = section_item.split(":", maxsplit=1) # Sometimes a section will have extra info before the item list, # this should not be parsed @@ -73,7 +73,8 @@ def make_sections( sections[section_header] = dict() for raw_section_item in raw_section_items: section_item = raw_section_item.get_text(strip=True) - assert section_item is not None + if not section_item: + continue section_item_map = parse_section_item(section_item) if section_item_map is None: diff --git a/server/scraper/resto/menu.py b/server/scraper/resto/menu.py index 15ac8dd0..e5c73e56 100755 --- a/server/scraper/resto/menu.py +++ b/server/scraper/resto/menu.py @@ -5,8 +5,9 @@ import json import os import re +import warnings -from bs4 import BeautifulSoup +from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning import string import sys import traceback @@ -15,6 +16,8 @@ from pyquery import PyQuery as pq +warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning) + # Bad python module system sys.path.append('..') @@ -30,16 +33,11 @@ "en": "https://www.ugent.be/en/facilities/restaurants/weekly-menu", "nl": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu", "nl-debrug-avond": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenubrugavond", - "nl-coupure": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu", - "nl-dunant": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu", - "nl-merelbeke": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu", } NORMAL_WEEK = re.compile(r"week(\d+)$") INDIVIDUAL_DAY_URL_OVERRIDE = { "nl-coupure": r"week(\d+)coupure$", - "nl-dunant": r"week(\d+)(merelbekedunant|dunant)$", - "nl-merelbeke": r"week(\d+)(merelbekedunant|merelbeke)$", "nl-debrug": r"week(\d+)brugsterre|week(27)duurzaam|week(28)duurzaam", "nl-sterre": r"week(\d+)(brugsterre|sterre)|week(27)duurzaam", "nl-ardoyen": r"week(\d+)ardoyen" @@ -50,12 +48,9 @@ # which is very useful. COPIED_ENDPOINTS = { "nl-debrug": "nl", - "nl-heymans": "nl", - "nl-dunant": "nl", "nl-coupure": "nl", "nl-sterre": "nl", "nl-ardoyen": "nl", - "nl-merelbeke": "nl", } # Day names to day of the week. @@ -346,8 +341,20 @@ def get_day_menu(which, url, allergens: Dict[str, str]): if HEADING_TO_TYPE[last_heading] == 'soup': name, price = split_price(meal) + if "€" in name: + name, price_large = split_price(name) + else: + price_large = None food_allergens = find_allergens_for_food(allergens, name) - soups.append(dict(price=price, name=name, type='side', allergens=food_allergens)) + if price_large: + small = "klein" if "nl" in which else "small" + big = "groot" if "nl" in which else "big" + name_small = f"{name} {small}" + name_big = f"{name} {big}" + soups.append(dict(price=price, name=name_small, type='side', allergens=food_allergens)) + soups.append(dict(price=price_large, name=name_big, type='side', allergens=food_allergens)) + else: + soups.append(dict(price=price, name=name, type='side', allergens=food_allergens)) elif HEADING_TO_TYPE[last_heading] == 'meal soup': name, price = split_price(meal) food_allergens = find_allergens_for_food(allergens, name) diff --git a/server/scraper/resto/sandwiches.py b/server/scraper/resto/sandwiches.py index 6fcdc22d..fb704408 100755 --- a/server/scraper/resto/sandwiches.py +++ b/server/scraper/resto/sandwiches.py @@ -12,7 +12,7 @@ import sys sys.path.append('..') -from util import parse_money, write_json_to_file +from util import parse_money, write_json_to_file, split_price SANDWICHES_URL = "https://www.ugent.be/student/nl/meer-dan-studeren/resto/broodjes/overzicht.htm" HTML_PARSER = 'lxml' @@ -116,8 +116,7 @@ def weekly_sandwiches(output, soup): 'start': start, 'end': end, 'name': columns[1].text.strip(), - 'ingredients': parse_ingredients(columns[2].text), - 'vegan': 'x' in columns[3].text + 'ingredients': parse_ingredients(columns[2].text) }) today = datetime.date.today() @@ -166,15 +165,17 @@ def salad_bowls(output, soup): """ bowls = [] - tables = soup.find_all('table', limit=3) + tables = soup.find_all('table', limit=4) - if len(tables) >= 3: - for row in soup.find_all('table', limit=3)[2].find_all("tr", class_=lambda x: x != 'tabelheader'): + if len(tables) >= 4: + header = soup.find('a', id="salad-bowls").parent + _, price = split_price(header.text) if header else (None, None) + for row in tables[3].find_all("tr", class_=lambda x: x != 'tabelheader'): columns = row.find_all("td") bowls.append({ 'name': columns[0].text.strip(), 'description': columns[1].text.strip(), - 'price': parse_money(columns[2].string) if columns[2].string else "" + 'price': parse_money(price) if price else "" }) output_file = os.path.join(output, SALADS) diff --git a/server/scraper/util.py b/server/scraper/util.py index fecb7f43..34899f5c 100644 --- a/server/scraper/util.py +++ b/server/scraper/util.py @@ -49,9 +49,14 @@ def split_price(meal): name = '-'.join(meal.split('-')[:-1]).strip() name, price = move_junk_from_price_to_name(name, price) return name, price + elif "/" in meal and "€" in meal: + price = meal.split('/')[-1].strip() + name = '/'.join(meal.split('/')[:-1]).strip() + name, price = move_junk_from_price_to_name(name, price) + return name, price elif "€" in meal: meal, price = meal.split("€") - return meal.strip(), price + return meal.strip(), price.strip() else: return meal.strip(), "" diff --git a/server/static/resto/meta_2.0.json b/server/static/resto/meta_2.0.json index 2fffdf42..1c028df4 100644 --- a/server/static/resto/meta_2.0.json +++ b/server/static/resto/meta_2.0.json @@ -1,106 +1,169 @@ { - "locations": [ - { - "name": "English menu", - "address": "Sint-Pietersnieuwstraat", - "latitude": 51.045113, - "longitude": 3.727347, - "type": "resto", - "endpoint": "en", - "open": {"resto":[["08:30", "15:00"]]} - },{ - "name": "Resto Campus Sterre", - "address": "Krijgslaan 281", - "latitude": 51.026024, - "longitude": 3.712939, - "type": "resto", - "endpoint": "nl-sterre", - "open": {"resto":[["08:30", "14:00"]]} - }, { - "name": "Resto Campus Heymans", - "address": "Harelbekestraat 70", - "latitude": 51.026508, - "longitude": 3.730189, - "type": "resto", - "endpoint": "nl-heymans", - "open": {"resto":[["08:30", "15:00"]]} - }, { - "name": "Resto De Brug", - "address": "Sint-Pietersnieuwstraat 45", - "latitude": 51.045613, - "longitude": 3.727147, - "type": "resto", - "endpoint": "nl-debrug", - "open": {"resto":[["08:30", "15:00"]], - "cafetaria":[["08:00", "16:00"]]} - }, { - "name": "Resto De Brug (avond)", - "address": "Sint-Pietersnieuwstraat 45", - "latitude": 51.045613, - "longitude": 3.727147, - "type": "resto", - "endpoint": "nl-debrug-avond", - "open": {"resto":[["17:30", "21:00"]]} - }, { - "name": "Resto Campus Merelbeke", - "address": "Salisburylaan 133, Merelbeke", - "latitude": 50.998369, - "longitude": 3.766454, - "type": "resto", - "endpoint": "nl-merelbeke", - "open": {"resto":[["08:30", "15:00"]]} - }, { - "name": "Resto Campus Dunant", - "address": "Henri Dunantlaan 2", - "latitude": 51.049023, - "longitude": 3.704017, - "type": "resto", - "endpoint": "nl-dunant", - "open": {"resto":[["08:30", "15:00"]]} - }, { - "name": "Resto Campus Coupure", - "address": "Coupure Links 653", - "latitude": 51.053252, - "longitude": 3.707671, - "type": "resto", - "endpoint": "nl-coupure", - "open": {"resto":[["08:30", "15:00"]]} - }, { - "name": "Resto Ardoyen", - "address": "Technologiepark-Zwijnaarde 75, 9052 Zwijnaarde", - "latitude": 51.010207, - "longitude": 3.707660, - "type": "resto", - "endpoint": "nl-ardoyen", - "open": {"resto":[["08:30", "15:00"]]} - }, { - "name": "Cafetaria Campus Boekentoren", - "address": "Blandijnberg 2", - "latitude": 51.043960, - "longitude": 3.725230, - "type": "cafetaria", - "open": {} - }, { - "name": "Cafetaria Campus UZ Gent", - "address": "Corneel Heymanslaan", - "latitude": 51.024360, - "longitude": 3.723540, - "type": "cafetaria", - "open": {"cafetaria":[["09:00", "14:30"]]} - }, { - "name": "Cafetaria Ledeganck", - "address": "Ledeganckstraat 35", - "latitude": 51.036520, - "longitude": 3.723990, - "type": "cafetaria", - "open": {} - }, { - "name": "Cafetaria Campus Aula", - "address": "Universiteitsstraat 4", - "latitude": 51.051029, - "longitude": 3.723928, - "type": "cafetaria", - "open": {} - } - ] + "locations": [ + { + "name": "English menu", + "address": "Sint-Pietersnieuwstraat", + "latitude": 51.045113, + "longitude": 3.727347, + "type": "resto", + "endpoint": "en", + "open": { + "resto": [ + [ + "08:30", + "15:00" + ] + ] + } + }, + { + "name": "Resto Campus Sterre", + "address": "Krijgslaan 281", + "latitude": 51.026024, + "longitude": 3.712939, + "type": "resto", + "endpoint": "nl-sterre", + "open": { + "resto": [ + [ + "08:30", + "14:00" + ] + ] + } + }, + { + "name": "Resto De Brug", + "address": "Sint-Pietersnieuwstraat 45", + "latitude": 51.045613, + "longitude": 3.727147, + "type": "resto", + "endpoint": "nl-debrug", + "open": { + "resto": [ + [ + "08:30", + "15:00" + ] + ], + "cafetaria": [ + [ + "08:00", + "16:00" + ] + ] + } + }, + { + "name": "Resto De Brug (avond)", + "address": "Sint-Pietersnieuwstraat 45", + "latitude": 51.045613, + "longitude": 3.727147, + "type": "resto", + "endpoint": "nl-debrug-avond", + "open": { + "resto": [ + [ + "17:30", + "21:00" + ] + ] + } + }, + { + "name": "Resto Campus Coupure", + "address": "Coupure Links 653", + "latitude": 51.053252, + "longitude": 3.707671, + "type": "resto", + "endpoint": "nl-coupure", + "open": { + "resto": [ + [ + "08:30", + "15:00" + ] + ] + } + }, + { + "name": "Resto Ardoyen", + "address": "Technologiepark-Zwijnaarde 75, 9052 Zwijnaarde", + "latitude": 51.010207, + "longitude": 3.707660, + "type": "resto", + "endpoint": "nl-ardoyen", + "open": { + "resto": [ + [ + "08:30", + "15:00" + ] + ] + } + }, + { + "name": "Cafetaria Campus Boekentoren", + "address": "Blandijnberg 2", + "latitude": 51.043960, + "longitude": 3.725230, + "type": "cafetaria", + "open": {} + }, + { + "name": "Cafetaria Campus UZ Gent", + "address": "Corneel Heymanslaan", + "latitude": 51.024360, + "longitude": 3.723540, + "type": "cafetaria", + "open": { + "cafetaria": [ + [ + "09:00", + "14:30" + ] + ] + } + }, + { + "name": "Cafetaria Ledeganck", + "address": "Ledeganckstraat 35", + "latitude": 51.036520, + "longitude": 3.723990, + "type": "cafetaria", + "open": {} + }, + { + "name": "Cafetaria Campus Aula", + "address": "Universiteitsstraat 4", + "latitude": 51.051029, + "longitude": 3.723928, + "type": "cafetaria", + "open": {} + }, + { + "name": "Cafetaria Heymans", + "address": "Harelbekestraat 70", + "latitude": 51.026508, + "longitude": 3.730189, + "type": "cafetaria", + "open": {} + }, + { + "name": "Cafetaria Merelbeke", + "address": "Salisburylaan 133, Merelbeke", + "latitude": 50.998369, + "longitude": 3.766454, + "type": "cafetaria", + "open": {} + }, + { + "name": "Cafetaria Dunant", + "address": "Henri Dunantlaan 2", + "latitude": 51.049023, + "longitude": 3.704017, + "type": "cafetaria", + "open": {} + } + ] }