Skip to content

Commit

Permalink
Merge pull request #473 from ZeusWPI/update3
Browse files Browse the repository at this point in the history
Start of year update
  • Loading branch information
niknetniko authored Oct 3, 2023
2 parents ea53d33 + d3839ca commit 2f3bbcd
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 125 deletions.
7 changes: 4 additions & 3 deletions server/scraper/resto/allergens.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@ def parse_section_item(section_item: str) -> Union[dict[str, list[str]], None]:
"""
Parses strings of the form `food: allergen, allergen, allergen`
"""

if "soep van de dag" in section_item:
item_name = "Soep van de dag"
item_allergen_list = section_item
else:
item_name, item_allergen_list = section_item.split(":")
item_name, item_allergen_list = section_item.split(":", maxsplit=1)

# Sometimes a section has extra info before the item list;
# that extra info should not be parsed.
Expand Down Expand Up @@ -73,7 +73,8 @@ def make_sections(
sections[section_header] = dict()
for raw_section_item in raw_section_items:
section_item = raw_section_item.get_text(strip=True)
assert section_item is not None
if not section_item:
continue

section_item_map = parse_section_item(section_item)
if section_item_map is None:
Expand Down
27 changes: 17 additions & 10 deletions server/scraper/resto/menu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
import json
import os
import re
import warnings

from bs4 import BeautifulSoup
from bs4 import BeautifulSoup, MarkupResemblesLocatorWarning
import string
import sys
import traceback
Expand All @@ -15,6 +16,8 @@

from pyquery import PyQuery as pq

warnings.filterwarnings("ignore", category=MarkupResemblesLocatorWarning)

# Workaround for Python's module system: add the parent directory to the import path
sys.path.append('..')

Expand All @@ -30,16 +33,11 @@
"en": "https://www.ugent.be/en/facilities/restaurants/weekly-menu",
"nl": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu",
"nl-debrug-avond": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenubrugavond",
"nl-coupure": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu",
"nl-dunant": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu",
"nl-merelbeke": "https://www.ugent.be/student/nl/meer-dan-studeren/resto/weekmenu",
}

NORMAL_WEEK = re.compile(r"week(\d+)$")
INDIVIDUAL_DAY_URL_OVERRIDE = {
"nl-coupure": r"week(\d+)coupure$",
"nl-dunant": r"week(\d+)(merelbekedunant|dunant)$",
"nl-merelbeke": r"week(\d+)(merelbekedunant|merelbeke)$",
"nl-debrug": r"week(\d+)brugsterre|week(27)duurzaam|week(28)duurzaam",
"nl-sterre": r"week(\d+)(brugsterre|sterre)|week(27)duurzaam",
"nl-ardoyen": r"week(\d+)ardoyen"
Expand All @@ -50,12 +48,9 @@
# which is very useful.
COPIED_ENDPOINTS = {
"nl-debrug": "nl",
"nl-heymans": "nl",
"nl-dunant": "nl",
"nl-coupure": "nl",
"nl-sterre": "nl",
"nl-ardoyen": "nl",
"nl-merelbeke": "nl",
}

# Day names to day of the week.
Expand Down Expand Up @@ -346,8 +341,20 @@ def get_day_menu(which, url, allergens: Dict[str, str]):

if HEADING_TO_TYPE[last_heading] == 'soup':
name, price = split_price(meal)
if "€" in name:
name, price_large = split_price(name)
else:
price_large = None
food_allergens = find_allergens_for_food(allergens, name)
soups.append(dict(price=price, name=name, type='side', allergens=food_allergens))
if price_large:
small = "klein" if "nl" in which else "small"
big = "groot" if "nl" in which else "big"
name_small = f"{name} {small}"
name_big = f"{name} {big}"
soups.append(dict(price=price, name=name_small, type='side', allergens=food_allergens))
soups.append(dict(price=price_large, name=name_big, type='side', allergens=food_allergens))
else:
soups.append(dict(price=price, name=name, type='side', allergens=food_allergens))
elif HEADING_TO_TYPE[last_heading] == 'meal soup':
name, price = split_price(meal)
food_allergens = find_allergens_for_food(allergens, name)
Expand Down
15 changes: 8 additions & 7 deletions server/scraper/resto/sandwiches.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import sys
sys.path.append('..')

from util import parse_money, write_json_to_file
from util import parse_money, write_json_to_file, split_price

SANDWICHES_URL = "https://www.ugent.be/student/nl/meer-dan-studeren/resto/broodjes/overzicht.htm"
HTML_PARSER = 'lxml'
Expand Down Expand Up @@ -116,8 +116,7 @@ def weekly_sandwiches(output, soup):
'start': start,
'end': end,
'name': columns[1].text.strip(),
'ingredients': parse_ingredients(columns[2].text),
'vegan': 'x' in columns[3].text
'ingredients': parse_ingredients(columns[2].text)
})

today = datetime.date.today()
Expand Down Expand Up @@ -166,15 +165,17 @@ def salad_bowls(output, soup):
"""
bowls = []

tables = soup.find_all('table', limit=3)
tables = soup.find_all('table', limit=4)

if len(tables) >= 3:
for row in soup.find_all('table', limit=3)[2].find_all("tr", class_=lambda x: x != 'tabelheader'):
if len(tables) >= 4:
header = soup.find('a', id="salad-bowls").parent
_, price = split_price(header.text) if header else (None, None)
for row in tables[3].find_all("tr", class_=lambda x: x != 'tabelheader'):
columns = row.find_all("td")
bowls.append({
'name': columns[0].text.strip(),
'description': columns[1].text.strip(),
'price': parse_money(columns[2].string) if columns[2].string else ""
'price': parse_money(price) if price else ""
})

output_file = os.path.join(output, SALADS)
Expand Down
7 changes: 6 additions & 1 deletion server/scraper/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,14 @@ def split_price(meal):
name = '-'.join(meal.split('-')[:-1]).strip()
name, price = move_junk_from_price_to_name(name, price)
return name, price
elif "/" in meal and "€" in meal:
price = meal.split('/')[-1].strip()
name = '/'.join(meal.split('/')[:-1]).strip()
name, price = move_junk_from_price_to_name(name, price)
return name, price
elif "€" in meal:
meal, price = meal.split("€")
return meal.strip(), price
return meal.strip(), price.strip()
else:
return meal.strip(), ""

Expand Down
Loading

0 comments on commit 2f3bbcd

Please sign in to comment.