Skip to content

Commit

Permalink
Merge branch 'master' into feat/docker
Browse files Browse the repository at this point in the history
  • Loading branch information
tyboro2002 authored Dec 20, 2024
2 parents 24fcbbd + 903fdba commit 5004849
Show file tree
Hide file tree
Showing 9 changed files with 341 additions and 14 deletions.
40 changes: 40 additions & 0 deletions db.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
{
"files": {
"hlds_files\\bicyclette.hlds": {
"local_file_path": "hlds_files\\bicyclette.hlds",
"metadata": {
"sync-to": "menus\\la_bicylette.hlds"
}
},
"hlds_files\\bocca_ovp.hlds": {
"local_file_path": "hlds_files\\bocca_ovp.hlds",
"metadata": {
"sync-to": "menus\\bocca_ovp.hlds"
}
},
"hlds_files\\metropol.hlds": {
"local_file_path": "hlds_files\\metropol.hlds",
"metadata": {
"sync-to": "menus\\pitta_metropol.hlds"
}
},
"hlds_files\\pizza_donna.hlds": {
"local_file_path": "hlds_files\\pizza_donna.hlds",
"metadata": {
"sync-to": "menus\\prima_donna.hlds"
}
},
"hlds_files\\s5.hlds": {
"local_file_path": "hlds_files\\s5.hlds",
"metadata": {
"sync-to": "menus\\s5.hlds"
}
},
"hlds_files\\simpizza.hlds": {
"local_file_path": "hlds_files\\simpizza.hlds",
"metadata": {
"sync-to": "menus\\simpizza.hlds"
}
}
}
}
147 changes: 147 additions & 0 deletions db.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import hashlib
import json
import os
from os.path import exists
from typing import Dict, List

# Path of the JSON file used as the on-disk database (relative to the working directory).
db_filename = "db.json"


def init_db():
    """
    Create the JSON database file containing an empty object if it does not exist yet.

    The file is opened in exclusive-create mode, so checking for the file and
    creating it happen in a single step. An existing database is left untouched.
    """
    try:
        with open(db_filename, "x") as db_file:
            print("Initializing json file database")
            db_file.write("{}")
    except FileExistsError:
        # The database is already there: nothing to initialize.
        pass


# Runs at import time so the database file exists before any accessor below is called.
init_db()


def _load_db():
    """Read the JSON database file and return its parsed content."""
    with open(db_filename, "r") as handle:
        return json.load(handle)


def _save_db(db):
    """
    Serialize ``db`` as indented JSON and overwrite the database file with it.

    :param db: The JSON-serializable database content to persist
    :raises TypeError: If ``db`` contains a value that cannot be encoded as JSON
    """
    # Serialize before opening: opening with "w" truncates the file, so a
    # serialization failure after that point would leave an empty database behind.
    serialized = json.dumps(db, indent=4)
    with open(db_filename, "w") as db_file:
        db_file.write(serialized)


def calculate_file_hash(file_path: str) -> str:
    """
    Return the SHA256 digest of the contents of the file at ``file_path``.
    :param file_path: Path to the file
    :return: The digest as a hexadecimal string
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as stream:
        # iter() with a sentinel keeps reading 8192-byte blocks until read() returns b""
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()


def is_file_different(hlds_file: str, menu_file: str) -> bool:
    """
    Tell whether two files differ by comparing the SHA256 hashes of their contents.
    :param hlds_file: Path to the hlds-side file
    :param menu_file: Path to the counterpart file in the menus directory
    :return: True if the hashes differ, False if they are equal
    """
    return calculate_file_hash(hlds_file) != calculate_file_hash(menu_file)


def get_manual_file_mapping() -> Dict[str, str]:
    """
    Build the hand-maintained mapping from files in ``hlds_files`` to their
    counterparts in ``menus``.
    :return: A dictionary mapping each hlds file path to its menus file path
    """
    # (file name in hlds_files, file name in menus) -- add more pairs here as needed
    name_pairs = (
        ("bicyclette.hlds", "la_bicylette.hlds"),
        ("bocca_ovp.hlds", "bocca_ovp.hlds"),
        ("metropol.hlds", "pitta_metropol.hlds"),
        ("pizza_donna.hlds", "prima_donna.hlds"),
        ("s5.hlds", "s5.hlds"),
        ("simpizza.hlds", "simpizza.hlds"),
    )
    return {
        os.path.join("hlds_files", source_name): os.path.join("menus", target_name)
        for source_name, target_name in name_pairs
    }


def test_file_comparison():
    """
    Walk the manual file mapping, report for every pair whether the hlds file is
    missing, new, different or identical, and register new or different files
    in the database.
    """
    for hlds_file, menu_file in get_manual_file_mapping().items():
        if not os.path.exists(hlds_file):
            # Nothing to compare without the hlds file
            print(f"{hlds_file} does not exist. Skipping...")
        elif not os.path.exists(menu_file):
            print(f"{menu_file} does not exist. File {hlds_file} is new.")
            add_file_to_db(hlds_file, menu_file)  # Register the new file
        elif is_file_different(hlds_file, menu_file):
            print(f"Files {hlds_file} and {menu_file} are different.")
            add_file_to_db(hlds_file, menu_file)  # Register the changed file
        else:
            print(f"Files {hlds_file} and {menu_file} are identical.")


def add_file_to_db(hlds_file, menu_file):
    """
    Insert or overwrite the database entry for ``hlds_file`` and save the database.
    :param hlds_file: Path of the hlds file; used as the entry key and as its local path.
    :param menu_file: Path the file should be synced to.
    """
    state = _load_db()
    entry = {
        "local_file_path": hlds_file,
        "metadata": {"sync-to": menu_file},
    }
    # setdefault creates the "files" section the first time an entry is stored
    state.setdefault("files", {})[hlds_file] = entry
    _save_db(state)
    print(f"Added {hlds_file} to the database as different.")


def get_latest_sync_time() -> int:
    """Return the stored ``latest_sync_time`` value, or 0 when none has been saved."""
    return _load_db().get("latest_sync_time", 0)


def set_latest_sync_time(le_date) -> None:
    """Store ``le_date`` as ``latest_sync_time`` and write the database back to disk."""
    state = _load_db()
    state.update({"latest_sync_time": le_date})
    _save_db(state)


def get_files() -> Dict[str, dict]:
    """
    Return the ``files`` section of the database.

    :return: A dictionary mapping each file id to its entry (``local_file_path``
        and ``metadata``), or an empty dictionary when nothing is registered
    """
    return _load_db().get("files", {})


def set_local_file_path(file_id, local_file_path):
    """
    Update the ``local_file_path`` of an existing database entry and save the database.
    :param file_id: Key of the entry inside the ``files`` section
    :param local_file_path: New value for the entry's ``local_file_path``
    :return: The updated entry
    :raises KeyError: If the database has no ``files`` section or no entry for ``file_id``
    """
    state = _load_db()
    entry = state["files"][file_id]
    entry.update(local_file_path=local_file_path)
    _save_db(state)
    return entry


# NOTE(review): this runs on every import of the module (not only when executed as a
# script), so importing db compares the mapped files and may rewrite the database.
test_file_comparison()
20 changes: 20 additions & 0 deletions hlds_files/s5.hlds
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
==========================
s5: S5
osm https://www.openstreetmap.org/node/2659815473
phone +32 000 00 00 00
address Krijgslaan 281, 9000 Gent
website https://www.ugent.be/student/nl/meer-dan-studeren/resto/restos/restocampussterre.htm
==========================

dish bloemkool_kaassaus: Bloemkool kaassaus € 0.0
dish bloemkool-kaasburger: Bloemkool-kaasburger € 5.05
dish braadworst: Braadworst € 5.55
dish julienne_groenten: Julienne groenten € 0.0
dish kaaskroketten: Kaaskroketten € 4.55
dish rauwkostslaatje: Rauwkostslaatje € 0.0
dish spaghetti_bolognaise: Spaghetti bolognaise € 5.2
dish spaghetti_met_groentesaus: Spaghetti met groentesaus € 4.5
dish tomatensoep_groot: Tomatensoep groot € 1.9
dish tomatensoep_klein: Tomatensoep klein € 1.25
dish witloofroomsoep_groot: Witloofroomsoep groot € 1.9
dish witloofroomsoep_klein: Witloofroomsoep klein € 1.25
10 changes: 10 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,14 @@ def run_bocca_ovp():
file.write(translate_products_to_text(bocca_ovp_products))
print("bocca_ovp done", flush=True)
return len(bocca_ovp_products)

def run_s5():
    """
    Scrape the S5 menu and write it to ``hlds_files/s5.hlds``.

    The file starts with the location header followed by the product lines.

    :return: The number of scraped products
    """
    s5_products, s5_location = S5Scraper.get_prices()
    with open("hlds_files/s5.hlds", "w", encoding="utf-8") as file:
        file.write(str(s5_location) + "\n")
        file.write(translate_products_to_text(s5_products))
    # flush=True, like run_bocca_ovp, so the message is not held back in the output buffer
    print("s5 done", flush=True)
    return len(s5_products)


def parse_arguments():
Expand Down Expand Up @@ -106,6 +114,8 @@ def run_scrapers(
tasks.append(run_bocca_ovp)
if run_everything or "pizza_donna" in [name.lower() for name in restaurant_names]:
tasks.append(run_pizza_donna)
if run_everything or "s5" in [name.lower() for name in restaurant_names]:
tasks.append(run_s5)

# print(f"Restaurants: {args.restaurant_name},evaluates to {"everything because run_everything is selected" if run_everything else restaurant_names}")
# print(f"Parallel: {args.use_parallelism},evaluates to {use_parallelism}")
Expand Down
1 change: 1 addition & 0 deletions menus
Submodule menus added at f76203
27 changes: 27 additions & 0 deletions run_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import traceback

import db
# import dir_utils
# import mattermost_client
# import mattermost_communication
import sync_gitmate as sync


def sync_files_to_gitea():
    """
    Sync every file registered in the database to Gitea.

    A failure while syncing one file is reported together with its traceback and
    does not stop the remaining files from being synced.
    """
    repo, api_handler = sync.init_sync()
    # Read the database once so the printed overview and the loop see the same entries
    files = db.get_files()
    print(files.items())
    for file_info in files.values():
        try:
            sync.sync_file(repo, api_handler, file_info)
        except Exception:
            # Best effort: report the failure and continue with the next file
            print("Critical error: Failed to sync file to Gitea")
            traceback.print_exc()


if __name__ == "__main__":
    # Print the banner (preceded by an empty line), run one sync pass,
    # then finish with an empty line.
    for banner_line in (
        "",
        "================================================",
        "== Syncing files to git ==",
    ):
        print(banner_line)
    sync_files_to_gitea()
    print()
73 changes: 73 additions & 0 deletions scrapers/s5_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import json
import time
from datetime import datetime

import requests

from data_types.choice import ChoiceList, Choice
from data_types.location import Location
from data_types.product import Product
from scrapers.scraper import Scraper
from utils import extract_spans, filter_divs, create_heading_contains_h2_with, fetch_and_parse_html, comma_float


class S5Scraper(Scraper):
    """Scraper for the S5 resto that reads today's menu from the Hydra API."""

    @staticmethod
    def _meal_price(meal):
        """Return the price of a meal entry, taken from the token after the first space."""
        return comma_float(meal.get("price", "€ 0.0").split(" ")[1])

    @staticmethod
    def _vegetable_name(vegetable):
        """Return the text after the first colon, or the whole stripped string if there is none."""
        parts = vegetable.split(":")
        if len(parts) > 1:
            # Drop the single character that follows the colon
            return parts[1][1:]
        return vegetable.strip()

    @staticmethod
    def get_prices() -> tuple[set[Product], Location]:
        """
        Fetch today's menu and convert its meals and vegetables into products.

        Vegetables are added with a price of 0.0. Request and parsing errors are
        printed instead of raised, so the returned set may be empty or partial.
        A single malformed entry is skipped without discarding the entries after it.

        :return: The set of products found and the location of the resto
        """
        start_time = time.perf_counter()

        products = set()
        locatie = Location(
            "s5: S5",
            "https://www.openstreetmap.org/node/2659815473",
            "Krijgslaan 281, 9000 Gent",
            "+32 000 00 00 00",  # TODO not found
            "https://www.ugent.be/student/nl/meer-dan-studeren/resto/restos/restocampussterre.htm"
        )
        # TODO check if we need to parse "GET /extrafood.json"
        # Construct today's date dynamically for the endpoint
        today = datetime.now()
        api_url = f"https://hydra.ugent.be/api/2.0/resto/menu/nl/{today.year}/{today.month}/{today.day}.json"
        print(f"today is: {today.year}-{today.month}-{today.day} Fetching data from: {api_url}")

        try:
            # Send GET request to the API
            response = requests.get(api_url, timeout=10)
            response.raise_for_status()  # Raise exception for HTTP errors

            # Parse JSON response
            data = response.json()

            if "meals" in data:
                for meal in data["meals"]:
                    try:
                        name = meal.get("name", "Unnamed meal")
                        price = S5Scraper._meal_price(meal)
                    except (AttributeError, IndexError, TypeError, ValueError) as item_err:
                        # One bad entry must not discard the rest of the menu
                        print(f"Skipping malformed meal {meal!r}: {item_err}")
                        continue
                    products.add(Product(name=name, price=price))
            else:
                print("No 'meals' section found in the JSON response.")

            if "vegetables" in data:
                for vegetable in data["vegetables"]:
                    try:
                        name = S5Scraper._vegetable_name(vegetable)
                    except (AttributeError, TypeError) as item_err:
                        print(f"Skipping malformed vegetable {vegetable!r}: {item_err}")
                        continue
                    products.add(Product(name=name, price=0.0))
            else:
                print("No 'vegetables' section found in the JSON response.")

        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except Exception as e:
            # Best effort: report the problem and return whatever was collected so far
            print(f"An error occurred: {e}")

        # Convert the elapsed seconds to minutes and seconds
        minutes, seconds = divmod(time.perf_counter() - start_time, 60)
        print(f"get_prices executed in {int(minutes)} minute(s) and {seconds:.2f} second(s).")
        return products, locatie
Loading

0 comments on commit 5004849

Please sign in to comment.