"""JSON-file backed bookkeeping for scraped ``.hlds`` menu files.

All state lives in ``db.json`` (a relative path, so it is resolved against the
current working directory). The ``"files"`` key maps the path of a scraped
file to a record holding that path and the path it has to be synced to; the
``"latest_sync_time"`` key stores the value passed to
``set_latest_sync_time``.

NOTE: importing this module has side effects. ``init_db()`` and
``test_file_comparison()`` are both called at module level, so a plain
``import db`` may create and update ``db.json``.
"""
import hashlib
import json
import os
from os.path import exists
from typing import Dict, List

db_filename = "db.json"


def init_db() -> None:
    """Create the database file holding an empty JSON object if it is missing."""
    if exists(db_filename):
        return
    print("Initializing json file database")
    with open(db_filename, "w", encoding="utf-8") as db_file:
        db_file.write("{}")


init_db()


def _load_db() -> dict:
    """Read the database file and return its parsed content."""
    with open(db_filename, "r", encoding="utf-8") as db_file:
        return json.load(db_file)


def _save_db(db: dict) -> None:
    """Overwrite the database file with *db*, serialized with a 4-space indent."""
    with open(db_filename, "w", encoding="utf-8") as db_file:
        db_file.write(json.dumps(db, indent=4))


def calculate_file_hash(file_path: str) -> str:
    """
    Calculate the SHA256 hash of a file's contents.

    :param file_path: Path to the file
    :return: Hexadecimal hash as a string
    """
    hasher = hashlib.sha256()
    with open(file_path, "rb") as file:
        # Read in fixed-size chunks so the whole file is never held in memory.
        while chunk := file.read(8192):
            hasher.update(chunk)
    return hasher.hexdigest()


def is_file_different(hlds_file: str, menu_file: str) -> bool:
    """
    Tell whether two files differ, judged by their SHA256 hashes.

    :param hlds_file: Path to the file in the ``hlds_files`` directory
    :param menu_file: Path to the file in the ``menus`` directory
    :return: True if the files are different, False otherwise
    """
    return calculate_file_hash(hlds_file) != calculate_file_hash(menu_file)


def get_manual_file_mapping() -> Dict[str, str]:
    """
    Build the hand-maintained mapping between scraped files and synced menus.

    Paths are built with ``os.path.join``, so they use the separator of the
    platform the code runs on.

    :return: A dictionary mapping file paths in ``hlds_files`` to file paths
        in ``menus``
    """
    hlds_dir = "hlds_files"
    menu_dir = "menus"

    # Manual mapping of file names
    file_mapping = {
        os.path.join(hlds_dir, "bicyclette.hlds"): os.path.join(menu_dir, "la_bicylette.hlds"),
        os.path.join(hlds_dir, "bocca_ovp.hlds"): os.path.join(menu_dir, "bocca_ovp.hlds"),
        os.path.join(hlds_dir, "metropol.hlds"): os.path.join(menu_dir, "pitta_metropol.hlds"),
        os.path.join(hlds_dir, "pizza_donna.hlds"): os.path.join(menu_dir, "prima_donna.hlds"),
        os.path.join(hlds_dir, "s5.hlds"): os.path.join(menu_dir, "s5.hlds"),
        os.path.join(hlds_dir, "simpizza.hlds"): os.path.join(menu_dir, "simpizza.hlds"),
        # Add more mappings here as needed
    }

    return file_mapping


def test_file_comparison() -> None:
    """
    Compare every mapped file pair and register new or changed files.

    A scraped file that is missing is skipped. A scraped file whose menu
    counterpart is missing, or whose content differs from it, is written to
    the database through ``add_file_to_db``. The outcome of each comparison is
    printed.
    """
    for hlds_file, menu_file in get_manual_file_mapping().items():
        if not os.path.exists(hlds_file):
            print(f"{hlds_file} does not exist. Skipping...")
            continue

        if not os.path.exists(menu_file):
            print(f"{menu_file} does not exist. File {hlds_file} is new.")
            add_file_to_db(hlds_file, menu_file)  # Add the new file to the database
            continue

        if is_file_different(hlds_file, menu_file):
            print(f"Files {hlds_file} and {menu_file} are different.")
            add_file_to_db(hlds_file, menu_file)  # Add the different file to the database
        else:
            print(f"Files {hlds_file} and {menu_file} are identical.")


def add_file_to_db(hlds_file: str, menu_file: str) -> None:
    """
    Add or overwrite the database entry of a scraped file.

    :param hlds_file: The file from the ``hlds_files`` directory; used as key.
    :param menu_file: The file from the ``menus`` directory; stored as the
        ``sync-to`` target.
    """
    db = _load_db()
    files = db.get("files", {})

    # Add or update the file entry in the database
    files[hlds_file] = {
        "local_file_path": hlds_file,
        "metadata": {"sync-to": menu_file},
    }

    db["files"] = files
    _save_db(db)
    print(f"Added {hlds_file} to the database as different.")


def get_latest_sync_time() -> int:
    """Return the stored ``latest_sync_time``, or 0 when none was stored yet."""
    return _load_db().get("latest_sync_time", 0)


def set_latest_sync_time(le_date) -> None:
    """Store *le_date* as ``latest_sync_time`` in the database."""
    db = _load_db()
    db["latest_sync_time"] = le_date
    _save_db(db)


def get_files() -> Dict[str, dict]:
    """
    Return the ``"files"`` section of the database.

    :return: A dictionary keyed by scraped file path (empty when the database
        has no ``"files"`` section), not a list.
    """
    return _load_db().get("files", {})


def set_local_file_path(file_id: str, local_file_path: str) -> dict:
    """
    Update the ``local_file_path`` of an existing file entry.

    :param file_id: Key of the entry inside the ``"files"`` section.
    :param local_file_path: New value for ``local_file_path``.
    :return: The updated entry.
    :raises KeyError: If the database has no ``"files"`` section or no entry
        for *file_id*.
    """
    db = _load_db()
    file = db["files"][file_id]
    file["local_file_path"] = local_file_path
    _save_db(db)
    return file


test_file_comparison()
def run_s5():
    """Scrape the S5 menu and write it to ``hlds_files/s5.hlds``.

    The location header returned by the scraper is written first, followed by
    the products rendered by ``translate_products_to_text``.

    :return: The number of scraped products.
    """
    # Function-scope import: the visible change to this file adds no import of
    # S5Scraper, and scrapers/s5_scraper.py is introduced by the same change.
    # If the module already imports it at the top, this is a harmless no-op.
    from scrapers.s5_scraper import S5Scraper

    s5_products, s5_location = S5Scraper.get_prices()
    with open("hlds_files/s5.hlds", "w", encoding="utf-8") as file:
        file.write(str(s5_location) + "\n")
        file.write(translate_products_to_text(s5_products))
    # flush=True matches the progress print of run_bocca_ovp.
    print("s5 done", flush=True)
    return len(s5_products)
# --- run_sync.py -------------------------------------------------------------
import traceback

import db
import sync_gitmate as sync


def sync_files_to_gitea():
    """Sync every file registered in the database to the git repository.

    Each entry returned by ``db.get_files()`` is handed to ``sync.sync_file``.
    A failure for one file is reported with its traceback and does not stop
    the remaining files from being synced.
    """
    repo, api_handler = sync.init_sync()
    # Load the database once and reuse it for both the debug print and the loop.
    files = db.get_files()
    print(files.items())
    for file_info in files.values():
        try:
            sync.sync_file(repo, api_handler, file_info)
        except Exception:
            # Top-level boundary: report the failure and continue with the next file.
            print("Critical error: Failed to sync file to Gitea")
            traceback.print_exc()


if __name__ == "__main__":
    print()
    print("================================================")
    print("== Syncing files to git ==")
    sync_files_to_gitea()
    print()


# --- scrapers/s5_scraper.py --------------------------------------------------
import json
import time
from datetime import datetime

import requests

from data_types.choice import ChoiceList, Choice
from data_types.location import Location
from data_types.product import Product
from scrapers.scraper import Scraper
from utils import extract_spans, filter_divs, create_heading_contains_h2_with, fetch_and_parse_html, comma_float


def _parse_price(raw_price: str) -> float:
    """Turn a price string such as ``"€ 5,05"`` into a float.

    The euro sign is optional and need not be separated by a space; an empty
    string yields 0.0. Raises ``ValueError`` when the amount is not numeric.
    """
    tokens = raw_price.replace("€", " ").split()
    return comma_float(tokens[0]) if tokens else 0.0


def _vegetable_name(entry: str) -> str:
    """Return the text after the first colon of *entry*, or all of it if there is none."""
    _, colon, tail = entry.partition(":")
    return (tail if colon else entry).strip()


class S5Scraper(Scraper):
    """Scraper for the S5 menu published through the hydra.ugent.be API."""

    @staticmethod
    def get_prices() -> tuple[set[Product], Location]:
        """Fetch today's menu and convert it to products.

        Entries under ``"meals"`` become products with their listed price;
        entries under ``"vegetables"`` become products priced 0.0. Request and
        parsing errors are printed, not raised, so the products collected so
        far (possibly none) are still returned.

        :return: The set of scraped products and the fixed S5 location.
        """
        start_time = time.perf_counter()

        products = set()
        locatie = Location(
            "s5: S5",
            "https://www.openstreetmap.org/node/2659815473",
            "Krijgslaan 281, 9000 Gent",
            "+32 000 00 00 00",  # TODO not found
            "https://www.ugent.be/student/nl/meer-dan-studeren/resto/restos/restocampussterre.htm"
        )
        # TODO check if we need to parse "GET /extrafood.json"
        # Construct today's date dynamically for the endpoint
        today = datetime.now()
        api_url = f"https://hydra.ugent.be/api/2.0/resto/menu/nl/{today.year}/{today.month}/{today.day}.json"
        print(f"today is: {today.year}-{today.month}-{today.day} Fetching data from: {api_url}")

        try:
            response = requests.get(api_url, timeout=10)
            response.raise_for_status()  # Raise exception for HTTP errors
            data = response.json()

            if "meals" in data:
                for meal in data["meals"]:
                    name = meal.get("name", "Unnamed meal")
                    try:
                        # A missing or null price falls back to the zero price.
                        price = _parse_price(meal.get("price") or "€ 0.0")
                    except (AttributeError, ValueError) as price_err:
                        # One malformed price must not discard the rest of the menu.
                        print(f"Skipping meal {name!r}: cannot parse price ({price_err})")
                        continue
                    products.add(Product(name=name, price=price))
            else:
                print("No 'meals' section found in the JSON response.")

            if "vegetables" in data:
                for vegetable in data["vegetables"]:
                    # presumably each entry is a "<label>: <name>" string — verify against the API
                    products.add(Product(name=_vegetable_name(vegetable), price=0.0))
            else:
                print("No 'vegetables' section found in the JSON response.")

        except requests.exceptions.HTTPError as http_err:
            print(f"HTTP error occurred: {http_err}")
        except Exception as e:
            # Best-effort scraper: report the problem and return what was collected.
            print(f"An error occurred: {e}")

        elapsed_time = time.perf_counter() - start_time
        # Convert seconds to minutes and seconds
        minutes = int(elapsed_time // 60)
        seconds = elapsed_time % 60
        print(f"get_prices executed in {minutes} minute(s) and {seconds:.2f} second(s).")
        return products, locatie
branch repo.git.switch(branch_name) + repo.git.pull("origin", branch_name) else: repo.git.switch("-c", branch_name) if branch_name in repo.remotes.origin.refs: @@ -77,21 +82,24 @@ def sync_file(repo, api_instance, file_info): path = file_info["local_file_path"] sync_to = file_info["metadata"]["sync-to"] - branch_name = f"codimd-sync_{sync_to}" + # branch_name = f"hlds-sync_{sync_to}" + branch_name = f"hlds_sync_{os.path.basename(sync_to).replace(".hlds", "")}" print(f"Starting sync of {path}") clear_repo(repo) print(f" Checking out onto branch: {branch_name}") checkout_branch(repo, branch_name) with open(path) as r: - pathlib.Path(f'{REPO_FOLDER}/{sync_to[:sync_to.rfind("/")]}').mkdir( - parents=True, exist_ok=True - ) - with open(f"{REPO_FOLDER}/{sync_to}", "w") as w: + # pathlib.Path(f"{REPO_FOLDER}/{sync_to}").mkdir( + # parents=True, exist_ok=True + # ) + print(sync_to) + with open(f"{sync_to}", "w") as w: w.write(r.read()) if repo.git.diff() or repo.untracked_files: print(" Note has changes. Making a commit.") - repo.index.add([sync_to]) - repo.index.commit("Updating file with codimd version") + print("working tree dir: ", repo.working_tree_dir) + repo.index.add([os.path.basename(sync_to)]) + repo.index.commit("Updating file with hlds version") print(f" Pushing to branch: {branch_name}") repo.git.push("-u", "origin", branch_name) @@ -103,7 +111,7 @@ def sync_file(repo, api_instance, file_info): branch_requests = [r for r in resp if r.head.ref == branch_name] if len(branch_requests) > 0: print( - " Creating a new merge request to update the git document with the new version from CodiMD." + " Creating a new merge request to update the git menu with the new version from the hlds menu." 
) api_instance.repo_create_pull_request( GIT_ORG, @@ -111,21 +119,21 @@ def sync_file(repo, api_instance, file_info): body=giteapy.CreatePullRequestOption( base="master", head=branch_name, - title=f"[CodiMD sync] Update document {sync_to}", + title=f"[hlds sync] Update document {sync_to}", ), ) else: - print(" Creating a new merge request to add the document to git.") + print(" Creating a new merge request to add the Menu to git.") api_instance.repo_create_pull_request( GIT_ORG, GIT_REPO, body=giteapy.CreatePullRequestOption( base="master", head=branch_name, - title=f"[CodiMD sync] Add document {sync_to}", + title=f"[hlds sync] Add document {sync_to}", ), ) else: print(" Merge request was already open.") else: - print(" Note has no changes.") + print(" Menu has no changes.") diff --git a/utils.py b/utils.py index d70c745..e296f46 100644 --- a/utils.py +++ b/utils.py @@ -13,7 +13,7 @@ def comma_float(inp: str) -> float: return float(inp.replace(',', '.')) -def fetch_and_parse_html(url: str) -> BeautifulSoup: +def fetch_and_parse_html(url: str) -> BeautifulSoup | None: response = safe_get(url) if response == "": return None @@ -139,6 +139,7 @@ def parse_pdf(file_path, coords=None): return page.extract_text() except Exception as e: print(f"Failed to parse PDF: {e}") + return "" def parse_pdf_with_strip_split_enters(file_path, coords=None):