From 0cf7e59048507877f8a39b0b022f6bb88d92a476 Mon Sep 17 00:00:00 2001 From: MartinSchobben Date: Thu, 15 Aug 2024 10:58:37 +0200 Subject: [PATCH] improved quarto ref regex replace --- src/eo_datascience/clean_nb.py | 18 ++++++++++++++++-- tests/mock.ipynb | 2 +- tests/test_quarto_nb_conversions.py | 10 ++++++++-- 3 files changed, 25 insertions(+), 5 deletions(-) diff --git a/src/eo_datascience/clean_nb.py b/src/eo_datascience/clean_nb.py index 2c3a8b8..8e9fe03 100644 --- a/src/eo_datascience/clean_nb.py +++ b/src/eo_datascience/clean_nb.py @@ -1,6 +1,7 @@ import os import nbformat from pathlib import Path +import re def clean_up_frontmatter(dir = './notebooks', save=False): # Define the path to the notebooks @@ -41,14 +42,27 @@ def convert_refs(dir="./notebooks", save=True): for i in range(len(nb.cells)): if i != 0: if nb.cells[i]["cell_type"] == "markdown": - nb.cells[i].source = nb.cells[i].source.replace(r"[@", r"{cite}`").replace(r"]", r"`") - + nb.cells[i].source = quarto_ref_person_replace(nb.cells[i].source) + nb.cells[i].source = quarto_ref_time_replace(nb.cells[i].source) + # Save the notebook if save: nbformat.write(nb, nb_path) else: return nb +def quarto_ref_person_replace(quarto): + bibs = re.findall(r"(?<=\[\@)[^\]]+", quarto) + for i in bibs: + quarto = re.sub(r"\[\@" + i + "\]", r"{cite:p}`" + i + "`", quarto) + return quarto + +def quarto_ref_time_replace(quarto): + bibs = re.findall(r"(?<=\@)[^\s]+", quarto) + for i in bibs: + quarto = re.sub(r"\@" + i, r"{cite:t}`" + i + "`", quarto) + return quarto + def find_ipynb(dir): root = Path(dir).resolve() nb_paths = [root / file for file in os.listdir(root) if file.endswith('.ipynb')] diff --git a/tests/mock.ipynb b/tests/mock.ipynb index 34c2609..05e623f 100644 --- a/tests/mock.ipynb +++ b/tests/mock.ipynb @@ -15,7 +15,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "[@ref1] " + "lorem ipsum [@anon2024] and [@anon2025] and lorem ipsum @anon2024 and @anon2025" ] } ], diff --git a/tests/test_quarto_nb_conversions.py b/tests/test_quarto_nb_conversions.py index fcbeae9..87f405a 100644 --- a/tests/test_quarto_nb_conversions.py +++ b/tests/test_quarto_nb_conversions.py @@ -1,10 +1,16 @@ import nbformat from pathlib import Path import pytest -from eo_datascience.clean_nb import clean_up_frontmatter, convert_refs +from eo_datascience.clean_nb import clean_up_frontmatter, convert_refs, quarto_ref_person_replace, quarto_ref_time_replace def test_remove_front_matter(): assert clean_up_frontmatter("./tests", False)["cells"][0]["source"] == "# This a mock Jupyter file\nWe use it for testing\n" +def test_ref_regex(): + quarto = [r"lorem ipsum [@anon2024] and [@anon2025]", r"lorem ipsum @anon2024 and @anon2025"] + quarto[0] = quarto_ref_person_replace(quarto[0]) + quarto[1] = quarto_ref_time_replace(quarto[1]) + assert quarto == [r"lorem ipsum {cite:p}`anon2024` and {cite:p}`anon2025`", r"lorem ipsum {cite:t}`anon2024` and {cite:t}`anon2025`"] + def test_conversion_of_refs(): - assert convert_refs("./tests", False)["cells"][1]["source"] == r'{cite}`ref1` ' \ No newline at end of file + assert convert_refs("./tests", False)["cells"][1]["source"] == r"lorem ipsum {cite:p}`anon2024` and {cite:p}`anon2025` and lorem ipsum {cite:t}`anon2024` and {cite:t}`anon2025`" \ No newline at end of file