Skip to content

Commit

Permalink
ensure correct casing when opening articles to check identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
Walavouchey committed Nov 30, 2023
1 parent d093514 commit cf68a12
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 17 deletions.
9 changes: 5 additions & 4 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@
import tests.visual

from wikitools import console
from wikitools.file_utils import exists_case_insensitive
from wikitools.file_utils import file_tree


sys.path.append(os.path.join(os.path.dirname(__file__), ".."))


def clear_function_cache():
    """
    Discard the directory-tree caches that are stored as function attributes.

    exists_case_insensitive and get_canonical_path_casing cache all directory paths.
    During normal execution the current working directory never changes, but tests
    use a new temporary directory for each test case, so cached paths from a
    previous case must not leak into the next one.
    """
    # (function holding the cache, name of the attribute the cache lives in)
    cached_attributes = (
        (exists_case_insensitive, 'all_article_paths_lowercased'),
        (file_tree, 'cache'),
    )
    for owner, attribute_name in cached_attributes:
        # only delete what is actually there; a cache may never have been populated
        if hasattr(owner, attribute_name):
            delattr(owner, attribute_name)


@pytest.fixture(scope='function')
Expand Down Expand Up @@ -92,6 +92,7 @@ def get_visual_tests():


def run_visual_test(tests, test_index, case_index):
clear_function_cache()
test = tests[test_index]
print(f"({test_index + 1}/{len(tests)})", console.red(test.name), "-", test.description)
test_case = test.cases[case_index]
Expand Down
28 changes: 22 additions & 6 deletions wikitools/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,27 @@ def normalised(path: str) -> str:
return normalised


"""
Returns a dictionary of file and directory paths, with a lowercased path for the key and the original casing for the value.
Useful for looking up file paths case-insensitively on case-sensitive file systems.
"""
def file_tree():
# this cache would only become invalid when the current working directory changes, which only happens in tests and not during normal execution
if not hasattr(file_tree, "cache"):
tree = {normalised(article_path.lower()): normalised(article_path) for article_path in itertools.chain(list_all_dirs(["."]), list_all_files(["."]))}
setattr(file_tree, "cache", tree)
return getattr(file_tree, "cache")


def get_canonical_path_casing(path: pathlib.Path) -> pathlib.Path:
    """
    Looks up the correctly cased version of a file/directory path.

    The path is made relative with os.path.relpath, lowercased and normalised,
    and the result is used as the key into the dictionary returned by file_tree().
    The path must exist in that dictionary (throws KeyError otherwise)
    """

    known_paths = file_tree()
    relative_path = os.path.relpath(path.as_posix())
    lookup_key = normalised(relative_path.lower())
    correctly_cased = known_paths[lookup_key]
    return pathlib.Path(correctly_cased)


def exists_case_sensitive(path: pathlib.Path) -> bool:
"""
Case-sensitive file/directory existence check
Expand All @@ -49,13 +70,8 @@ def exists_case_insensitive(path: pathlib.Path) -> bool:
return path.exists()
else:
# case-insensitive directory/file existence checking isn't trivial in case-sensitive file systems because os-provided existence checks can't be relied upon
# this cache would only become invalid when the current working directory changes, which only happens in tests and not during normal execution
if not hasattr(exists_case_insensitive, 'all_article_paths_lowercased'):
article_set = set(normalised(article_path.lower()) for article_path in itertools.chain(list_all_dirs(["."]), list_all_files(["."])))
setattr(exists_case_insensitive, 'all_article_paths_lowercased', article_set)
all_article_paths_lowercased = getattr(exists_case_insensitive, 'all_article_paths_lowercased')

return normalised(os.path.relpath(path.as_posix()).lower()) in all_article_paths_lowercased
return normalised(os.path.relpath(path.as_posix()).lower()) in file_tree()


def is_newspost(path: str) -> bool:
Expand Down
18 changes: 11 additions & 7 deletions wikitools/link_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from wikitools import redirect_parser, reference_parser, errors, link_parser, article_parser
from wikitools import console
from wikitools.file_utils import exists_case_sensitive, exists_case_insensitive
from wikitools.file_utils import get_canonical_path_casing
from wikitools.file_utils import is_article


Expand Down Expand Up @@ -186,17 +187,20 @@ def check_link(
if not parsed_location.fragment:
return None

target_path = repo_path.path
if os.name != 'nt' and not case_sensitive:
target_path = get_canonical_path_casing(target_path)

# link to a section
match repo_path.path_type:
case PathType.GITHUB:
# github links can either be directories or files
# but section links are only relevant for markdown files
if repo_path.path.suffix == ".md":
target_file = repo_path.path
raw_path = repo_path.path.as_posix()
raw_path = target_path.as_posix()
if raw_path not in all_articles:
# this is safe to do since the caller iterates over a copy of all_articles -> we can modify it as we wish
all_articles[raw_path] = article_parser.parse(target_file)
all_articles[raw_path] = article_parser.parse(target_path)

target_article = all_articles[raw_path]

Expand All @@ -209,22 +213,22 @@ def check_link(
return errors.MissingIdentifierError(link, raw_path, parsed_location.fragment, False, translation_outdated)
case PathType.NEWS:
# always a file path
raw_path = repo_path.path.as_posix()
raw_path = target_path.as_posix()
if raw_path not in all_articles:
all_articles[raw_path] = article_parser.parse(repo_path.path)
all_articles[raw_path] = article_parser.parse(target_path)
target_article = all_articles[raw_path]

if parsed_location.fragment not in target_article.identifiers:
return errors.MissingIdentifierError(link, raw_path, parsed_location.fragment, False, False)
case PathType.WIKI:
# directory -> need to find the target article; it could be a translation
# XXX(TicClick): this part assumes there is always an English version of the article in a folder
target_file = repo_path.path / article.filename
target_file = target_path / article.filename
translation = target_file # verified to be the case later
no_translation_available = article.filename != 'en.md' and not target_file.is_file()

if no_translation_available:
target_file = repo_path.path / 'en.md'
target_file = target_path / 'en.md'

raw_path = target_file.as_posix()
if raw_path not in all_articles:
Expand Down

0 comments on commit cf68a12

Please sign in to comment.