Skip to content

Commit

Permalink
make case sensitivity toggleable (and default to off)
Browse files Browse the repository at this point in the history
  • Loading branch information
Walavouchey committed Nov 23, 2023
1 parent 89d277e commit 46ff072
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 28 deletions.
39 changes: 22 additions & 17 deletions tests/test_link_checker.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import pathlib
import textwrap

import pytest

import tests.conftest
import tests.utils as utils

from wikitools import article_parser, link_checker, link_parser, redirect_parser, errors as error_types, reference_parser


Expand All @@ -13,31 +16,33 @@ def dummy_article(path):


class TestArticleLinks:
def test__valid_absolute_link(self, root):
utils.create_files(
root,
('wiki/First_article/en.md', '# First article')
)

link = link_parser.find_link('Check the [first article](/wiki/First_article).')
assert link
error = link_checker.check_link(
article=dummy_article('does/not/matter'), link=link, redirects={}, references={}, all_articles={}
)
assert error is None

def test__invalid_absolute_link__wrong_capitalisation(self, root):
@pytest.mark.parametrize(
"payload",
[
{"case_sensitive": False, "capitalisation_correct": False, "should_error": False},
{"case_sensitive": False, "capitalisation_correct": True, "should_error": False},
{"case_sensitive": True, "capitalisation_correct": False, "should_error": True},
{"case_sensitive": True, "capitalisation_correct": True, "should_error": False},
]
)
def test__valid_absolute_link(self, root, payload):
utils.create_files(
root,
('wiki/First_article/en.md', '# First article')
)

link = link_parser.find_link('Check the [first article](/wiki/First_Article).')
link = link_parser.find_link('Check the [first article](/wiki/{}).'
.format("First_article" if payload["capitalisation_correct"] else "First_Article"))
assert link
error = link_checker.check_link(
article=dummy_article('does/not/matter'), link=link, redirects={}, references={}, all_articles={}
article=dummy_article('does/not/matter'),
link=link, redirects={}, references={}, all_articles={},
case_sensitive=payload["case_sensitive"]
)
assert isinstance(error, error_types.LinkNotFoundError)
if payload["should_error"]:
assert isinstance(error, error_types.LinkNotFoundError)
else:
assert error is None

def test__invalid_absolute_link(self, root):
utils.create_files(
Expand Down
19 changes: 16 additions & 3 deletions wikitools/file_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,9 @@ def normalised(path: str) -> str:
return normalised


def exists(path: pathlib.Path):
def exists_case_sensitive(path: pathlib.Path):
"""
Case-sensitive file existence check
File paths are case-insensitive on some operating systems like Windows, but we rely on file existence checks to take casing into account
"""

if os.name == 'nt':
Expand All @@ -40,6 +38,21 @@ def exists(path: pathlib.Path):
return path.exists()


def exists_case_insensitive(path: pathlib.Path):
    """
    Case-insensitive file existence check

    On Windows (``os.name == 'nt'``) this defers to ``path.exists()``.
    On other systems the normalised POSIX form of ``path`` is looked up in a
    set built from ``list_all_articles_and_newsposts()``. That set is built on
    the first call and stored as an attribute on this function, so later calls
    reuse it; only paths present in that listing are reported as existing.
    """

    if os.name == 'nt':
        return path.exists()

    try:
        known_paths = exists_case_insensitive.all_article_paths_lowercased
    except AttributeError:
        # First call on this platform: build the lookup set once and keep it
        # on the function object for subsequent calls.
        # NOTE(review): presumably the listing yields already-lowercased paths
        # and normalised() lowercases its input -- confirm against file_utils.
        known_paths = set(list_all_articles_and_newsposts())
        exists_case_insensitive.all_article_paths_lowercased = known_paths

    return normalised(path.as_posix()) in known_paths


def is_newspost(path: str) -> bool:
return (
normalised(os.path.dirname(path)).startswith("news") and
Expand Down
21 changes: 14 additions & 7 deletions wikitools/link_checker.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
import typing

from wikitools import redirect_parser, reference_parser, errors, link_parser, article_parser
from wikitools import console, file_utils
from wikitools import console
from wikitools.file_utils import exists_case_sensitive, exists_case_insensitive as exists


def check_link(
article: article_parser.Article, link: link_parser.Link,
redirects: redirect_parser.Redirects, references: reference_parser.References,
all_articles: typing.Dict[str, article_parser.Article]
all_articles: typing.Dict[str, article_parser.Article],
case_sensitive: bool = False
) -> typing.Optional[errors.LinkError]:
"""
Verify that the link is valid:
Expand All @@ -19,6 +21,10 @@ def check_link(
- Relative links are parsed under the assumption that they are located inside the current article's directory
"""

if case_sensitive:
global exists
exists = exists_case_sensitive

# resolve the link, if possible
reference = link.resolve(references)
if reference is None and link.is_reference:
Expand All @@ -35,7 +41,7 @@ def check_link(
repo_target = pathlib.Path(f"news/{year}/{target.name}")
location = '/' + repo_target.as_posix()

if not file_utils.exists(repo_target):
if not exists(repo_target):
# news posts don't have redirects
return errors.LinkNotFoundError(link, reference, location)
else:
Expand Down Expand Up @@ -67,15 +73,15 @@ def check_link(

target = pathlib.Path(location[1:]) # strip leading slash
# no article? could be a redirect
if not file_utils.exists(target):
if not exists(target):
redirect_source = target.relative_to('wiki').as_posix()
try:
redirect_destination, redirect_line_no = redirects[redirect_source.lower()]
except KeyError:
return errors.LinkNotFoundError(link, reference, location)

target = pathlib.Path('wiki') / redirect_destination
if not file_utils.exists(target):
if not exists(target):
return errors.BrokenRedirectError(link, redirect_source, redirect_line_no, redirect_destination)

# link to an article in general, article exists -> good
Expand Down Expand Up @@ -117,7 +123,8 @@ def check_link(

def check_article(
article: article_parser.Article, redirects: redirect_parser.Redirects,
all_articles: typing.Dict[str, article_parser.Article]
all_articles: typing.Dict[str, article_parser.Article],
case_sensitive: bool = False
) -> typing.Dict[int, typing.List[errors.LinkError]]:
"""
Try resolving links in the article to other articles or files.
Expand All @@ -127,7 +134,7 @@ def check_article(
for lineno, line in article.lines.items():
local_errors = [
errors for errors in (
check_link(article, link, redirects, article.references, all_articles)
check_link(article, link, redirects, article.references, all_articles, case_sensitive)
for link in line.links
)
if errors
Expand Down
4 changes: 3 additions & 1 deletion wikitools_cli/commands/check_links.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ def parse_args(args):
parser.add_argument("--to-sections-in-outdated-translations", action='store_true', help="check section links in translations that point to outdated translations of the same language")
parser.add_argument("--to-sections-in-missing-translations", action='store_true', help="check section links in translations that point to articles with no available translations of the same language")

parser.add_argument("--case-sensitive", action='store_true', help="check file existence case-sensitively")

parser.add_argument("-r", "--root", help="specify repository root, current working directory assumed otherwise")
return parser.parse_args(args)

Expand Down Expand Up @@ -119,7 +121,7 @@ def main(*args):
link_count += sum(len(_.links) for _ in a.lines.values())
file_count += 1

errors = link_checker.check_article(a, redirects, articles)
errors = link_checker.check_article(a, redirects, articles, args.case_sensitive)

if not args.to_sections_in_outdated_translations:
errors = filter_errors(lambda e: not (isinstance(e, error_types.MissingIdentifierError) and e.translation_outdated), errors)
Expand Down

0 comments on commit 46ff072

Please sign in to comment.