Skip to content

Commit

Permalink
#164 Transform common project URLs to repository
Browse files Browse the repository at this point in the history
  • Loading branch information
TD-Base committed Jul 13, 2024
1 parent 6697e79 commit dbdc0b3
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 21 deletions.
4 changes: 4 additions & 0 deletions docs/changes.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ Version 1.8.1, 2024-07-xx
`#160 <https://github.com/roskakori/pygount/issues/160>`_).
* Removed deprecated code: (contributed by Marco Gambone and Niels Vanden Bussche, issue
`#47 <https://github.com/roskakori/pygount/issues/47>`_).
* Fixed silent error when git fails: (contributed by Tom De Bièvre, issue
`#162 <https://github.com/roskakori/pygount/issues/162>`_).
* Transformed common project URLs to repository URLs: (contributed by Tom De Bièvre, issue
`#164 <https://github.com/roskakori/pygount/issues/164>`_).

Version 1.8.0, 2024-05-13

Expand Down
48 changes: 30 additions & 18 deletions pygount/analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,11 @@
from pygount.common import mapped_repr
from pygount.git_storage import GitStorage, git_remote_url_and_revision_if_any

GIT_REPO_REGEX = re.compile(r"^(https?://|git@)")
HAS_URL_PREFIX = re.compile(r"^(https?://)")
_ALLOWED_GIT_PLATFORMS = ["github.com", "bitbucket.org", "gitlab.com"]
GIT_REPO_REGEX = re.compile(
r"^(https?://|git@)({})/[\w-]+/[\w-]+".format("|".join(map(re.escape, _ALLOWED_GIT_PLATFORMS)))
)

# Attempt to import chardet.
try:
Expand All @@ -46,7 +50,6 @@
[".?*", "_svn", "__pycache__"] # Subversion hack for Windows # Python byte code
)


#: Pygments token type; we need to define our own type because pygments' ``_TokenType`` is internal.
TokenType = type(pygments.token.Token)

Expand Down Expand Up @@ -591,29 +594,38 @@ def _paths_and_group_to_analyze(self, path_to_analyse_pattern, group=None) -> It
yield path_to_analyse, actual_group

def _source_paths_and_groups_to_analyze(self, source_patterns_to_analyze) -> List[Tuple[str, str]]:
def _process_source_pattern(source_pattern):
remote_url, revision = git_remote_url_and_revision_if_any(source_pattern)
if remote_url is not None:
git_storage = GitStorage(remote_url, revision)
self._git_storages.append(git_storage)
git_storage.extract()
# TODO#113: Find a way to exclude the ugly temp folder from the source path.
result.extend(self._paths_and_group_to_analyze(git_storage.temp_folder))
else:
has_url_prefix = re.match(HAS_URL_PREFIX, source_pattern)
if has_url_prefix:
git_url_match = re.match(GIT_REPO_REGEX, source_pattern)
if git_url_match is not None:
source_pattern = source_pattern.rstrip("/")
_process_source_pattern(source_pattern + ".git")
else:
raise pygount.Error(
f"invalid git url: {source_pattern} is not a valid git url, "
f"it needs to match the pattern "
f"http(s)://({'|'.join(_ALLOWED_GIT_PLATFORMS)})/<...>/<...>.git"
)
else:
result.extend(self._paths_and_group_to_analyze(source_pattern_to_analyze))

assert source_patterns_to_analyze is not None
result = []
# NOTE: We could avoid initializing `source_pattern_to_analyze` here by moving the `try` inside
# the loop, but this would incur a performance overhead (ruff's PERF203).
source_pattern_to_analyze = None
try:
for source_pattern_to_analyze in source_patterns_to_analyze:
remote_url, revision = git_remote_url_and_revision_if_any(source_pattern_to_analyze)
if remote_url is not None:
git_storage = GitStorage(remote_url, revision)
self._git_storages.append(git_storage)
git_storage.extract()
# TODO#113: Find a way to exclude the ugly temp folder from the source path.
result.extend(self._paths_and_group_to_analyze(git_storage.temp_folder))
else:
git_url_match = re.match(GIT_REPO_REGEX, source_pattern_to_analyze)
if git_url_match is not None:
raise pygount.Error(
'URL to git repository must end with ".git", for example '
"[email protected]:roskakori/pygount.git or "
"https://github.com/roskakori/pygount.git."
)
result.extend(self._paths_and_group_to_analyze(source_pattern_to_analyze))
_process_source_pattern(source_pattern_to_analyze)
except OSError as error:
assert source_pattern_to_analyze is not None
raise OSError(f'cannot scan "{source_pattern_to_analyze}" for source files: {error}') from error
Expand Down
13 changes: 10 additions & 3 deletions tests/test_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import pytest
from pygments import lexers, token

import pygount
from pygount import Error as PygountError
from pygount import analysis, common
from pygount.analysis import (
Expand Down Expand Up @@ -64,11 +65,17 @@ def test_can_skip_dot_folder(self):
scanned_names = [os.path.basename(source_path) for source_path, _ in scanner.source_paths()]
assert scanned_names == [name_to_include]

def test_fails_on_non_repo_url(self):
def test_succeeds_on_not_git_extension(self):
non_repo_urls = [["https://github.com/roskakori/pygount/"], ["[email protected]:roskakori/pygount"]]
for non_repo_url in non_repo_urls:
with analysis.SourceScanner(non_repo_url) as scanner, pytest.raises(PygountError):
next(scanner.source_paths())
with analysis.SourceScanner(non_repo_url) as scanner:
_ = list(scanner.source_paths())

def test_fails_on_non_git_urls(self):
non_repo_urls = [["https://no/git/url"], ["https://google.com/nogit"]]
for non_repo_url in non_repo_urls:
with analysis.SourceScanner(non_repo_url) as scanner, pytest.raises(pygount.Error):
_ = list(scanner.source_paths())

def test_can_find_python_files_in_dot(self):
scanner = analysis.SourceScanner(["."], "py")
Expand Down

0 comments on commit dbdc0b3

Please sign in to comment.