-
Notifications
You must be signed in to change notification settings - Fork 22
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
#164 Transform common project URLs to repository
- Loading branch information
Showing
3 changed files
with
44 additions
and
21 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,11 @@ | |
from pygount.common import mapped_repr | ||
from pygount.git_storage import GitStorage, git_remote_url_and_revision_if_any | ||
|
||
GIT_REPO_REGEX = re.compile(r"^(https?://|git@)") | ||
HAS_URL_PREFIX = re.compile(r"^(https?://)") | ||
_ALLOWED_GIT_PLATFORMS = ["github.com", "bitbucket.org", "gitlab.com"] | ||
GIT_REPO_REGEX = re.compile( | ||
r"^(https?://|git@)({})/[\w-]+/[\w-]+".format("|".join(map(re.escape, _ALLOWED_GIT_PLATFORMS))) | ||
) | ||
|
||
# Attempt to import chardet. | ||
try: | ||
|
@@ -46,7 +50,6 @@ | |
[".?*", "_svn", "__pycache__"] # Subversion hack for Windows # Python byte code | ||
) | ||
|
||
|
||
#: Pygments token type; we need to define our own type because pygments' ``_TokenType`` is internal. | ||
TokenType = type(pygments.token.Token) | ||
|
||
|
@@ -591,29 +594,38 @@ def _paths_and_group_to_analyze(self, path_to_analyse_pattern, group=None) -> It | |
yield path_to_analyse, actual_group | ||
|
||
def _source_paths_and_groups_to_analyze(self, source_patterns_to_analyze) -> List[Tuple[str, str]]: | ||
def _process_source_pattern(source_pattern): | ||
remote_url, revision = git_remote_url_and_revision_if_any(source_pattern) | ||
if remote_url is not None: | ||
git_storage = GitStorage(remote_url, revision) | ||
self._git_storages.append(git_storage) | ||
git_storage.extract() | ||
# TODO#113: Find a way to exclude the ugly temp folder from the source path. | ||
result.extend(self._paths_and_group_to_analyze(git_storage.temp_folder)) | ||
else: | ||
has_url_prefix = re.match(HAS_URL_PREFIX, source_pattern) | ||
if has_url_prefix: | ||
git_url_match = re.match(GIT_REPO_REGEX, source_pattern) | ||
if git_url_match is not None: | ||
source_pattern = source_pattern.rstrip("/") | ||
_process_source_pattern(source_pattern + ".git") | ||
else: | ||
raise pygount.Error( | ||
f"invalid git url: {source_pattern} is not a valid git url, " | ||
f"it needs to match the pattern " | ||
f"http(s)://({'|'.join(_ALLOWED_GIT_PLATFORMS)})/<...>/<...>.git" | ||
) | ||
else: | ||
result.extend(self._paths_and_group_to_analyze(source_pattern_to_analyze)) | ||
|
||
assert source_patterns_to_analyze is not None | ||
result = [] | ||
# NOTE: We could avoid initializing `source_pattern_to_analyze` here by moving the `try` inside | ||
# the loop, but this would incur a performance overhead (ruff's PERF203). | ||
source_pattern_to_analyze = None | ||
try: | ||
for source_pattern_to_analyze in source_patterns_to_analyze: | ||
remote_url, revision = git_remote_url_and_revision_if_any(source_pattern_to_analyze) | ||
if remote_url is not None: | ||
git_storage = GitStorage(remote_url, revision) | ||
self._git_storages.append(git_storage) | ||
git_storage.extract() | ||
# TODO#113: Find a way to exclude the ugly temp folder from the source path. | ||
result.extend(self._paths_and_group_to_analyze(git_storage.temp_folder)) | ||
else: | ||
git_url_match = re.match(GIT_REPO_REGEX, source_pattern_to_analyze) | ||
if git_url_match is not None: | ||
raise pygount.Error( | ||
'URL to git repository must end with ".git", for example ' | ||
"git@github.com:roskakori/pygount.git or " | ||
"https://github.com/roskakori/pygount.git." | ||
) | ||
result.extend(self._paths_and_group_to_analyze(source_pattern_to_analyze)) | ||
_process_source_pattern(source_pattern_to_analyze) | ||
except OSError as error: | ||
assert source_pattern_to_analyze is not None | ||
raise OSError(f'cannot scan "{source_pattern_to_analyze}" for source files: {error}') from error | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,7 @@ | |
import pytest | ||
from pygments import lexers, token | ||
|
||
import pygount | ||
from pygount import Error as PygountError | ||
from pygount import analysis, common | ||
from pygount.analysis import ( | ||
|
@@ -64,11 +65,17 @@ def test_can_skip_dot_folder(self): | |
scanned_names = [os.path.basename(source_path) for source_path, _ in scanner.source_paths()] | ||
assert scanned_names == [name_to_include] | ||
|
||
def test_fails_on_non_repo_url(self): | ||
def test_succeeds_on_not_git_extension(self): | ||
non_repo_urls = [["https://github.com/roskakori/pygount/"], ["git@github.com:roskakori/pygount"]] | ||
for non_repo_url in non_repo_urls: | ||
with analysis.SourceScanner(non_repo_url) as scanner, pytest.raises(PygountError): | ||
next(scanner.source_paths()) | ||
with analysis.SourceScanner(non_repo_url) as scanner: | ||
_ = list(scanner.source_paths()) | ||
|
||
def test_fails_on_non_git_urls(self): | ||
non_repo_urls = [["https://no/git/url"], ["https://google.com/nogit"]] | ||
for non_repo_url in non_repo_urls: | ||
with analysis.SourceScanner(non_repo_url) as scanner, pytest.raises(pygount.Error): | ||
_ = list(scanner.source_paths()) | ||
|
||
def test_can_find_python_files_in_dot(self): | ||
scanner = analysis.SourceScanner(["."], "py") | ||
|