Skip to content

Commit

Permalink
feat: support url arguments
Browse files Browse the repository at this point in the history
  • Loading branch information
nullswan committed Apr 29, 2022
1 parent 9333c68 commit 0e370c0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 4 deletions.
5 changes: 3 additions & 2 deletions graphinder/io/readers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@
from io import TextIOWrapper

from graphinder.pool.domain import Domain
from graphinder.utils.filters import transform_url_in_domain
from graphinder.utils.logger import get_logger


def read_domains(file: TextIOWrapper | None, domain: str | None) -> list[Domain]:
"""Read domains from file."""

if domain is not None:
return [Domain(domain)]
return [Domain(transform_url_in_domain(domain))]

if file is None:
get_logger('io').critical('no input file specified, skipping reading domains..')
return []

urls: list[str] = list(set(file.read().splitlines()))
domains: list[Domain] = [Domain(url) for url in urls]
domains: list[Domain] = [Domain(transform_url_in_domain(url)) for url in urls]

get_logger('io').success(f'found { len(domains) } domains.')

Expand Down
18 changes: 18 additions & 0 deletions graphinder/utils/filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def filter_urls(urls: set[Url]) -> set[Url]:

def remove_duplicate_domains(domains: list[str]) -> list[str]:
"""if domains has example.com and www.example.com this will remove www.example.com."""

corrected_domains = []

for domain in domains:
Expand All @@ -82,3 +83,20 @@ def remove_duplicate_domains(domains: list[str]) -> list[str]:
corrected_domains.append(domain)

return corrected_domains


def transform_url_in_domain(url: str) -> str:
    """Reduce a URL to its bare domain.

    Strips an ``http://`` or ``https://`` scheme and a leading ``www.``
    subdomain, e.g. ``https://www.example.com`` -> ``example.com``.

    Args:
        url: a URL or bare domain string.

    Returns:
        The input with scheme and ``www.`` prefix removed.
    """

    # NOTE: str.lstrip(chars) removes *characters* from a set, not a prefix,
    # so lstrip('http://') would also eat the leading 'h' of 'httpbin.org'.
    # str.removeprefix removes the exact prefix only (and is a no-op when
    # the prefix is absent, so no startswith() guard is needed).
    if url.startswith('http://'):
        url = url.removeprefix('http://')
    elif url.startswith('https://'):
        url = url.removeprefix('https://')

    return url.removeprefix('www.')
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "graphinder"
version = "1.0.30"
version = "1.0.31"
description = "Escape Graphinder"
authors = ["Escape Technologies SAS <[email protected]>"]
maintainers = [
Expand Down
10 changes: 9 additions & 1 deletion tests/unit/utils/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from graphinder.entities.pool import Url
from graphinder.io.providers import gql_endpoints_characterizer
from graphinder.utils.filters import filter_common, filter_urls, remove_duplicate_domains
from graphinder.utils.filters import filter_common, filter_urls, remove_duplicate_domains, transform_url_in_domain


def test_filter_common() -> None:
Expand Down Expand Up @@ -63,3 +63,11 @@ def test_remove_duplicate_domains() -> None:
assert remove_duplicate_domains(domains) == [
'example.com',
]


def test_transform_url_in_domain() -> None:
    """transform_url_in_domain strips the scheme and www prefix."""

    expected: str = 'example.com'

    assert transform_url_in_domain('https://www.example.com') == expected

0 comments on commit 0e370c0

Please sign in to comment.