Skip to content

Commit

Permalink
adding more testing, removing some prints
Browse files Browse the repository at this point in the history
  • Loading branch information
OSintt committed Jul 16, 2024
1 parent f74b8f1 commit 6b6201d
Show file tree
Hide file tree
Showing 7 changed files with 35 additions and 10 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,6 @@ docker-compose.override.yml
*.log

# twine
$HOME

.pytest_cache
.pypirc
Expand Down
9 changes: 5 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@ build-backend = "setuptools.build_meta"

[tool.poetry]
name = "xvideos-py"
version = "0.1.6"
version = "0.1.9"
description = "A powerful Python library to scrape xvideos.com"
authors = ["OSintt <[email protected]>"]
readme = "README.md"

[tool.poetry.dependencies]
python = "^3.6"
requests = "^2.0"
beautifulsoup4 = "^4.0"
python = "^3.10"
requests = "^2.32.3"
beautifulsoup4 = "^4.12.3"
playwright = "^1.45.0"

[tool.poetry.dev-dependencies]
pytest = "^6.0"
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

setup(
name="xvideos-py",
version="0.1.6",
version="0.1.9",
description="A powerful Python library to scrape xvideos.com",
author="OSintt",
author_email="[email protected]",
Expand All @@ -17,6 +17,7 @@
install_requires=[
"beautifulsoup4",
"requests",
"playwright"
],
tests_require=[
"pytest",
Expand Down
11 changes: 11 additions & 0 deletions tests/test_scraper.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,15 @@ def test_search_without_page(scraper):
result = scraper.search(k="example", sort="relevance")
assert 'pagination' in result

def test_details_with_invalid_url(scraper):
    """details() must raise ValueError for a URL that is not an xvideos video page."""
    bad_url = 'https://invalidsite.com/video'
    with pytest.raises(ValueError):
        scraper.details(bad_url)

def test_details_with_valid_url(scraper):
    """A well-formed video URL yields a details dict with the expected fields.

    NOTE(review): this hits the live site through the ``scraper`` fixture,
    so it is network-dependent and may break if the video is removed.
    """
    target = 'https://www.xvideos.com/video.udefpih987f/mi_madrastra_perdio_apuesta_en_final_argentina_vs_colombia_y_me_lo_chupa'
    details = scraper.details(target)
    for field in ('title', 'url', 'views', 'image'):
        assert field in details
1 change: 0 additions & 1 deletion xvideos/videos/base/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,4 @@ def scrape(self, endpoint: str, params: dict) -> str:
def get_soup(self, endpoint: str, params: dict) -> BeautifulSoup:
    """Fetch a page relative to ``self.base_url`` and return it parsed.

    Args:
        endpoint: Path joined onto ``self.base_url`` via ``urljoin``.
        params: Query-string parameters forwarded to ``requests.get``.

    Returns:
        BeautifulSoup: the response body parsed with the built-in
        ``html.parser`` (no error pages are filtered — whatever HTML
        comes back is parsed as-is).
    """
    full_url = urljoin(self.base_url, endpoint)
    # Timeout keeps a stalled server from hanging the scraper forever;
    # the original call had none. Leftover debug print removed.
    response = requests.get(full_url, params=params, timeout=30)
    return BeautifulSoup(response.text, 'html.parser')
3 changes: 1 addition & 2 deletions xvideos/videos/base/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,5 +114,4 @@ def parse_video(video):
'views': views,
}
except ValueError as e:
print(f"Error parsing video: {e}")
return None
raise ValueError(f"Error parsing video: {e}")
17 changes: 16 additions & 1 deletion xvideos/videos/details.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,29 @@
from .base import VideoDetailsScraper

import re
from functools import wraps

def validate_video_url(func):
    """Decorator: validate that a method's first argument is an xvideos video URL.

    Wraps instance methods of the form ``method(self, video_url, ...)``.

    Raises:
        ValueError: if ``video_url`` is not a string or does not match the
            ``https://[www.]xvideos.com/video<id>/<slug>`` shape.
    """
    # Compiled once at decoration time instead of on every wrapped call.
    # NOTE(review): the '.' after 'video' is deliberately left unescaped so
    # both the new dotted ids ('video.abc123/...') and older forms such as
    # 'video12345/...' are accepted — confirm against the site's URL scheme.
    pattern = re.compile(r'^https://(www\.)?xvideos\.com/video.[a-zA-Z0-9._-]+/.+')

    @wraps(func)
    def wrapper(self, video_url, *args, **kwargs):
        # Non-string input previously bubbled up as TypeError from
        # re.match; normalize to the documented ValueError.
        if not isinstance(video_url, str) or not pattern.match(video_url):
            raise ValueError(f"Invalid video URL: {video_url}")
        return func(self, video_url, *args, **kwargs)
    return wrapper


class DetailsScraper(VideoDetailsScraper):
@validate_video_url
def details(self, video_url: str) -> str:
    """Return the scraped details for *video_url*.

    The URL is validated by ``validate_video_url`` (raises ValueError on a
    malformed URL); the actual parsing is delegated to ``self.scrape``.
    NOTE(review): despite the ``-> str`` annotation, callers index the
    result like a dict (e.g. ``details['title']``) — confirm and fix the
    annotation upstream.
    """
    return self.scrape(video_url)
@validate_video_url
def download_high_quality(self, video_url: str, filename: str):
    """Download the high-quality stream of *video_url* into *filename*.

    Returns whatever ``self.download_high`` returns; silently returns
    ``None`` when the scraped details expose no 'high' file URL.
    """
    files = self.scrape(video_url).get('files', {})
    high_url = files.get('high')
    if not high_url:
        return None
    return self.download_high(high_url, filename)

@validate_video_url
def download_low_quality(self, video_url: str, filename: str):
video_details = self.scrape(video_url)
low_url = video_details.get('files', {}).get('low')
Expand Down

0 comments on commit 6b6201d

Please sign in to comment.