diff --git a/src/web/scrapers.py b/src/web/scrapers.py
index 8476185d..c16ebb37 100644
--- a/src/web/scrapers.py
+++ b/src/web/scrapers.py
@@ -3,13 +3,14 @@
import pathlib
import re
import urllib.parse
+import zoneinfo
from datetime import datetime, timedelta
from typing import Any, Protocol, TypeAlias, TypeVar
import eventbrite.access_methods
import requests
-import zoneinfo
-from bs4 import BeautifulSoup, Tag
+from bs4 import BeautifulSoup
+from bs4.element import Tag
from django.conf import settings
from django.utils import timezone
from eventbrite import Eventbrite
@@ -26,7 +27,7 @@ def get_venue(self, id, **data):
def get_event_description(self, id, **data):
- return self.get("/events/{0}/description//".format(id), data=data)
+ return self.get("/events/{0}/description/".format(id), data=data)
setattr(eventbrite.access_methods.AccessMethodsMixin, "get_venue", get_venue)
@@ -39,10 +40,23 @@ def scrape(self, url: str) -> ST:
...
-EventScraperResult: TypeAlias = tuple[models.Event, list[models.Tag]]
+ImageResult: TypeAlias = tuple[str, bytes]
+EventScraperResult: TypeAlias = tuple[models.Event, list[models.Tag], ImageResult | None]
+
+
+class ScraperMixin:
+ def _get_image(self, image_url: str) -> ImageResult:
+ image_name = self._parse_image_name(image_url)
+ response = requests.get(image_url, timeout=10)
+ response.raise_for_status()
+ image = response.content
+ return image_name, image
+ def _parse_image_name(self, image_url: str) -> str:
+ return image_url.rsplit("/", maxsplit=1)[-1].split("?", maxsplit=1)[0]
-class MeetupScraperMixin:
+
+class MeetupScraperMixin(ScraperMixin):
"""Common Meetup scraping functionality."""
def _parse_apollo_state(self, soup: BeautifulSoup) -> dict:
@@ -84,8 +98,8 @@ def scrape(self, url: str) -> list[str]:
else:
upcoming_section = soup.find_all(id="upcoming-section")[0]
events = upcoming_section.find_all_next(id=re.compile(r"event-card-"))
- filtered_event_containers = [event for event in events if self._filter_event_tag(event)]
- event_urls = [event_container["href"] for event_container in filtered_event_containers]
+ filtered_event_containers: list[Tag] = [event for event in events if self._filter_event_tag(event)] # type: ignore
+ event_urls: list[str] = [event_container["href"] for event_container in filtered_event_containers] # type: ignore
return [url for url in event_urls if self._filter_repeating_events(url)]
@@ -136,6 +150,8 @@ def scrape(self, url: str) -> EventScraperResult:
location_data = apollo_state[event_json["venue"]["__ref"]]
location = f"{location_data['address']}, {location_data['city']}, {location_data['state']}"
external_id = event_json["id"]
+ event_photo = event_json["featuredEventPhoto"]["__ref"]
+ image_url = apollo_state[event_photo].get("highResUrl", apollo_state[event_photo]["baseUrl"])
except KeyError:
name = self._parse_name(soup)
description = self._parse_description(soup)
@@ -143,20 +159,22 @@ def scrape(self, url: str) -> EventScraperResult:
duration = self._parse_duration(soup)
location = self._parse_location(soup)
external_id = self._parse_external_id(url)
+ image_url = self._parse_image(soup)
+
+        # _parse_image may return None; default image_result so the return below can't NameError
+        image_result = self._get_image(image_url) if image_url else None
tags = self._parse_tags(soup)
- return (
- models.Event(
- name=name,
- description=description,
- date_time=date_time,
- duration=duration,
- location=location,
- external_id=external_id,
- url=url,
- ),
- tags,
+ event = models.Event(
+ name=name,
+ description=description,
+ date_time=date_time,
+ duration=duration,
+ location=location,
+ external_id=external_id,
+ url=url,
)
+ return (event, tags, image_result)
def _parse_name(self, soup: BeautifulSoup) -> str:
name: str = soup.find_all("h1")[0].text
@@ -171,10 +189,16 @@ def _parse_description(self, soup: BeautifulSoup) -> str:
return description
def _parse_date_time(self, soup: BeautifulSoup) -> datetime:
- return datetime.fromisoformat(soup.find_all("time")[0]["datetime"])
+ time: Tag | None = soup.find("time") # type: ignore
+ if not time:
+ raise ValueError("could not find time")
+ dt: str = time["datetime"] # type: ignore
+ return datetime.fromisoformat(dt)
def _parse_duration(self, soup: BeautifulSoup) -> timedelta:
- time: Tag = soup.find_all("time")[0]
+ time: Tag | None = soup.find("time") # type: ignore
+ if not time:
+ raise ValueError("could not find time")
matches = self.DURATION_PATTERN.findall(time.text)
if not matches:
raise ValueError("Could not find duration from:", time.text)
@@ -199,8 +223,18 @@ def _parse_tags(self, soup: BeautifulSoup) -> list[models.Tag]:
tags = [re.sub(r"\s+", " ", t.text) for t in tags] # Some tags have newlines & extra spaces
return [models.Tag(value=t) for t in tags]
+ def _parse_image(self, soup: BeautifulSoup) -> str | None:
+ picture = soup.find(attrs={"data-testid": "event-description-image"})
+ if not picture:
+ return None
+ img: Tag | None = picture.find("img") # type: ignore
+ if not img:
+ return None
+ src: str = img["src"] # type: ignore
+ return src
+
-class EventbriteScraper(Scraper[list[EventScraperResult]]):
+class EventbriteScraper(ScraperMixin, Scraper[list[EventScraperResult]]):
def __init__(self, api_token: str | None = None):
self.client = Eventbrite(api_token or settings.EVENTBRITE_API_TOKEN)
self._location_by_venue_id: dict[str, str] = {}
@@ -209,11 +243,12 @@ def scrape(self, organization_id: str) -> list[EventScraperResult]:
response = self.client.get_organizer_events(
organization_id,
status="live",
+ expand="logo",
)
- events_and_tags = [self.map_to_event(eventbrite_event) for eventbrite_event in response["events"]]
- return events_and_tags
+ results = [self.map_to_event(eventbrite_event) for eventbrite_event in response["events"]]
+ return results
- def map_to_event(self, eventbrite_event: dict) -> tuple[models.Event, list[models.Tag]]:
+ def map_to_event(self, eventbrite_event: dict) -> EventScraperResult:
name = eventbrite_event["name"]["text"]
start = datetime.fromisoformat(eventbrite_event["start"]["utc"])
end = datetime.fromisoformat(eventbrite_event["end"]["utc"])
@@ -230,6 +265,16 @@ def map_to_event(self, eventbrite_event: dict) -> tuple[models.Event, list[model
# short description
description = eventbrite_event["description"]["html"]
+ try:
+ image_url = eventbrite_event["logo"]["original"]["url"]
+ image_result = self._get_image(image_url)
+ except (KeyError, requests.HTTPError):
+ try:
+ image_url = eventbrite_event["logo"]["url"]
+ image_result = self._get_image(image_url)
+ except KeyError:
+ image_result = None
+
event = models.Event(
name=name,
description=description,
@@ -249,7 +294,7 @@ def map_to_event(self, eventbrite_event: dict) -> tuple[models.Event, list[model
# if subcategory_name:
# tags.append(models.Tag(value=subcategory_name))
- return event, []
+ return event, [], image_result
@functools.lru_cache
def _get_venue_location(self, venue_id: str) -> str:
diff --git a/src/web/services.py b/src/web/services.py
index f0e295e1..9823635b 100644
--- a/src/web/services.py
+++ b/src/web/services.py
@@ -1,43 +1,91 @@
from datetime import timedelta
from typing import Protocol
+from django.core.files.base import ContentFile
from django.forms.models import model_to_dict
from django.utils import timezone
from web import models, scrapers
+class EventService:
+ def save_event_from_result(
+ self,
+ result: scrapers.EventScraperResult,
+ tech_group: models.TechGroup,
+ ) -> None:
+ event, tags, image_result = result
+ event = self._save_event(event, tech_group)
+ self._save_tags(event, tags)
+ if image_result is not None:
+ self._save_image(event, image_result)
+
+ def _save_event(
+ self,
+ event: models.Event,
+ tech_group: models.TechGroup,
+ ) -> models.Event:
+ event.group = tech_group
+ event.approved_at = timezone.localtime()
+ defaults = model_to_dict(event, exclude=["id"])
+ defaults["group"] = tech_group
+
+        del defaults["tags"]  # Can't apply Many-to-Many relationship until after the event has been saved.
+ del defaults["image"]
+
+ updated_event, _ = models.Event.objects.update_or_create(
+ external_id=event.external_id,
+ defaults=defaults,
+ )
+ return updated_event
+
+ def _save_tags(
+ self,
+ event: models.Event,
+ tags: list[models.Tag],
+ ) -> None:
+ for tag in tags:
+ tag, _ = models.Tag.objects.get_or_create(value=tag)
+ event.tags.add(tag)
+
+ def _save_image(
+ self,
+ event: models.Event,
+ image_result: scrapers.ImageResult,
+ ) -> None:
+ image_name, image = image_result
+
+ # If images are the same, don't re-upload
+ has_existing_image = bool(event.image)
+ if has_existing_image:
+ existing_image = event.image.read()
+ if existing_image == image:
+ return
+
+ file = ContentFile(image, name=image_name)
+ event.image.save(image_name, file)
+
+
class MeetupService:
def __init__(
self,
homepage_scraper: scrapers.Scraper[list[str]] | None = None,
event_scraper: scrapers.Scraper[scrapers.EventScraperResult] | None = None,
+ event_service: EventService | None = None,
) -> None:
self.homepage_scraper: scrapers.Scraper[list[str]] = homepage_scraper or scrapers.MeetupHomepageScraper()
self.event_scraper: scrapers.Scraper[scrapers.EventScraperResult] = (
event_scraper or scrapers.MeetupEventScraper()
)
+ self.event_service = event_service or EventService()
def save_events(self) -> None:
"""Scrape upcoming events from Meetup and save them to the database."""
- now = timezone.localtime()
for tech_group in models.TechGroup.objects.filter(homepage__icontains="meetup.com"):
event_urls = self.homepage_scraper.scrape(tech_group.homepage) # type: ignore
for event_url in event_urls: # TODO: parallelize (with async?)
- event, tags = self.event_scraper.scrape(event_url)
- event.group = tech_group
- event.approved_at = now
- defaults = model_to_dict(event, exclude=["id"])
- defaults["group"] = tech_group
-
- del defaults["tags"] # Can't apply Many-to-Many relationship untill after the event has been saved.
- new_event, _ = models.Event.objects.update_or_create(
- external_id=event.external_id,
- defaults=defaults,
- )
- for tag in tags:
- tag, _ = models.Tag.objects.get_or_create(value=tag)
- new_event.tags.add(tag)
+ result = self.event_scraper.scrape(event_url)
+ self.event_service.save_event_from_result(result, tech_group)
class EventbriteService:
@@ -46,28 +94,21 @@ class EventbriteService:
def __init__(
self,
events_scraper: scrapers.Scraper[list[scrapers.EventScraperResult]] | None = None,
+ event_service: EventService | None = None,
) -> None:
self.events_scraper = events_scraper or scrapers.EventbriteScraper()
+ self.event_service = event_service or EventService()
def save_events(self) -> None:
"""Fetch upcoming events from Eventbrite and save them.
Note: this uses an API and doesn't actually web scrape.
"""
- now = timezone.localtime()
for eventbrite_organization in models.EventbriteOrganization.objects.prefetch_related("tech_group"):
tech_group = eventbrite_organization.tech_group
- events_and_tags = self.events_scraper.scrape(eventbrite_organization.eventbrite_id)
- for event, _ in events_and_tags:
- event.group = tech_group
- event.approved_at = now
- defaults = model_to_dict(event, exclude=["id"])
- defaults["group"] = tech_group
- del defaults["tags"] # Can't apply Many-to-Many relationship untill after the event has been saved.
- models.Event.objects.update_or_create(
- external_id=event.external_id,
- defaults=defaults,
- )
+ results = self.events_scraper.scrape(eventbrite_organization.eventbrite_id)
+ for result in results:
+ self.event_service.save_event_from_result(result, tech_group)
class Sender(Protocol):
diff --git a/src/web/tests/data/eventbrite/event_description.json b/src/web/tests/data/eventbrite/event_description.json
new file mode 100644
index 00000000..1e7a1d2c
--- /dev/null
+++ b/src/web/tests/data/eventbrite/event_description.json
@@ -0,0 +1,3 @@
+{
+    "description": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. FREE with pre-registration - space limited - Oct 2, 2024"
+}
diff --git a/src/web/tests/data/eventbrite/event_image.jpg b/src/web/tests/data/eventbrite/event_image.jpg
new file mode 100644
index 00000000..caad0c37
Binary files /dev/null and b/src/web/tests/data/eventbrite/event_image.jpg differ
diff --git a/src/web/tests/data/eventbrite/event_venue.json b/src/web/tests/data/eventbrite/event_venue.json
new file mode 100644
index 00000000..cf5a8a93
--- /dev/null
+++ b/src/web/tests/data/eventbrite/event_venue.json
@@ -0,0 +1,25 @@
+{
+ "address": {
+ "address_1": "702 East Desmet Avenue",
+ "address_2": "",
+ "city": "Spokane",
+ "region": "WA",
+ "postal_code": "99202",
+ "country": "US",
+ "latitude": "47.6672448",
+ "longitude": "-117.3999126",
+ "localized_address_display": "702 East Desmet Avenue, Spokane, WA 99202",
+ "localized_area_display": "Spokane, WA",
+ "localized_multi_line_address_display": [
+ "702 East Desmet Avenue",
+ "Spokane, WA 99202"
+ ]
+ },
+ "resource_uri": "https://www.eventbriteapi.com/v3/venues/214450569/",
+ "id": "214450569",
+ "age_restriction": null,
+ "capacity": null,
+ "name": "John J. Hemmingson Center",
+ "latitude": "47.6672448",
+ "longitude": "-117.3999126"
+}
diff --git a/src/web/tests/data/eventbrite/organizer_events.json b/src/web/tests/data/eventbrite/organizer_events.json
new file mode 100644
index 00000000..400df1f6
--- /dev/null
+++ b/src/web/tests/data/eventbrite/organizer_events.json
@@ -0,0 +1,83 @@
+{
+ "pagination": {
+ "object_count": 1,
+ "page_number": 1,
+ "page_size": 50,
+ "page_count": 1,
+ "has_more_items": false
+ },
+ "events": [
+ {
+ "name": {
+ "text": "3rd Annual - INCH360 Regional Cybersecurity Conference",
+ "html": "3rd Annual - INCH360 Regional Cybersecurity Conference"
+ },
+ "description": {
+ "text": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. FREE with pre-registration - space limited - Oct 2, 2024",
+ "html": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. FREE with pre-registration - space limited - Oct 2, 2024"
+ },
+ "url": "https://www.eventbrite.com/e/3rd-annual-inch360-regional-cybersecurity-conference-tickets-909447069667",
+ "start": {
+ "timezone": "America/Los_Angeles",
+ "local": "2024-10-02T08:30:00",
+ "utc": "2024-10-02T15:30:00Z"
+ },
+ "end": {
+ "timezone": "America/Los_Angeles",
+ "local": "2024-10-02T16:00:00",
+ "utc": "2024-10-02T23:00:00Z"
+ },
+ "organization_id": "1773924472233",
+ "created": "2024-05-19T22:29:10Z",
+ "changed": "2024-09-06T16:49:53Z",
+ "published": "2024-05-20T20:03:26Z",
+ "capacity": null,
+ "capacity_is_custom": null,
+ "status": "live",
+ "currency": "USD",
+ "listed": true,
+ "shareable": true,
+ "online_event": false,
+ "tx_time_limit": 1200,
+ "hide_start_date": false,
+ "hide_end_date": false,
+ "locale": "en_US",
+ "is_locked": false,
+ "privacy_setting": "unlocked",
+ "is_series": false,
+ "is_series_parent": false,
+ "inventory_type": "limited",
+ "is_reserved_seating": false,
+ "show_pick_a_seat": false,
+ "show_seatmap_thumbnail": false,
+ "show_colors_in_seatmap_thumbnail": false,
+ "source": "auto_create",
+ "is_free": true,
+ "version": null,
+ "summary": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. FREE with pre-registration - space limited - Oct 2, 2024",
+ "facebook_event_id": null,
+ "logo_id": "843746309",
+ "organizer_id": "72020528223",
+ "venue_id": "214450569",
+ "category_id": "102",
+ "subcategory_id": "2004",
+ "format_id": "2",
+ "id": "909447069667",
+ "resource_uri": "https://www.eventbriteapi.com/v3/events/909447069667/",
+ "is_externally_ticketed": false,
+ "logo": {
+ "crop_mask": null,
+ "original": {
+ "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727?auto=format%2Ccompress&q=75&sharp=10&s=09370c02bd3ab62907337f2e1ca8a61d",
+ "width": 6912,
+ "height": 3456
+ },
+ "id": "843746309",
+ "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727?h=200&w=450&auto=format%2Ccompress&q=75&sharp=10&s=4b5f5340dcfe0cc78bec9f19b08f45f7",
+ "aspect_ratio": "2",
+ "edge_color": "#516c79",
+ "edge_color_set": true
+ }
+ }
+ ]
+}
diff --git a/src/web/tests/data/meetup-image.jpeg b/src/web/tests/data/meetup-image.jpeg
new file mode 100644
index 00000000..61416484
Binary files /dev/null and b/src/web/tests/data/meetup-image.jpeg differ
diff --git a/src/web/tests/data/meetup-image.webp b/src/web/tests/data/meetup-image.webp
new file mode 100644
index 00000000..5b6cadfd
Binary files /dev/null and b/src/web/tests/data/meetup-image.webp differ
diff --git a/src/web/tests/test_scrapers.py b/src/web/tests/test_scrapers.py
index 54dfcf94..5839e836 100644
--- a/src/web/tests/test_scrapers.py
+++ b/src/web/tests/test_scrapers.py
@@ -1,12 +1,33 @@
import pathlib
from datetime import datetime, timedelta
+from zoneinfo import ZoneInfo
import freezegun
-import responses
import pytest
+import responses
from django.test import TestCase
-from web import models, scrapers
-from zoneinfo import ZoneInfo
+
+from web import scrapers
+
+BASE_DATA_DIR = pathlib.Path(__file__).parent / "data"
+
+
+def mock_response(
+ url: str,
+ filepath: pathlib.Path,
+) -> None:
+ with open(filepath) as fin:
+ body = fin.read()
+ responses.get(url, body=body)
+
+
+def mock_image_response(
+ url: str,
+ filepath: pathlib.Path,
+) -> None:
+ with open(filepath, "rb") as fin:
+ body = fin.read()
+ responses.get(url, body=body)
class TestMeetupHomepageScraper(TestCase):
@@ -59,16 +80,26 @@ def test_scraper_without_json(self):
class TestMeetupEventScraper(TestCase):
@responses.activate
def test_scraper_with_json(self):
- fin = open(pathlib.Path(__file__).parent / "data" / "meetup-with-json.html")
- body = fin.read()
- fin.close()
+ # Arrange
+ with open(pathlib.Path(__file__).parent / "data" / "meetup-with-json.html") as fin:
+ body = fin.read()
responses.get(
"https://www.meetup.com/python-spokane/events/298213205/",
body=body,
)
+ with open(pathlib.Path(__file__).parent / "data" / "meetup-image.jpeg", "rb") as fin:
+ body = fin.read()
+ responses.get(
+ "https://secure.meetupstatic.com/photos/event/1/0/a/e/highres_519844270.jpeg",
+ body=body,
+ )
+
+ # Act
scraper = scrapers.MeetupEventScraper()
- actual, actual_tags = scraper.scrape("https://www.meetup.com/python-spokane/events/298213205/")
+ actual, actual_tags, actual_image_result = scraper.scrape(
+ "https://www.meetup.com/python-spokane/events/298213205/"
+ )
assert actual.name == "Dagger with Spokane Tech 🚀"
assert actual.description and actual.description.startswith("Join us for our monthly SPUG meetup!")
@@ -77,6 +108,7 @@ def test_scraper_with_json(self):
assert actual.location == "1720 W 4th Ave Unit B, Spokane, WA"
assert actual.url == "https://www.meetup.com/python-spokane/events/298213205/"
assert actual.external_id == "298213205"
+
assert len(actual_tags) == 5
assert {t.value for t in actual_tags} == {
"Linux",
@@ -86,19 +118,29 @@ def test_scraper_with_json(self):
"Agile and Scrum",
}
+ assert actual_image_result
+ assert actual_image_result[0] == "highres_519844270.jpeg"
+ assert len(actual_image_result[1]) > 0
+
@responses.activate
def test_scraper_without_json(self):
- fin = open(pathlib.Path(__file__).parent / "data" / "meetup-without-json.html")
- body = fin.read()
- fin.close()
- responses.get(
+ # Arrange
+ mock_response(
"https://www.meetup.com/python-spokane/events/298213205/",
- body=body,
+ BASE_DATA_DIR / "meetup-without-json.html",
+ )
+ mock_image_response(
+ "https://secure.meetupstatic.com/photos/event/1/0/a/e/600_519844270.webp?w=750",
+ BASE_DATA_DIR / "meetup-image.webp",
)
+ # Act
scraper = scrapers.MeetupEventScraper()
- actual, actual_tags = scraper.scrape("https://www.meetup.com/python-spokane/events/298213205/")
+ actual, actual_tags, actual_image_result = scraper.scrape(
+ "https://www.meetup.com/python-spokane/events/298213205/"
+ )
+ # Assert
assert actual.name == "Dagger with Spokane Tech 🚀"
assert actual.description and actual.description.startswith("Join us for our monthly SPUG meetup!")
assert actual.date_time == datetime(2024, 3, 19, 18, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
@@ -115,6 +157,10 @@ def test_scraper_without_json(self):
"Agile and Scrum",
}
+ assert actual_image_result
+ assert actual_image_result[0] == "600_519844270.webp"
+ assert len(actual_image_result[1]) > 0
+
@pytest.mark.eventbrite
class TestEventbriteScraper(TestCase):
@@ -126,16 +172,50 @@ class TestEventbriteScraper(TestCase):
To run them, set the `EVENTBRITE_API_TOKEN` envrionment variable.
"""
+ @responses.activate
def test_scraper(self):
+ # Arrange
+ mock_response(
+ "https://www.eventbriteapi.com/v3/organizers/72020528223/events/",
+ BASE_DATA_DIR / "eventbrite" / "organizer_events.json",
+ )
+ mock_response(
+ "https://www.eventbriteapi.com/v3/venues/214450569/?expand=none",
+ BASE_DATA_DIR / "eventbrite" / "event_venue.json",
+ )
+ mock_response(
+ "https://www.eventbriteapi.com/v3/events/909447069667/description/",
+ BASE_DATA_DIR / "eventbrite" / "event_description.json",
+ )
+ mock_image_response(
+ "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727?auto=format%2Ccompress&q=75&sharp=10&s=09370c02bd3ab62907337f2e1ca8a61d",
+ BASE_DATA_DIR / "eventbrite" / "event_image.jpg",
+ )
+
+ # Act
scraper = scrapers.EventbriteScraper()
- result = scraper.scrape("72020528223")
- actual: models.Event = result[0][0]
- assert actual.name == "Spring Cyber - Training Series"
- assert actual.description and actual.description.startswith(
-        "Deep Dive into Pen Testing with white hacker Casey Davis"
+ organization_id = "72020528223"
+ result = scraper.scrape(organization_id)
+
+ # Assert
+ event, tags, image_result = result[0]
+ assert event.name == "3rd Annual - INCH360 Regional Cybersecurity Conference"
+ assert event.description
+    assert event.description.startswith("Full Day of Panels, Speakers and Vendors on Cybersecurity,")
+ assert event.date_time == datetime(2024, 10, 2, 8, 30, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
+ assert event.duration == timedelta(hours=7, minutes=30)
+ assert event.location == "702 East Desmet Avenue, Spokane, WA 99202"
+ assert (
+ event.url
+ == "https://www.eventbrite.com/e/3rd-annual-inch360-regional-cybersecurity-conference-tickets-909447069667"
)
- assert actual.date_time == datetime(2024, 5, 23, 16, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles"))
- assert actual.duration == timedelta(hours=1, minutes=30)
- assert actual.location == "2818 North Sullivan Road #Suite 100, Spokane Valley, WA 99216"
- assert actual.url == "https://www.eventbrite.com/e/spring-cyber-training-series-tickets-860181354587"
- assert actual.external_id == "860181354587"
+ assert event.external_id == "909447069667"
+
+ assert not tags
+
+ assert image_result
+ assert (
+ image_result[0]
+ == "https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727"
+ )
+ assert len(image_result[1]) > 0
diff --git a/src/web/tests/test_services.py b/src/web/tests/test_services.py
index b1a3e9de..836b6e8a 100644
--- a/src/web/tests/test_services.py
+++ b/src/web/tests/test_services.py
@@ -1,5 +1,6 @@
from django.test import TestCase
from django.utils import timezone
+
from web import models, scrapers, services
@@ -32,6 +33,10 @@ def scrape(self, url: str) -> scrapers.EventScraperResult:
models.Tag(value="Agile and Scrum"),
models.Tag(value="Python Web Development"),
],
+ (
+ "image_name",
+ b"image.png",
+ ),
)
return (
@@ -48,6 +53,10 @@ def scrape(self, url: str) -> scrapers.EventScraperResult:
models.Tag(value="Agile and Scrum"),
models.Tag(value="Python Web Development"),
],
+ (
+ "image_name",
+ b"image.png",
+ ),
)
@@ -75,6 +84,30 @@ def test_updates_event_instead_of_creating_new_one(self):
assert event.name == "Intro to Dagger"
assert event.description == "Super cool intro to Dagger CI/CD!"
assert event.external_id == MockMeetupEventScraper.EXTERNAL_ID
+ assert "image_name" in event.image.name
+
+ def test_image_is_not_reuploaded_when_contents_are_same(self):
+ # Arrange
+ models.TechGroup.objects.create(
+ name="Spokane Python User Group",
+ homepage="https://www.meetup.com/Python-Spokane/",
+ )
+
+ meetup_service = services.MeetupService(
+ MockMeetupHomepageScraper(),
+ MockMeetupEventScraper(),
+ )
+
+ # Act
+ meetup_service.save_events()
+ event1 = models.Event.objects.get()
+
+ meetup_service.save_events()
+ event2 = models.Event.objects.get()
+
+ # Assert
+ assert event1.pk == event2.pk
+ assert event1.image.name == event2.image.name
def test_manually_applied_tags_are_not_overriden(self):
# Arrange