diff --git a/src/web/scrapers.py b/src/web/scrapers.py index 8476185d..c16ebb37 100644 --- a/src/web/scrapers.py +++ b/src/web/scrapers.py @@ -3,13 +3,14 @@ import pathlib import re import urllib.parse +import zoneinfo from datetime import datetime, timedelta from typing import Any, Protocol, TypeAlias, TypeVar import eventbrite.access_methods import requests -import zoneinfo -from bs4 import BeautifulSoup, Tag +from bs4 import BeautifulSoup +from bs4.element import Tag from django.conf import settings from django.utils import timezone from eventbrite import Eventbrite @@ -26,7 +27,7 @@ def get_venue(self, id, **data): def get_event_description(self, id, **data): - return self.get("/events/{0}/description//".format(id), data=data) + return self.get("/events/{0}/description/".format(id), data=data) setattr(eventbrite.access_methods.AccessMethodsMixin, "get_venue", get_venue) @@ -39,10 +40,23 @@ def scrape(self, url: str) -> ST: ... -EventScraperResult: TypeAlias = tuple[models.Event, list[models.Tag]] +ImageResult: TypeAlias = tuple[str, bytes] +EventScraperResult: TypeAlias = tuple[models.Event, list[models.Tag], ImageResult | None] + + +class ScraperMixin: + def _get_image(self, image_url: str) -> ImageResult: + image_name = self._parse_image_name(image_url) + response = requests.get(image_url, timeout=10) + response.raise_for_status() + image = response.content + return image_name, image + def _parse_image_name(self, image_url: str) -> str: + return image_url.rsplit("/", maxsplit=1)[-1].split("?", maxsplit=1)[0] -class MeetupScraperMixin: + +class MeetupScraperMixin(ScraperMixin): """Common Meetup scraping functionality.""" def _parse_apollo_state(self, soup: BeautifulSoup) -> dict: @@ -84,8 +98,8 @@ def scrape(self, url: str) -> list[str]: else: upcoming_section = soup.find_all(id="upcoming-section")[0] events = upcoming_section.find_all_next(id=re.compile(r"event-card-")) - filtered_event_containers = [event for event in events if self._filter_event_tag(event)] 
- event_urls = [event_container["href"] for event_container in filtered_event_containers] + filtered_event_containers: list[Tag] = [event for event in events if self._filter_event_tag(event)] # type: ignore + event_urls: list[str] = [event_container["href"] for event_container in filtered_event_containers] # type: ignore return [url for url in event_urls if self._filter_repeating_events(url)] @@ -136,6 +150,8 @@ def scrape(self, url: str) -> EventScraperResult: location_data = apollo_state[event_json["venue"]["__ref"]] location = f"{location_data['address']}, {location_data['city']}, {location_data['state']}" external_id = event_json["id"] + event_photo = event_json["featuredEventPhoto"]["__ref"] + image_url = apollo_state[event_photo].get("highResUrl", apollo_state[event_photo]["baseUrl"]) except KeyError: name = self._parse_name(soup) description = self._parse_description(soup) @@ -143,20 +159,22 @@ def scrape(self, url: str) -> EventScraperResult: duration = self._parse_duration(soup) location = self._parse_location(soup) external_id = self._parse_external_id(url) + image_url = self._parse_image(soup) + + if image_url: + image_result = self._get_image(image_url) tags = self._parse_tags(soup) - return ( - models.Event( - name=name, - description=description, - date_time=date_time, - duration=duration, - location=location, - external_id=external_id, - url=url, - ), - tags, + event = models.Event( + name=name, + description=description, + date_time=date_time, + duration=duration, + location=location, + external_id=external_id, + url=url, ) + return (event, tags, image_result) def _parse_name(self, soup: BeautifulSoup) -> str: name: str = soup.find_all("h1")[0].text @@ -171,10 +189,16 @@ def _parse_description(self, soup: BeautifulSoup) -> str: return description def _parse_date_time(self, soup: BeautifulSoup) -> datetime: - return datetime.fromisoformat(soup.find_all("time")[0]["datetime"]) + time: Tag | None = soup.find("time") # type: ignore + if not time: + 
raise ValueError("could not find time") + dt: str = time["datetime"] # type: ignore + return datetime.fromisoformat(dt) def _parse_duration(self, soup: BeautifulSoup) -> timedelta: - time: Tag = soup.find_all("time")[0] + time: Tag | None = soup.find("time") # type: ignore + if not time: + raise ValueError("could not find time") matches = self.DURATION_PATTERN.findall(time.text) if not matches: raise ValueError("Could not find duration from:", time.text) @@ -199,8 +223,18 @@ def _parse_tags(self, soup: BeautifulSoup) -> list[models.Tag]: tags = [re.sub(r"\s+", " ", t.text) for t in tags] # Some tags have newlines & extra spaces return [models.Tag(value=t) for t in tags] + def _parse_image(self, soup: BeautifulSoup) -> str | None: + picture = soup.find(attrs={"data-testid": "event-description-image"}) + if not picture: + return None + img: Tag | None = picture.find("img") # type: ignore + if not img: + return None + src: str = img["src"] # type: ignore + return src + -class EventbriteScraper(Scraper[list[EventScraperResult]]): +class EventbriteScraper(ScraperMixin, Scraper[list[EventScraperResult]]): def __init__(self, api_token: str | None = None): self.client = Eventbrite(api_token or settings.EVENTBRITE_API_TOKEN) self._location_by_venue_id: dict[str, str] = {} @@ -209,11 +243,12 @@ def scrape(self, organization_id: str) -> list[EventScraperResult]: response = self.client.get_organizer_events( organization_id, status="live", + expand="logo", ) - events_and_tags = [self.map_to_event(eventbrite_event) for eventbrite_event in response["events"]] - return events_and_tags + results = [self.map_to_event(eventbrite_event) for eventbrite_event in response["events"]] + return results - def map_to_event(self, eventbrite_event: dict) -> tuple[models.Event, list[models.Tag]]: + def map_to_event(self, eventbrite_event: dict) -> EventScraperResult: name = eventbrite_event["name"]["text"] start = datetime.fromisoformat(eventbrite_event["start"]["utc"]) end = 
datetime.fromisoformat(eventbrite_event["end"]["utc"]) @@ -230,6 +265,16 @@ def map_to_event(self, eventbrite_event: dict) -> tuple[models.Event, list[model # short description description = eventbrite_event["description"]["html"] + try: + image_url = eventbrite_event["logo"]["original"]["url"] + image_result = self._get_image(image_url) + except (KeyError, requests.HTTPError): + try: + image_url = eventbrite_event["logo"]["url"] + image_result = self._get_image(image_url) + except KeyError: + image_result = None + event = models.Event( name=name, description=description, @@ -249,7 +294,7 @@ def map_to_event(self, eventbrite_event: dict) -> tuple[models.Event, list[model # if subcategory_name: # tags.append(models.Tag(value=subcategory_name)) - return event, [] + return event, [], image_result @functools.lru_cache def _get_venue_location(self, venue_id: str) -> str: diff --git a/src/web/services.py b/src/web/services.py index f0e295e1..9823635b 100644 --- a/src/web/services.py +++ b/src/web/services.py @@ -1,43 +1,91 @@ from datetime import timedelta from typing import Protocol +from django.core.files.base import ContentFile from django.forms.models import model_to_dict from django.utils import timezone from web import models, scrapers +class EventService: + def save_event_from_result( + self, + result: scrapers.EventScraperResult, + tech_group: models.TechGroup, + ) -> None: + event, tags, image_result = result + event = self._save_event(event, tech_group) + self._save_tags(event, tags) + if image_result is not None: + self._save_image(event, image_result) + + def _save_event( + self, + event: models.Event, + tech_group: models.TechGroup, + ) -> models.Event: + event.group = tech_group + event.approved_at = timezone.localtime() + defaults = model_to_dict(event, exclude=["id"]) + defaults["group"] = tech_group + + del defaults["tags"] # Can't apply Many-to-Many relationship until after the event has been saved. 
+ del defaults["image"] + + updated_event, _ = models.Event.objects.update_or_create( + external_id=event.external_id, + defaults=defaults, + ) + return updated_event + + def _save_tags( + self, + event: models.Event, + tags: list[models.Tag], + ) -> None: + for tag in tags: + tag, _ = models.Tag.objects.get_or_create(value=tag) + event.tags.add(tag) + + def _save_image( + self, + event: models.Event, + image_result: scrapers.ImageResult, + ) -> None: + image_name, image = image_result + + # If images are the same, don't re-upload + has_existing_image = bool(event.image) + if has_existing_image: + existing_image = event.image.read() + if existing_image == image: + return + + file = ContentFile(image, name=image_name) + event.image.save(image_name, file) + + class MeetupService: def __init__( self, homepage_scraper: scrapers.Scraper[list[str]] | None = None, event_scraper: scrapers.Scraper[scrapers.EventScraperResult] | None = None, + event_service: EventService | None = None, ) -> None: self.homepage_scraper: scrapers.Scraper[list[str]] = homepage_scraper or scrapers.MeetupHomepageScraper() self.event_scraper: scrapers.Scraper[scrapers.EventScraperResult] = ( event_scraper or scrapers.MeetupEventScraper() ) + self.event_service = event_service or EventService() def save_events(self) -> None: """Scrape upcoming events from Meetup and save them to the database.""" - now = timezone.localtime() for tech_group in models.TechGroup.objects.filter(homepage__icontains="meetup.com"): event_urls = self.homepage_scraper.scrape(tech_group.homepage) # type: ignore for event_url in event_urls: # TODO: parallelize (with async?) - event, tags = self.event_scraper.scrape(event_url) - event.group = tech_group - event.approved_at = now - defaults = model_to_dict(event, exclude=["id"]) - defaults["group"] = tech_group - - del defaults["tags"] # Can't apply Many-to-Many relationship untill after the event has been saved. 
- new_event, _ = models.Event.objects.update_or_create( - external_id=event.external_id, - defaults=defaults, - ) - for tag in tags: - tag, _ = models.Tag.objects.get_or_create(value=tag) - new_event.tags.add(tag) + result = self.event_scraper.scrape(event_url) + self.event_service.save_event_from_result(result, tech_group) class EventbriteService: @@ -46,28 +94,21 @@ class EventbriteService: def __init__( self, events_scraper: scrapers.Scraper[list[scrapers.EventScraperResult]] | None = None, + event_service: EventService | None = None, ) -> None: self.events_scraper = events_scraper or scrapers.EventbriteScraper() + self.event_service = event_service or EventService() def save_events(self) -> None: """Fetch upcoming events from Eventbrite and save them. Note: this uses an API and doesn't actually web scrape. """ - now = timezone.localtime() for eventbrite_organization in models.EventbriteOrganization.objects.prefetch_related("tech_group"): tech_group = eventbrite_organization.tech_group - events_and_tags = self.events_scraper.scrape(eventbrite_organization.eventbrite_id) - for event, _ in events_and_tags: - event.group = tech_group - event.approved_at = now - defaults = model_to_dict(event, exclude=["id"]) - defaults["group"] = tech_group - del defaults["tags"] # Can't apply Many-to-Many relationship untill after the event has been saved. - models.Event.objects.update_or_create( - external_id=event.external_id, - defaults=defaults, - ) + results = self.events_scraper.scrape(eventbrite_organization.eventbrite_id) + for result in results: + self.event_service.save_event_from_result(result, tech_group) class Sender(Protocol): diff --git a/src/web/tests/data/eventbrite/event_description.json b/src/web/tests/data/eventbrite/event_description.json new file mode 100644 index 00000000..1e7a1d2c --- /dev/null +++ b/src/web/tests/data/eventbrite/event_description.json @@ -0,0 +1,3 @@ +{ + "description": "
Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. FREE with pre-registration - space limited - Oct 2, 2024" +} diff --git a/src/web/tests/data/eventbrite/event_image.jpg b/src/web/tests/data/eventbrite/event_image.jpg new file mode 100644 index 00000000..caad0c37 Binary files /dev/null and b/src/web/tests/data/eventbrite/event_image.jpg differ diff --git a/src/web/tests/data/eventbrite/event_venue.json b/src/web/tests/data/eventbrite/event_venue.json new file mode 100644 index 00000000..cf5a8a93 --- /dev/null +++ b/src/web/tests/data/eventbrite/event_venue.json @@ -0,0 +1,25 @@ +{ + "address": { + "address_1": "702 East Desmet Avenue", + "address_2": "", + "city": "Spokane", + "region": "WA", + "postal_code": "99202", + "country": "US", + "latitude": "47.6672448", + "longitude": "-117.3999126", + "localized_address_display": "702 East Desmet Avenue, Spokane, WA 99202", + "localized_area_display": "Spokane, WA", + "localized_multi_line_address_display": [ + "702 East Desmet Avenue", + "Spokane, WA 99202" + ] + }, + "resource_uri": "https://www.eventbriteapi.com/v3/venues/214450569/", + "id": "214450569", + "age_restriction": null, + "capacity": null, + "name": "John J. Hemmingson Center", + "latitude": "47.6672448", + "longitude": "-117.3999126" +} diff --git a/src/web/tests/data/eventbrite/organizer_events.json b/src/web/tests/data/eventbrite/organizer_events.json new file mode 100644 index 00000000..400df1f6 --- /dev/null +++ b/src/web/tests/data/eventbrite/organizer_events.json @@ -0,0 +1,83 @@ +{ + "pagination": { + "object_count": 1, + "page_number": 1, + "page_size": 50, + "page_count": 1, + "has_more_items": false + }, + "events": [ + { + "name": { + "text": "3rd Annual - INCH360 Regional Cybersecurity Conference", + "html": "3rd Annual - INCH360 Regional Cybersecurity Conference" + }, + "description": { + "text": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. 
FREE with pre-registration - space limited - Oct 2, 2024", + "html": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. FREE with pre-registration - space limited - Oct 2, 2024" + }, + "url": "https://www.eventbrite.com/e/3rd-annual-inch360-regional-cybersecurity-conference-tickets-909447069667", + "start": { + "timezone": "America/Los_Angeles", + "local": "2024-10-02T08:30:00", + "utc": "2024-10-02T15:30:00Z" + }, + "end": { + "timezone": "America/Los_Angeles", + "local": "2024-10-02T16:00:00", + "utc": "2024-10-02T23:00:00Z" + }, + "organization_id": "1773924472233", + "created": "2024-05-19T22:29:10Z", + "changed": "2024-09-06T16:49:53Z", + "published": "2024-05-20T20:03:26Z", + "capacity": null, + "capacity_is_custom": null, + "status": "live", + "currency": "USD", + "listed": true, + "shareable": true, + "online_event": false, + "tx_time_limit": 1200, + "hide_start_date": false, + "hide_end_date": false, + "locale": "en_US", + "is_locked": false, + "privacy_setting": "unlocked", + "is_series": false, + "is_series_parent": false, + "inventory_type": "limited", + "is_reserved_seating": false, + "show_pick_a_seat": false, + "show_seatmap_thumbnail": false, + "show_colors_in_seatmap_thumbnail": false, + "source": "auto_create", + "is_free": true, + "version": null, + "summary": "Full Day of Panels, Speakers and Vendors on Cybersecurity, AI and Compliance. 
FREE with pre-registration - space limited - Oct 2, 2024", + "facebook_event_id": null, + "logo_id": "843746309", + "organizer_id": "72020528223", + "venue_id": "214450569", + "category_id": "102", + "subcategory_id": "2004", + "format_id": "2", + "id": "909447069667", + "resource_uri": "https://www.eventbriteapi.com/v3/events/909447069667/", + "is_externally_ticketed": false, + "logo": { + "crop_mask": null, + "original": { + "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727?auto=format%2Ccompress&q=75&sharp=10&s=09370c02bd3ab62907337f2e1ca8a61d", + "width": 6912, + "height": 3456 + }, + "id": "843746309", + "url": "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727?h=200&w=450&auto=format%2Ccompress&q=75&sharp=10&s=4b5f5340dcfe0cc78bec9f19b08f45f7", + "aspect_ratio": "2", + "edge_color": "#516c79", + "edge_color_set": true + } + } + ] +} diff --git a/src/web/tests/data/meetup-image.jpeg b/src/web/tests/data/meetup-image.jpeg new file mode 100644 index 00000000..61416484 Binary files /dev/null and b/src/web/tests/data/meetup-image.jpeg differ diff --git a/src/web/tests/data/meetup-image.webp b/src/web/tests/data/meetup-image.webp new file mode 100644 index 00000000..5b6cadfd Binary files /dev/null and b/src/web/tests/data/meetup-image.webp differ diff --git a/src/web/tests/test_scrapers.py b/src/web/tests/test_scrapers.py index 54dfcf94..5839e836 100644 --- a/src/web/tests/test_scrapers.py +++ b/src/web/tests/test_scrapers.py @@ -1,12 +1,33 @@ import pathlib from datetime import datetime, timedelta +from zoneinfo import ZoneInfo import freezegun -import responses import pytest +import responses from django.test import TestCase -from web import models, scrapers -from zoneinfo import ZoneInfo + +from web import scrapers + +BASE_DATA_DIR = pathlib.Path(__file__).parent / "data" + + +def mock_response( + url: str, + filepath: 
pathlib.Path, +) -> None: + with open(filepath) as fin: + body = fin.read() + responses.get(url, body=body) + + +def mock_image_response( + url: str, + filepath: pathlib.Path, +) -> None: + with open(filepath, "rb") as fin: + body = fin.read() + responses.get(url, body=body) class TestMeetupHomepageScraper(TestCase): @@ -59,16 +80,26 @@ def test_scraper_without_json(self): class TestMeetupEventScraper(TestCase): @responses.activate def test_scraper_with_json(self): - fin = open(pathlib.Path(__file__).parent / "data" / "meetup-with-json.html") - body = fin.read() - fin.close() + # Arrange + with open(pathlib.Path(__file__).parent / "data" / "meetup-with-json.html") as fin: + body = fin.read() responses.get( "https://www.meetup.com/python-spokane/events/298213205/", body=body, ) + with open(pathlib.Path(__file__).parent / "data" / "meetup-image.jpeg", "rb") as fin: + body = fin.read() + responses.get( + "https://secure.meetupstatic.com/photos/event/1/0/a/e/highres_519844270.jpeg", + body=body, + ) + + # Act scraper = scrapers.MeetupEventScraper() - actual, actual_tags = scraper.scrape("https://www.meetup.com/python-spokane/events/298213205/") + actual, actual_tags, actual_image_result = scraper.scrape( + "https://www.meetup.com/python-spokane/events/298213205/" + ) assert actual.name == "Dagger with Spokane Tech 🚀" assert actual.description and actual.description.startswith("Join us for our monthly SPUG meetup!") @@ -77,6 +108,7 @@ def test_scraper_with_json(self): assert actual.location == "1720 W 4th Ave Unit B, Spokane, WA" assert actual.url == "https://www.meetup.com/python-spokane/events/298213205/" assert actual.external_id == "298213205" + assert len(actual_tags) == 5 assert {t.value for t in actual_tags} == { "Linux", @@ -86,19 +118,29 @@ def test_scraper_with_json(self): "Agile and Scrum", } + assert actual_image_result + assert actual_image_result[0] == "highres_519844270.jpeg" + assert len(actual_image_result[1]) > 0 + @responses.activate def 
test_scraper_without_json(self): - fin = open(pathlib.Path(__file__).parent / "data" / "meetup-without-json.html") - body = fin.read() - fin.close() - responses.get( + # Arrange + mock_response( "https://www.meetup.com/python-spokane/events/298213205/", - body=body, + BASE_DATA_DIR / "meetup-without-json.html", + ) + mock_image_response( + "https://secure.meetupstatic.com/photos/event/1/0/a/e/600_519844270.webp?w=750", + BASE_DATA_DIR / "meetup-image.webp", ) + # Act scraper = scrapers.MeetupEventScraper() - actual, actual_tags = scraper.scrape("https://www.meetup.com/python-spokane/events/298213205/") + actual, actual_tags, actual_image_result = scraper.scrape( + "https://www.meetup.com/python-spokane/events/298213205/" + ) + # Assert assert actual.name == "Dagger with Spokane Tech 🚀" assert actual.description and actual.description.startswith("Join us for our monthly SPUG meetup!") assert actual.date_time == datetime(2024, 3, 19, 18, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles")) @@ -115,6 +157,10 @@ def test_scraper_without_json(self): "Agile and Scrum", } + assert actual_image_result + assert actual_image_result[0] == "600_519844270.webp" + assert len(actual_image_result[1]) > 0 + @pytest.mark.eventbrite class TestEventbriteScraper(TestCase): @@ -126,16 +172,50 @@ class TestEventbriteScraper(TestCase): To run them, set the `EVENTBRITE_API_TOKEN` envrionment variable. 
""" + @responses.activate def test_scraper(self): + # Arrange + mock_response( + "https://www.eventbriteapi.com/v3/organizers/72020528223/events/", + BASE_DATA_DIR / "eventbrite" / "organizer_events.json", + ) + mock_response( + "https://www.eventbriteapi.com/v3/venues/214450569/?expand=none", + BASE_DATA_DIR / "eventbrite" / "event_venue.json", + ) + mock_response( + "https://www.eventbriteapi.com/v3/events/909447069667/description/", + BASE_DATA_DIR / "eventbrite" / "event_description.json", + ) + mock_image_response( + "https://img.evbuc.com/https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727?auto=format%2Ccompress&q=75&sharp=10&s=09370c02bd3ab62907337f2e1ca8a61d", + BASE_DATA_DIR / "eventbrite" / "event_image.jpg", + ) + + # Act scraper = scrapers.EventbriteScraper() - result = scraper.scrape("72020528223") - actual: models.Event = result[0][0] - assert actual.name == "Spring Cyber - Training Series" - assert actual.description and actual.description.startswith( - "
Deep Dive into Pen Testing with white hacker Casey Davis" + organization_id = "72020528223" + result = scraper.scrape(organization_id) + + # Assert + event, tags, image_result = result[0] + assert event.name == "3rd Annual - INCH360 Regional Cybersecurity Conference" + assert event.description + assert event.description.startswith("
Full Day of Panels, Speakers and Vendors on Cybersecurity,") + assert event.date_time == datetime(2024, 10, 2, 8, 30, 0, tzinfo=ZoneInfo("America/Los_Angeles")) + assert event.duration == timedelta(hours=7, minutes=30) + assert event.location == "702 East Desmet Avenue, Spokane, WA 99202" + assert ( + event.url + == "https://www.eventbrite.com/e/3rd-annual-inch360-regional-cybersecurity-conference-tickets-909447069667" ) - assert actual.date_time == datetime(2024, 5, 23, 16, 0, 0, tzinfo=ZoneInfo("America/Los_Angeles")) - assert actual.duration == timedelta(hours=1, minutes=30) - assert actual.location == "2818 North Sullivan Road #Suite 100, Spokane Valley, WA 99216" - assert actual.url == "https://www.eventbrite.com/e/spring-cyber-training-series-tickets-860181354587" - assert actual.external_id == "860181354587" + assert event.external_id == "909447069667" + + assert not tags + + assert image_result + assert ( + image_result[0] + == "https%3A%2F%2Fcdn.evbuc.com%2Fimages%2F843746309%2F530357704049%2F1%2Foriginal.20240906-164727" + ) + assert len(image_result[1]) > 0 diff --git a/src/web/tests/test_services.py b/src/web/tests/test_services.py index b1a3e9de..836b6e8a 100644 --- a/src/web/tests/test_services.py +++ b/src/web/tests/test_services.py @@ -1,5 +1,6 @@ from django.test import TestCase from django.utils import timezone + from web import models, scrapers, services @@ -32,6 +33,10 @@ def scrape(self, url: str) -> scrapers.EventScraperResult: models.Tag(value="Agile and Scrum"), models.Tag(value="Python Web Development"), ], + ( + "image_name", + b"image.png", + ), ) return ( @@ -48,6 +53,10 @@ def scrape(self, url: str) -> scrapers.EventScraperResult: models.Tag(value="Agile and Scrum"), models.Tag(value="Python Web Development"), ], + ( + "image_name", + b"image.png", + ), ) @@ -75,6 +84,30 @@ def test_updates_event_instead_of_creating_new_one(self): assert event.name == "Intro to Dagger" assert event.description == "Super cool intro to Dagger CI/CD!" 
assert event.external_id == MockMeetupEventScraper.EXTERNAL_ID + assert "image_name" in event.image.name + + def test_image_is_not_reuploaded_when_contents_are_same(self): + # Arrange + models.TechGroup.objects.create( + name="Spokane Python User Group", + homepage="https://www.meetup.com/Python-Spokane/", + ) + + meetup_service = services.MeetupService( + MockMeetupHomepageScraper(), + MockMeetupEventScraper(), + ) + + # Act + meetup_service.save_events() + event1 = models.Event.objects.get() + + meetup_service.save_events() + event2 = models.Event.objects.get() + + # Assert + assert event1.pk == event2.pk + assert event1.image.name == event2.image.name def test_manually_applied_tags_are_not_overriden(self): # Arrange