From 1dfb654c644e915b445882b57db8be986699f81b Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 17:03:42 -0400 Subject: [PATCH 1/8] Chrome driver --- backend/Pipfile | 2 +- backend/Pipfile.lock | 92 +++++++------------ .../commands/get_penn_today_events.py | 6 +- 3 files changed, 37 insertions(+), 63 deletions(-) diff --git a/backend/Pipfile b/backend/Pipfile index 37d46dd3..b3e28a22 100644 --- a/backend/Pipfile +++ b/backend/Pipfile @@ -45,7 +45,7 @@ django-redis = "*" redis = "*" python-dateutil = "*" selenium = "*" -webdriver-manager = "*" +chromedriver-binary = "*" [requires] python_version = "3.11" diff --git a/backend/Pipfile.lock b/backend/Pipfile.lock index 255d8290..222caa4f 100644 --- a/backend/Pipfile.lock +++ b/backend/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "00fcc1f1633885e1bfb66d00ecfde1bcd4398813869828461c35ed3b4706487b" + "sha256": "6297f161851c136f37143ec29f65d41dd4797c210933fad74b4169327b75c9d8" }, "pipfile-spec": 6, "requires": { @@ -40,14 +40,6 @@ "markers": "python_version >= '3.7'", "version": "==3.7.2" }, - "async-timeout": { - "hashes": [ - "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f", - "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028" - ], - "markers": "python_version >= '3.7'", - "version": "==4.0.3" - }, "attrs": { "hashes": [ "sha256:935dc3b529c262f6cf76e50877d35a4bd3c1de194fd41f47a2b7ae8f19971f30", @@ -74,20 +66,20 @@ }, "boto3": { "hashes": [ - "sha256:c26c31ceeeb2bc5d2bb96ba0fdc9a04d7b10e6e0b081c55b9cea9069a0be04dd", - "sha256:f8046e3e2d1186a49b49f7464c4811c265c86001f404dd1a96c4365c773a4245" + "sha256:004e67b078be58d34469406f93cc8b95bc43becef4bbe44523a0b8e51f84c668", + "sha256:162edf182e53c198137a28432a626dba103f787a8f5000ed4758b73ccd203fa0" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==1.34.57" + "version": "==1.34.59" }, "botocore": { "hashes": [ - "sha256:9a5aa2034de9f0c367b4b61a92af0fa827f5c21affa19e0a284838a142e71083", - "sha256:c8dafe0ad378a88bcf4153e6972870b03fb5aab406b694202307500709940baf" + "sha256:24edb4d21d7c97dea0c6c4a80d36b3809b1443a30b0bd5e317d6c319dfac823f", + "sha256:4bc112dafb1679ab571117593f7656604726a3da0e5ae5bad00ea772fa40e75c" ], "markers": "python_version >= '3.8'", - "version": "==1.34.57" + "version": "==1.34.59" }, "celery": { "hashes": [ @@ -260,6 +252,13 @@ "markers": "python_full_version >= '3.7.0'", "version": "==3.3.2" }, + "chromedriver-binary": { + "hashes": [ + "sha256:b315ab68024f7ba3b4ac83e57786c6168992abcd9b59e2cad8ccf4e95773ac67" + ], + "index": "pypi", + "version": "==124.0.6348.0.0" + }, "click": { "hashes": [ "sha256:6a7a62563bbfabfda3a38f3023a1db4a35978c0abd76f6c9605ecd6554d6d9b1", @@ -367,12 +366,12 @@ }, "django-filter": { "hashes": [ - "sha256:67583aa43b91fe8c49f74a832d95f4d8442be628fd4c6d65e9f811f5153a4e5c", - "sha256:99122a201d83860aef4fe77758b69dda913e874cc5e0eaa50a86b0b18d708400" + "sha256:335bcae6cbd3e984b024841070f567b22faea57594f27d37c52f8f131f8d8621", + "sha256:65cb43ce272077e5ac6aae1054d76c121cd6b552e296a82a13921e9371baf8c1" ], "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==23.5" + "markers": "python_version >= '3.8'", + "version": "==24.1" }, "django-labs-accounts": { "hashes": [ @@ -566,14 +565,6 @@ "markers": "python_version >= '3.7'", "version": "==1.3.0.post0" }, - "packaging": { - "hashes": [ - "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", - "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" - ], - "markers": "python_version >= '3.7'", - "version": "==23.2" - }, "pandas": { "hashes": [ "sha256:04f6ec3baec203c13e3f8b139fb0f9f86cd8c0b94603ae3ae8ce9a422e9f5bee", @@ -748,17 +739,9 @@ "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427" ], "index": "pypi", - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==2.9.0.post0" }, - "python-dotenv": { - "hashes": [ - "sha256:e324ee90a023d808f1959c46bcbc04446a10ced277783dc6ee09987c37ec10ca", - "sha256:f7b63ef50f1b690dddf550d03497b66d609393b40b564ed0d674909a68ebf16a" - ], - "markers": "python_version >= '3.8'", - "version": "==1.0.1" - }, "pytz": { "hashes": [ "sha256:2a29735ea9c18baf14b448846bde5a48030ed267578472d8955cd0e7443a9812", @@ -826,12 +809,12 @@ }, "redis": { "hashes": [ - "sha256:3f82cc80d350e93042c8e6e7a5d0596e4dd68715babffba79492733e1f367037", - "sha256:4caa8e1fcb6f3c0ef28dba99535101d80934b7d4cd541bbb47f4a3826ee472d1" + "sha256:4973bae7444c0fbed64a06b87446f79361cb7e4ec1538c022d696ed7a5015580", + "sha256:5da9b8fe9e1254293756c16c008e8620b3d15fcc6dde6babde9541850e72a32d" ], "index": "pypi", "markers": "python_version >= '3.7'", - "version": "==5.0.2" + "version": "==5.0.3" }, "requests": { "hashes": [ @@ -868,18 +851,18 @@ }, "sentry-sdk": { "hashes": [ - "sha256:becda09660df63e55f307570e9817c664392655a7328bbc414b507e9cb874c67", - "sha256:f143f3fb4bb57c90abef6e2ad06b5f6f02b2ca13e4060ec5c0549c7a9ccce3fa" + "sha256:4f2d6c43c07925d8cd10dfbd0970ea7cb784f70e79523cca9dbcd72df38e5a46", + "sha256:be4f8f4b29a80b6a3b71f0f31487beb9e296391da20af8504498a328befed53f" ], "index": "pypi", - "version": "==1.40.6" + "version": "==1.41.0" }, "six": { "hashes": [ "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926", "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254" ], - "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==1.16.0" }, "sniffio": { @@ -984,15 +967,6 @@ ], "version": "==0.2.13" }, - "webdriver-manager": { - "hashes": [ - "sha256:25ec177c6a2ce9c02fb8046f1b2732701a9418d6a977967bb065d840a3175d87", - "sha256:d7970052295bb9cda2c1a24cf0b872dd2c41ababcc78f7b6b8dc37a41e979a7e" - ], - "index": "pypi", - "markers": "python_version >= '3.7'", - "version": "==4.0.1" - }, "webencodings": { "hashes": [ "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", @@ -1271,11 +1245,11 @@ }, "packaging": { "hashes": [ - "sha256:048fb0e9405036518eaaf48a55953c750c11e1a1b68e0dd1a9d62ed0c092cfc5", - "sha256:8c491190033a9af7e1d931d0b5dacc2ef47509b34dd0de67ed209b5203fc88c7" + "sha256:2ddfb553fdf02fb784c234c7ba6ccc288296ceabec964ad2eae3777778130bc5", + "sha256:eb82c5e3e56209074766e6885bb04b8c38a0c015d0a30036ebe7ece34c9989e9" ], "markers": "python_version >= '3.7'", - "version": "==23.2" + "version": "==24.0" }, "pathspec": { "hashes": [ @@ -1318,12 +1292,12 @@ }, "pytest": { "hashes": [ - "sha256:d4051d623a2e0b7e51960ba963193b09ce6daeb9759a451844a21e4ddedfc1bd", - "sha256:edfaaef32ce5172d5466b5127b42e0d6d35ebbe4453f0e3505d96afd93f6b096" + "sha256:2a8386cfc11fa9d2c50ee7b2a57e7d898ef90470a7a34c4b949ff59662bb78b7", + "sha256:ac978141a75948948817d360297b7aae0fcb9d6ff6bc9ec6d514b85d5a65c044" ], "index": "pypi", "markers": "python_version >= '3.8'", - "version": "==8.0.2" + "version": "==8.1.1" }, "pytoolconfig": { "extras": [ @@ -1466,7 +1440,7 @@ "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" ], - "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2'", + "markers": "python_version >= '2.6' and python_version not in '3.0, 3.1, 3.2, 3.3'", "version": "==0.10.2" }, "typed-ast": { diff --git a/backend/penndata/management/commands/get_penn_today_events.py b/backend/penndata/management/commands/get_penn_today_events.py index b144b452..47c485b7 100644 --- a/backend/penndata/management/commands/get_penn_today_events.py +++ b/backend/penndata/management/commands/get_penn_today_events.py @@ -11,6 +11,8 @@ from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait from webdriver_manager.firefox import GeckoDriverManager +import chromedriver_binary # Adds chromedriver binary to path + from penndata.models import Event @@ -121,9 +123,7 @@ def connect_and_parse_html(self, event_url, condition): try: options = Options() options.add_argument("--headless") - driver = webdriver.Firefox( - service=FirefoxService(GeckoDriverManager().install()), options=options - ) + driver = webdriver.Chrome(options=options) driver.get(event_url) print("WAITING FOR ELEMENT") From 2080a663b6244a45ed4e30f611c6773835f28639 Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 17:21:34 -0400 Subject: [PATCH 2/8] Test Chrome driver --- .../commands/get_penn_today_events.py | 17 +++++++++-------- backend/tests/penndata/test_events.py | 13 +++++++++++++ 2 files changed, 22 insertions(+), 8 deletions(-) create mode 100644 backend/tests/penndata/test_events.py diff --git a/backend/penndata/management/commands/get_penn_today_events.py b/backend/penndata/management/commands/get_penn_today_events.py index 47c485b7..4f33d264 100644 --- a/backend/penndata/management/commands/get_penn_today_events.py +++ b/backend/penndata/management/commands/get_penn_today_events.py @@ -1,18 +1,15 @@ import datetime from urllib.parse import urljoin +import chromedriver_binary from bs4 import BeautifulSoup from django.core.management.base import BaseCommand from django.utils import timezone from selenium import webdriver from selenium.webdriver.common.by import By -from selenium.webdriver.firefox.options import Options -from selenium.webdriver.firefox.service import Service as FirefoxService +# from selenium.webdriver.firefox.service import Service as FirefoxService from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait -from webdriver_manager.firefox import GeckoDriverManager -import chromedriver_binary # Adds chromedriver binary to path - from penndata.models import Event @@ -75,7 +72,7 @@ def handle(self, *args, **kwargs): start_time = datetime.time(0, 0) else: start_time = datetime.datetime.strptime(start_time_str, "%I:%M%p").time() - start_date = datetime.datetime.combine(start_date, start_time) + start_date = timezone.make_aware(datetime.datetime.combine(start_date, start_time)) if start_date > now + datetime.timedelta(days=31): continue @@ -108,7 +105,7 @@ def handle(self, *args, **kwargs): defaults={ "event_type": "Penn Today", "image_url": "", - "start": timezone.make_aware(start_date), + "start": start_date, "end": timezone.make_aware(end_date), "location": location, "website": event_url, @@ -121,7 +118,11 @@ def handle(self, *args, **kwargs): def connect_and_parse_html(self, event_url, condition): try: - options = Options() + # from selenium.webdriver.chrome.service import Service + print(chromedriver_binary.chromedriver_filename) + + # service = Service(executable_path=chromedriver_binary.chromedriver_filename) + options = webdriver.ChromeOptions() options.add_argument("--headless") driver = webdriver.Chrome(options=options) diff --git a/backend/tests/penndata/test_events.py b/backend/tests/penndata/test_events.py new file mode 100644 index 00000000..3b4c7903 --- /dev/null +++ b/backend/tests/penndata/test_events.py @@ -0,0 +1,13 @@ +from django.core.management import call_command +from django.test import TestCase + +from penndata.models import Event + + +class TestPennTodayEvents(TestCase): + def setUp(self): + call_command("get_penn_today_events") + + def test_event(self): + events = Event.objects.all() + self.assertLess(0, events.count()) From 1235e933c2a48c53224e5875d1b33cd302cf9474 Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 17:23:35 -0400 Subject: [PATCH 3/8] Black --- backend/penndata/management/commands/get_penn_today_events.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/penndata/management/commands/get_penn_today_events.py b/backend/penndata/management/commands/get_penn_today_events.py index 4f33d264..f2b7c99f 100644 --- a/backend/penndata/management/commands/get_penn_today_events.py +++ b/backend/penndata/management/commands/get_penn_today_events.py @@ -7,7 +7,6 @@ from django.utils import timezone from selenium import webdriver from selenium.webdriver.common.by import By -# from selenium.webdriver.firefox.service import Service as FirefoxService from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.support.ui import WebDriverWait From 74fd7afecdd74baa92d9c0fc2e192b4a75afb67a Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 17:27:29 -0400 Subject: [PATCH 4/8] Add service? --- .../penndata/management/commands/get_penn_today_events.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/backend/penndata/management/commands/get_penn_today_events.py b/backend/penndata/management/commands/get_penn_today_events.py index f2b7c99f..84d231e0 100644 --- a/backend/penndata/management/commands/get_penn_today_events.py +++ b/backend/penndata/management/commands/get_penn_today_events.py @@ -117,13 +117,14 @@ def handle(self, *args, **kwargs): def connect_and_parse_html(self, event_url, condition): try: - # from selenium.webdriver.chrome.service import Service + from selenium.webdriver.chrome.service import Service + print(chromedriver_binary.chromedriver_filename) - # service = Service(executable_path=chromedriver_binary.chromedriver_filename) + service = Service(executable_path=chromedriver_binary.chromedriver_filename) options = webdriver.ChromeOptions() options.add_argument("--headless") - driver = webdriver.Chrome(options=options) + driver = webdriver.Chrome(options=options, service=service) driver.get(event_url) print("WAITING FOR ELEMENT") From ec7d05e83d440786b3fa7de76775a954349f00af Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 17:50:50 -0400 Subject: [PATCH 5/8] Download chrome browser --- backend/Dockerfile | 3 +++ .../penndata/management/commands/get_penn_today_events.py | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 434ce97e..726828ce 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -1,5 +1,8 @@ FROM pennlabs/django-base:b269ea1613686b1ac6370154debbb741b012de1a-3.11 +RUN wget -q https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb +RUN apt-get install ./google-chrome-stable_current_amd64.deb -y + LABEL maintainer="Penn Labs" # Copy project dependencies diff --git a/backend/penndata/management/commands/get_penn_today_events.py b/backend/penndata/management/commands/get_penn_today_events.py index 84d231e0..0b4514b6 100644 --- a/backend/penndata/management/commands/get_penn_today_events.py +++ b/backend/penndata/management/commands/get_penn_today_events.py @@ -117,14 +117,14 @@ def handle(self, *args, **kwargs): def connect_and_parse_html(self, event_url, condition): try: - from selenium.webdriver.chrome.service import Service print(chromedriver_binary.chromedriver_filename) + from selenium.webdriver.chrome.options import Options + + options = Options() - service = Service(executable_path=chromedriver_binary.chromedriver_filename) - options = webdriver.ChromeOptions() options.add_argument("--headless") - driver = webdriver.Chrome(options=options, service=service) + driver = webdriver.Chrome(options=options) driver.get(event_url) print("WAITING FOR ELEMENT") From 4f9dfa1c718825377346b4f63e756e236cc42bba Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 17:53:55 -0400 Subject: [PATCH 6/8] Change to dev for gh testing --- backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 726828ce..103098c4 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -14,7 +14,7 @@ RUN pipenv install --system # Copy project files COPY . /app/ -ENV DJANGO_SETTINGS_MODULE pennmobile.settings.production +ENV DJANGO_SETTINGS_MODULE pennmobile.settings.development ENV SECRET_KEY 'temporary key just to build the docker image' # Collect static files From 899895561e39a3e969cb348a257c5504c22ee260 Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 18:02:11 -0400 Subject: [PATCH 7/8] Revert --- backend/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/Dockerfile b/backend/Dockerfile index 103098c4..726828ce 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -14,7 +14,7 @@ RUN pipenv install --system # Copy project files COPY . /app/ -ENV DJANGO_SETTINGS_MODULE pennmobile.settings.development +ENV DJANGO_SETTINGS_MODULE pennmobile.settings.production ENV SECRET_KEY 'temporary key just to build the docker image' # Collect static files From ae8945ad40e4743691486e53fffb52a0fe603f44 Mon Sep 17 00:00:00 2001 From: Justin Zhang Date: Sun, 10 Mar 2024 18:40:13 -0400 Subject: [PATCH 8/8] No luck yet --- backend/.devcontainer/Dockerfile | 17 +++++++++++++++++ backend/.devcontainer/devcontainer.json | 7 +++++++ 2 files changed, 24 insertions(+) create mode 100644 backend/.devcontainer/Dockerfile create mode 100644 backend/.devcontainer/devcontainer.json diff --git a/backend/.devcontainer/Dockerfile b/backend/.devcontainer/Dockerfile new file mode 100644 index 00000000..6f58371b --- /dev/null +++ b/backend/.devcontainer/Dockerfile @@ -0,0 +1,17 @@ +FROM --platform=linux/amd64 python:3.11-slim-buster + +ARG IMAGE_NAME=pennlabs/mobile-backend-devcontainer + +# Install build dependencies +RUN apt-get update && apt-get install -y gcc libpq-dev libc-dev git-all wget \ + && rm -rf /var/lib/apt/lists/* + + +RUN wget -q https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb + +RUN apt install -y ./google-chrome-stable_current_amd64.deb + +RUN pip install pipenv + +ENV DJANGO_SETTINGS_MODULE pennmobile.settings.development +ENV SECRET_KEY 'temporary key just to build the docker image' \ No newline at end of file diff --git a/backend/.devcontainer/devcontainer.json b/backend/.devcontainer/devcontainer.json new file mode 100644 index 00000000..f47fb0af --- /dev/null +++ b/backend/.devcontainer/devcontainer.json @@ -0,0 +1,7 @@ +{ + "name": "Penn Mobile Backend", + "build": { + "dockerfile": "Dockerfile" + }, + "runArgs": ["--platform=linux/amd64" ] +} \ No newline at end of file