From 18851bfe35a5479330373417b49763ec3741147b Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 29 Oct 2024 14:32:54 -0400 Subject: [PATCH 1/4] Rename LocationVisitsAlarms to GranularLocationVisitsAlarms Also remove chrome_installer and add alarm for timestamps divisible by 96 --- alarm_controller.py | 8 +- ....py => granular_location_visits_alarms.py} | 30 +++++-- chrome_installer.sh | 21 ----- helpers/query_helper.py | 88 ++++++++++--------- ...> test_granular_location_visits_alarms.py} | 63 ++++++++----- 5 files changed, 118 insertions(+), 92 deletions(-) rename alarms/models/{location_visits_alarms.py => granular_location_visits_alarms.py} (75%) delete mode 100755 chrome_installer.sh rename tests/alarms/models/{test_location_visits_alarms.py => test_granular_location_visits_alarms.py} (61%) diff --git a/alarm_controller.py b/alarm_controller.py index 55265ac..7c814b4 100644 --- a/alarm_controller.py +++ b/alarm_controller.py @@ -1,8 +1,9 @@ import os from alarms.models.circ_trans_alarms import CircTransAlarms +from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms +from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms from alarms.models.holds_alarms import HoldsAlarms -from alarms.models.location_visits_alarms import LocationVisitsAlarms from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms from alarms.models.patron_info_alarms import PatronInfoAlarms from alarms.models.pc_reserve_alarms import PcReserveAlarms @@ -61,8 +62,9 @@ def _setup_alarms(self): self.logger.info("Setting up alarms...") return [ CircTransAlarms(self.redshift_client, self.sierra_client), + DailyLocationVisitsAlarms(self.redshift_client), + GranularLocationVisitsAlarms(self.redshift_client), HoldsAlarms(self.redshift_client), - LocationVisitsAlarms(self.redshift_client), OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials), PatronInfoAlarms(self.redshift_client, self.sierra_client), PcReserveAlarms(self.redshift_client, self.envisionware_client), @@ -73,4 +75,4 @@ def _setup_alarms(self): def run_alarms(self): for e in self.alarms: - e.run_checks() + e.run_checks() \ No newline at end of file diff --git a/alarms/models/location_visits_alarms.py b/alarms/models/granular_location_visits_alarms.py similarity index 75% rename from alarms/models/location_visits_alarms.py rename to alarms/models/granular_location_visits_alarms.py index b975574..05ad6dd 100644 --- a/alarms/models/location_visits_alarms.py +++ b/alarms/models/granular_location_visits_alarms.py @@ -8,16 +8,16 @@ from nypl_py_utils.functions.log_helper import create_log -class LocationVisitsAlarms(Alarm): +class GranularLocationVisitsAlarms(Alarm): def __init__(self, redshift_client): super().__init__(redshift_client) - self.logger = create_log("location_visits_alarms") + self.logger = create_log("granular_location_visits_alarms") def run_checks(self): if not self.run_added_tests: return - self.logger.info("\nLOCATION VISITS\n") + self.logger.info("\nGRANULAR LOCATION VISITS\n") redshift_table = "location_visits" + self.redshift_suffix stale_start_date = (self.yesterday_date - timedelta(days=30)).isoformat() redshift_count_query = build_redshift_location_visits_count_query( @@ -40,13 +40,16 @@ def run_checks(self): redshift_stale_rows = self.redshift_client.execute_query(redshift_stale_query) self.redshift_client.close_connection() - self.new_location_visits_less_than_ten_thousand_alarm( + self.check_location_visits_less_than_ten_thousand_alarm( + redshift_count, redshift_table + ) + self.check_location_visits_evenly_divisible_alarm( redshift_count, redshift_table ) self.check_redshift_duplicates_alarm(redshift_duplicates) self.check_redshift_stale_rows_alarm(redshift_stale_rows) - def new_location_visits_less_than_ten_thousand_alarm( + def check_location_visits_less_than_ten_thousand_alarm( self, redshift_count, redshift_table ): if redshift_count < 10000: @@ -61,6 +64,21 @@ def new_location_visits_less_than_ten_thousand_alarm( ) ) + def check_location_visits_evenly_divisible_alarm( + self, redshift_count, redshift_table + ): + if redshift_count % 96: + self.logger.error( + ( + "Number of {redshift_table} rows on {date} not divisible by 96: " + "{redshift_count}" + ).format( + redshift_count=redshift_count, + date=self.yesterday, + redshift_table=redshift_table, + ) + ) + def check_redshift_duplicates_alarm(self, redshift_duplicates): if len(redshift_duplicates) > 0: self.logger.error( @@ -76,4 +94,4 @@ def check_redshift_stale_rows_alarm(self, redshift_stale_rows): "The following (shoppertrak_site_id, orbit, increment_start) " "combinations are marked as stale and have not been replaced " "with a fresh row: {}".format(redshift_stale_rows) - ) + ) \ No newline at end of file diff --git a/chrome_installer.sh b/chrome_installer.sh deleted file mode 100755 index 2e22c8b..0000000 --- a/chrome_installer.sh +++ /dev/null @@ -1,21 +0,0 @@ -#!/bin/bash -set -e - -latest_stable_json="https://googlechromelabs.github.io/chrome-for-testing/last-known-good-versions-with-downloads.json" -json_data=$(curl -s "$latest_stable_json") - -latest_chrome_linux_download_url="$(echo "$json_data" | jq -r ".channels.Stable.downloads.chrome[0].url")" -latest_chrome_driver_linux_download_url="$(echo "$json_data" | jq -r ".channels.Stable.downloads.chromedriver[0].url")" - -download_path_chrome_linux="/opt/chrome-headless-shell-linux.zip" -dowload_path_chrome_driver_linux="/opt/chrome-driver-linux.zip" - -mkdir -p "/opt/chrome" -curl -Lo $download_path_chrome_linux $latest_chrome_linux_download_url -unzip -q $download_path_chrome_linux -d "/opt/chrome" -rm -rf $download_path_chrome_linux - -mkdir -p "/opt/chrome-driver" -curl -Lo $dowload_path_chrome_driver_linux $latest_chrome_driver_linux_download_url -unzip -q $dowload_path_chrome_driver_linux -d "/opt/chrome-driver" -rm -rf $dowload_path_chrome_driver_linux \ No newline at end of file diff --git a/helpers/query_helper.py b/helpers/query_helper.py index 845a2a4..eb38f6c 100755 --- a/helpers/query_helper.py +++ b/helpers/query_helper.py @@ -3,6 +3,37 @@ "SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';" ) +_REDSHIFT_DAILY_LOCATION_VISITS_COUNT_QUERY = ( + "SELECT COUNT(*) FROM {table} WHERE visits_date = '{date}';" +) + +_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = ( + "SELECT COUNT(id) FROM {table} " + "WHERE increment_start::DATE = '{date}' AND is_fresh;" +) + +_REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY = """ + SELECT shoppertrak_site_id, orbit, increment_start + FROM {table} + WHERE increment_start::DATE = '{date}' AND is_fresh + GROUP BY shoppertrak_site_id, orbit, increment_start + HAVING COUNT(*) > 1;""" + +_REDSHIFT_LOCATION_VISITS_STALE_QUERY = """ + WITH stale_keys AS ( + SELECT shoppertrak_site_id, orbit, increment_start, + CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start) AS key + FROM {table} + WHERE poll_date >= '{date}' AND NOT is_fresh + ) + SELECT shoppertrak_site_id, orbit, increment_start + FROM stale_keys + WHERE key NOT IN ( + SELECT CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start) + FROM {table} + WHERE poll_date >= '{date}' AND is_fresh + );""" + _REDSHIFT_HOLDS_QUERY = ( "SELECT COUNT(id) FROM {table} WHERE TRUNC(update_timestamp) = '{date}';" ) @@ -51,33 +82,6 @@ ) );""" -_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = ( - "SELECT COUNT(id) FROM {table} " - "WHERE increment_start::DATE = '{date}' AND is_fresh;" -) - -_REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY = """ - SELECT shoppertrak_site_id, orbit, increment_start - FROM {table} - WHERE increment_start::DATE = '{date}' AND is_fresh - GROUP BY shoppertrak_site_id, orbit, increment_start - HAVING COUNT(*) > 1;""" - -_REDSHIFT_LOCATION_VISITS_STALE_QUERY = """ - WITH stale_keys AS ( - SELECT shoppertrak_site_id, orbit, increment_start, - CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start) AS key - FROM {table} - WHERE poll_date >= '{date}' AND NOT is_fresh - ) - SELECT shoppertrak_site_id, orbit, increment_start - FROM stale_keys - WHERE key NOT IN ( - SELECT CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start) - FROM {table} - WHERE poll_date >= '{date}' AND is_fresh - );""" - _REDSHIFT_OVERDRIVE_QUERY = ( "SELECT COUNT(*) FROM {table} WHERE transaction_et = '{date}';" ) @@ -171,6 +175,22 @@ def build_redshift_circ_trans_query(table, date_field, date): ) +def build_redshift_daily_visits_query(table, date): + return _REDSHIFT_DAILY_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date) + + +def build_redshift_location_visits_count_query(table, date): + return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date) + + +def build_redshift_location_visits_duplicate_query(table, date): + return _REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY.format(table=table, date=date) + + +def build_redshift_location_visits_stale_query(table, date): + return _REDSHIFT_LOCATION_VISITS_STALE_QUERY.format(table=table, date=date) + + def build_redshift_holds_query(table, date): return _REDSHIFT_HOLDS_QUERY.format(table=table, date=date) @@ -187,18 +207,6 @@ def build_redshift_holds_null_query(table, date): return _REDSHIFT_HOLDS_NULL_QUERY.format(table=table, date=date) -def build_redshift_location_visits_count_query(table, date): - return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date) - - -def build_redshift_location_visits_duplicate_query(table, date): - return _REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY.format(table=table, date=date) - - -def build_redshift_location_visits_stale_query(table, date): - return _REDSHIFT_LOCATION_VISITS_STALE_QUERY.format(table=table, date=date) - - def build_redshift_overdrive_query(table, date): return _REDSHIFT_OVERDRIVE_QUERY.format(table=table, date=date) @@ -264,4 +272,4 @@ def build_sierra_itypes_count_query(): def build_envisionware_pc_reserve_query(date): - return _ENVISIONWARE_PC_RESERVE_QUERY.format(date=date) + return _ENVISIONWARE_PC_RESERVE_QUERY.format(date=date) \ No newline at end of file diff --git a/tests/alarms/models/test_location_visits_alarms.py b/tests/alarms/models/test_granular_location_visits_alarms.py similarity index 61% rename from tests/alarms/models/test_location_visits_alarms.py rename to tests/alarms/models/test_granular_location_visits_alarms.py index bc39a17..86f9899 100644 --- a/tests/alarms/models/test_location_visits_alarms.py +++ b/tests/alarms/models/test_granular_location_visits_alarms.py @@ -1,19 +1,19 @@ import logging import pytest -from alarms.models.location_visits_alarms import LocationVisitsAlarms +from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms from datetime import date, datetime -class TestLocationVisitsAlarms: +class TestGranularLocationVisitsAlarms: @pytest.fixture def test_instance(self, mocker): mock_redshift_client = mocker.MagicMock() - return LocationVisitsAlarms(mock_redshift_client) + return GranularLocationVisitsAlarms(mock_redshift_client) def test_init(self, mocker): mock_redshift_client = mocker.MagicMock() - location_visits_alarms = LocationVisitsAlarms(mock_redshift_client) + location_visits_alarms = GranularLocationVisitsAlarms(mock_redshift_client) assert location_visits_alarms.redshift_suffix == "_test_redshift_db" assert location_visits_alarms.run_added_tests assert location_visits_alarms.yesterday_date == date(2023, 5, 31) @@ -21,18 +21,18 @@ def test_init(self, mocker): def test_run_checks_no_alarm(self, test_instance, mocker, caplog): mock_redshift_count_query = mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_count_query", + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_count_query", return_value="redshift count query", ) mock_redshift_duplicate_query = mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_duplicate_query", + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_duplicate_query", return_value="redshift duplicate query", ) mock_redshift_stale_query = mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_stale_query", + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query", return_value="redshift stale query", ) - test_instance.redshift_client.execute_query.side_effect = [([11000],), (), ()] + test_instance.redshift_client.execute_query.side_effect = [([11040],), (), ()] with caplog.at_level(logging.ERROR): test_instance.run_checks() @@ -59,35 +59,54 @@ def test_run_checks_no_alarm(self, test_instance, mocker, caplog): def test_run_checks_no_records_alarm(self, test_instance, mocker, caplog): mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_count_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_count_query" ) mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_duplicate_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_duplicate_query" ) mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_stale_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" ) - test_instance.redshift_client.execute_query.side_effect = [([10],), (), ()] + test_instance.redshift_client.execute_query.side_effect = [([96],), (), ()] with caplog.at_level(logging.ERROR): test_instance.run_checks() assert ( - "Found only 10 location_visits_test_redshift_db rows for all " + "Found only 96 location_visits_test_redshift_db rows for all " "of 2023-05-31" ) in caplog.text + + def test_run_checks_bad_records_num_alarm(self, test_instance, mocker, caplog): + mocker.patch( + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_count_query" + ) + mocker.patch( + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_duplicate_query" + ) + mocker.patch( + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" + ) + test_instance.redshift_client.execute_query.side_effect = [([11000],), (), ()] + + with caplog.at_level(logging.ERROR): + test_instance.run_checks() + assert ( + "Number of location_visits_test_redshift_db rows on 2023-05-31 not " + "divisible by 96: 11000" + ) in caplog.text def test_run_checks_duplicate_records_alarm(self, test_instance, mocker, caplog): mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_count_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_count_query" ) mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_duplicate_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_duplicate_query" ) mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_stale_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" ) test_instance.redshift_client.execute_query.side_effect = [ - ([11000],), + ([11040],), ( ["aa", 1, datetime(2023, 5, 31, 9, 0, 0)], ["bb", 2, datetime(2023, 5, 31, 9, 15, 0)], @@ -106,16 +125,16 @@ def test_run_checks_duplicate_records_alarm(self, test_instance, mocker, caplog) def test_run_checks_stale_records_alarm(self, test_instance, mocker, caplog): mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_count_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_count_query" ) mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_duplicate_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_duplicate_query" ) mocker.patch( - "alarms.models.location_visits_alarms.build_redshift_location_visits_stale_query" + "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" ) test_instance.redshift_client.execute_query.side_effect = [ - ([11000],), + ([11040],), (), ( ["aa", 1, datetime(2023, 5, 31, 9, 0, 0)], @@ -130,4 +149,4 @@ def test_run_checks_stale_records_alarm(self, test_instance, mocker, caplog): "combinations are marked as stale and have not been replaced " "with a fresh row: (['aa', 1, FakeDatetime(2023, 5, 31, 9, " "0)], ['bb', 2, FakeDatetime(2023, 5, 31, 9, 15)])" - ) in caplog.text + ) in caplog.text \ No newline at end of file From 2b2d4ca23eb39b059f05125b6062990dc792c0d6 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Tue, 29 Oct 2024 15:19:47 -0400 Subject: [PATCH 2/4] Remove unnecessary location visits alarms --- Makefile | 2 +- alarm_controller.py | 4 +-- .../models/granular_location_visits_alarms.py | 20 +----------- helpers/query_helper.py | 10 +----- .../test_granular_location_visits_alarms.py | 31 ++++--------------- 5 files changed, 10 insertions(+), 57 deletions(-) diff --git a/Makefile b/Makefile index a87186a..a4e48e3 100644 --- a/Makefile +++ b/Makefile @@ -8,7 +8,7 @@ help: @echo "make test" @echo " run associated test suite with pytest" @echo "make lint" - @echo " lint project files using the flake8 linter" + @echo " lint project files using the black linter" run: export ENVIRONMENT=devel; \ diff --git a/alarm_controller.py b/alarm_controller.py index 7c814b4..1f8ef3a 100644 --- a/alarm_controller.py +++ b/alarm_controller.py @@ -1,7 +1,6 @@ import os from alarms.models.circ_trans_alarms import CircTransAlarms -from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms from alarms.models.holds_alarms import HoldsAlarms from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms @@ -62,7 +61,6 @@ def _setup_alarms(self): self.logger.info("Setting up alarms...") return [ CircTransAlarms(self.redshift_client, self.sierra_client), - DailyLocationVisitsAlarms(self.redshift_client), GranularLocationVisitsAlarms(self.redshift_client), HoldsAlarms(self.redshift_client), OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials), @@ -75,4 +73,4 @@ def _setup_alarms(self): def run_alarms(self): for e in self.alarms: - e.run_checks() \ No newline at end of file + e.run_checks() diff --git a/alarms/models/granular_location_visits_alarms.py b/alarms/models/granular_location_visits_alarms.py index 05ad6dd..a21080a 100644 --- a/alarms/models/granular_location_visits_alarms.py +++ b/alarms/models/granular_location_visits_alarms.py @@ -43,9 +43,6 @@ def run_checks(self): self.check_location_visits_less_than_ten_thousand_alarm( redshift_count, redshift_table ) - self.check_location_visits_evenly_divisible_alarm( - redshift_count, redshift_table - ) self.check_redshift_duplicates_alarm(redshift_duplicates) self.check_redshift_stale_rows_alarm(redshift_stale_rows) @@ -64,21 +61,6 @@ def check_location_visits_less_than_ten_thousand_alarm( ) ) - def check_location_visits_evenly_divisible_alarm( - self, redshift_count, redshift_table - ): - if redshift_count % 96: - self.logger.error( - ( - "Number of {redshift_table} rows on {date} not divisible by 96: " - "{redshift_count}" - ).format( - redshift_count=redshift_count, - date=self.yesterday, - redshift_table=redshift_table, - ) - ) - def check_redshift_duplicates_alarm(self, redshift_duplicates): if len(redshift_duplicates) > 0: self.logger.error( @@ -94,4 +76,4 @@ def check_redshift_stale_rows_alarm(self, redshift_stale_rows): "The following (shoppertrak_site_id, orbit, increment_start) " "combinations are marked as stale and have not been replaced " "with a fresh row: {}".format(redshift_stale_rows) - ) \ No newline at end of file + ) diff --git a/helpers/query_helper.py b/helpers/query_helper.py index eb38f6c..e305036 100755 --- a/helpers/query_helper.py +++ b/helpers/query_helper.py @@ -3,10 +3,6 @@ "SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';" ) -_REDSHIFT_DAILY_LOCATION_VISITS_COUNT_QUERY = ( - "SELECT COUNT(*) FROM {table} WHERE visits_date = '{date}';" -) - _REDSHIFT_LOCATION_VISITS_COUNT_QUERY = ( "SELECT COUNT(id) FROM {table} " "WHERE increment_start::DATE = '{date}' AND is_fresh;" @@ -175,10 +171,6 @@ def build_redshift_circ_trans_query(table, date_field, date): ) -def build_redshift_daily_visits_query(table, date): - return _REDSHIFT_DAILY_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date) - - def build_redshift_location_visits_count_query(table, date): return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date) @@ -272,4 +264,4 @@ def build_sierra_itypes_count_query(): def build_envisionware_pc_reserve_query(date): - return _ENVISIONWARE_PC_RESERVE_QUERY.format(date=date) \ No newline at end of file + return _ENVISIONWARE_PC_RESERVE_QUERY.format(date=date) diff --git a/tests/alarms/models/test_granular_location_visits_alarms.py b/tests/alarms/models/test_granular_location_visits_alarms.py index 86f9899..c32166b 100644 --- a/tests/alarms/models/test_granular_location_visits_alarms.py +++ b/tests/alarms/models/test_granular_location_visits_alarms.py @@ -32,7 +32,7 @@ def test_run_checks_no_alarm(self, test_instance, mocker, caplog): "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query", return_value="redshift stale query", ) - test_instance.redshift_client.execute_query.side_effect = [([11040],), (), ()] + test_instance.redshift_client.execute_query.side_effect = [([11000],), (), ()] with caplog.at_level(logging.ERROR): test_instance.run_checks() @@ -67,33 +67,14 @@ def test_run_checks_no_records_alarm(self, test_instance, mocker, caplog): mocker.patch( "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" ) - test_instance.redshift_client.execute_query.side_effect = [([96],), (), ()] + test_instance.redshift_client.execute_query.side_effect = [([10],), (), ()] with caplog.at_level(logging.ERROR): test_instance.run_checks() assert ( - "Found only 96 location_visits_test_redshift_db rows for all " + "Found only 10 location_visits_test_redshift_db rows for all " "of 2023-05-31" ) in caplog.text - - def test_run_checks_bad_records_num_alarm(self, test_instance, mocker, caplog): - mocker.patch( - "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_count_query" - ) - mocker.patch( - "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_duplicate_query" - ) - mocker.patch( - "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" - ) - test_instance.redshift_client.execute_query.side_effect = [([11000],), (), ()] - - with caplog.at_level(logging.ERROR): - test_instance.run_checks() - assert ( - "Number of location_visits_test_redshift_db rows on 2023-05-31 not " - "divisible by 96: 11000" - ) in caplog.text def test_run_checks_duplicate_records_alarm(self, test_instance, mocker, caplog): mocker.patch( @@ -106,7 +87,7 @@ def test_run_checks_duplicate_records_alarm(self, test_instance, mocker, caplog) "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" ) test_instance.redshift_client.execute_query.side_effect = [ - ([11040],), + ([11000],), ( ["aa", 1, datetime(2023, 5, 31, 9, 0, 0)], ["bb", 2, datetime(2023, 5, 31, 9, 15, 0)], @@ -134,7 +115,7 @@ def test_run_checks_stale_records_alarm(self, test_instance, mocker, caplog): "alarms.models.granular_location_visits_alarms.build_redshift_location_visits_stale_query" ) test_instance.redshift_client.execute_query.side_effect = [ - ([11040],), + ([11000],), (), ( ["aa", 1, datetime(2023, 5, 31, 9, 0, 0)], @@ -149,4 +130,4 @@ def test_run_checks_stale_records_alarm(self, test_instance, mocker, caplog): "combinations are marked as stale and have not been replaced " "with a fresh row: (['aa', 1, FakeDatetime(2023, 5, 31, 9, " "0)], ['bb', 2, FakeDatetime(2023, 5, 31, 9, 15)])" - ) in caplog.text \ No newline at end of file + ) in caplog.text From 52199398c66fe5ebb0f59be24ce321536d38164c Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Wed, 30 Oct 2024 11:21:41 -0400 Subject: [PATCH 3/4] Add BranchCodesMap alarms --- CHANGELOG.md | 8 ++ README.md | 6 +- alarm_controller.py | 2 + alarms/models/branch_codes_map_alarms.py | 59 +++++++++++++ helpers/query_helper.py | 21 +++++ .../models/test_branch_codes_map_alarms.py | 82 +++++++++++++++++++ .../test_granular_location_visits_alarms.py | 6 +- 7 files changed, 179 insertions(+), 5 deletions(-) create mode 100644 alarms/models/branch_codes_map_alarms.py create mode 100644 tests/alarms/models/test_branch_codes_map_alarms.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 23be104..96078e3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +## 2024-10-30 +### Added +- Add BranchCodesMap alarms checking that it's in sync with all branches with location hours + +### Fixed +- Rename LocationVisits alarms to GranularLocationVisits alarms and remove unnecessary alarms +- Delete old chrome_installation file + ## 2024-09-19 ### Added - Refactor code to run in an ECS cluster rather than as a Lambda diff --git a/README.md b/README.md index c53967d..6f5517e 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,12 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo * When there are no OverDrive checkout records online (via OverDrive Marketplace) for the previous day * When the number of newly created/deleted patron records in Sierra and Redshift differs for any day in the previous week * When there are no newly created patron records in Sierra for the previous any day in the previous week +* When a single Sierra branch code maps to multiple Drupal branch codes +* When a Drupal branch code in location_hours does not contain a mapping to a Sierra branch code +* When a Sierra branch code with a mapping to a Drupal branch code does not appear in location_hours * When there are fewer than 10000 new location visits records for the previous day -* When a given location visits (site id, orbit, increment start) combination does not map to exactly one fresh row +* When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows +* When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows * When the number of active itype/location/stat group codes in Sierra and Redshift differs * When there are duplicate active itype/location/stat group codes in Redshift * When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated diff --git a/alarm_controller.py b/alarm_controller.py index 1f8ef3a..bca137c 100644 --- a/alarm_controller.py +++ b/alarm_controller.py @@ -1,5 +1,6 @@ import os +from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms from alarms.models.circ_trans_alarms import CircTransAlarms from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms from alarms.models.holds_alarms import HoldsAlarms @@ -60,6 +61,7 @@ def _setup_database_clients(self, kms_client): def _setup_alarms(self): self.logger.info("Setting up alarms...") return [ + BranchCodesMapAlarms(self.redshift_client), CircTransAlarms(self.redshift_client, self.sierra_client), GranularLocationVisitsAlarms(self.redshift_client), HoldsAlarms(self.redshift_client), diff --git a/alarms/models/branch_codes_map_alarms.py b/alarms/models/branch_codes_map_alarms.py new file mode 100644 index 0000000..f23b0ed --- /dev/null +++ b/alarms/models/branch_codes_map_alarms.py @@ -0,0 +1,59 @@ +from alarms.alarm import Alarm +from helpers.query_helper import ( + build_redshift_branch_codes_duplicate_query, + build_redshift_branch_codes_hours_query, +) +from nypl_py_utils.functions.log_helper import create_log + + +class BranchCodesMapAlarms(Alarm): + def __init__(self, redshift_client): + super().__init__(redshift_client) + self.logger = create_log("branch_codes_map_alarms") + + def run_checks(self): + self.logger.info("\BRANCH CODES MAP\n") + branch_codes_table = "branch_codes_map" + self.redshift_suffix + location_hours_table = "location_hours" + self.redshift_suffix + + self.redshift_client.connect() + duplicates = self.redshift_client.execute_query( + build_redshift_branch_codes_duplicate_query(branch_codes_table) + ) + if self.run_added_tests: + mismatched_hours = self.redshift_client.execute_query( + build_redshift_branch_codes_hours_query( + location_hours_table, branch_codes_table + ) + ) + else: + mismatched_hours = [] + self.redshift_client.close_connection() + + ids_not_in_map = [el[0] for el in mismatched_hours if el[1] is None] + ids_not_in_hours = [el[1] for el in mismatched_hours if el[0] is None] + self.check_duplicate_sierra_codes(duplicates) + self.check_hours_ids_without_mapping(ids_not_in_map) + self.check_map_ids_without_hours(ids_not_in_hours) + + def check_duplicate_sierra_codes(self, duplicates): + if len(duplicates) > 0: + self.logger.error( + "The following Sierra branch codes map to more than one Drupal branch " + "code: {}".format(duplicates) + ) + + def check_hours_ids_without_mapping(self, ids_not_in_map): + if len(ids_not_in_map) > 0: + self.logger.error( + "The following Drupal branch codes have location hours but do not have " + "a known Sierra branch mapping: {}".format(ids_not_in_map) + ) + + def check_map_ids_without_hours(self, ids_not_in_hours): + if len(ids_not_in_hours) > 0: + self.logger.error( + "The following Sierra branch codes do not have known hours: {}".format( + ids_not_in_hours + ) + ) diff --git a/helpers/query_helper.py b/helpers/query_helper.py index e305036..5c2f59f 100755 --- a/helpers/query_helper.py +++ b/helpers/query_helper.py @@ -1,4 +1,15 @@ ### REDSHIFT QUERIES -- in order of _setup_alarms placement ### +_REDSHIFT_BRANCH_CODES_DUPLICATE_QUERY = """ + SELECT sierra_code FROM {} + GROUP BY sierra_code + HAVING COUNT(*) > 1;""" + +_REDSHIFT_BRANCH_CODES_HOURS_QUERY = """ + SELECT drupal_location_id, sierra_code + FROM {location_hours_table} FULL JOIN {branch_codes_table} + ON {location_hours_table}.drupal_location_id = {branch_codes_table}.drupal_code + WHERE drupal_location_id IS NULL OR sierra_code IS NULL;""" + _REDSHIFT_CIRC_TRANS_QUERY = ( "SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';" ) @@ -165,6 +176,16 @@ ) +def build_redshift_branch_codes_duplicate_query(table): + return _REDSHIFT_BRANCH_CODES_DUPLICATE_QUERY.format(table) + + +def build_redshift_branch_codes_hours_query(location_hours_table, branch_codes_table): + return _REDSHIFT_BRANCH_CODES_HOURS_QUERY.format( + location_hours_table=location_hours_table, branch_codes_table=branch_codes_table + ) + + def build_redshift_circ_trans_query(table, date_field, date): return _REDSHIFT_CIRC_TRANS_QUERY.format( table=table, date_field=date_field, date=date diff --git a/tests/alarms/models/test_branch_codes_map_alarms.py b/tests/alarms/models/test_branch_codes_map_alarms.py new file mode 100644 index 0000000..a33c1fc --- /dev/null +++ b/tests/alarms/models/test_branch_codes_map_alarms.py @@ -0,0 +1,82 @@ +import logging +import pytest + +from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms + + +class TestBranchCodesMapAlarms: + @pytest.fixture + def test_instance(self, mocker): + return BranchCodesMapAlarms(mocker.MagicMock()) + + def test_init(self, mocker): + location_visits_alarms = BranchCodesMapAlarms(mocker.MagicMock()) + assert location_visits_alarms.redshift_suffix == "_test_redshift_db" + assert location_visits_alarms.run_added_tests + + def test_run_checks_no_alarm(self, test_instance, mocker, caplog): + mock_redshift_duplicate_query = mocker.patch( + "alarms.models.branch_codes_map_alarms.build_redshift_branch_codes_duplicate_query", + return_value="duplicate query", + ) + mock_redshift_hours_query = mocker.patch( + "alarms.models.branch_codes_map_alarms.build_redshift_branch_codes_hours_query", + return_value="hours query", + ) + test_instance.redshift_client.execute_query.side_effect = [(), ()] + + with caplog.at_level(logging.ERROR): + test_instance.run_checks() + assert caplog.text == "" + + test_instance.redshift_client.connect.assert_called_once() + mock_redshift_duplicate_query.assert_called_once_with( + "branch_codes_map_test_redshift_db" + ) + mock_redshift_hours_query.assert_called_once_with( + "location_hours_test_redshift_db", "branch_codes_map_test_redshift_db" + ) + test_instance.redshift_client.execute_query.assert_has_calls( + [mocker.call("duplicate query"), mocker.call("hours query")] + ) + test_instance.redshift_client.close_connection.assert_called_once() + + def test_run_checks_duplicate_alarm(self, test_instance, mocker, caplog): + mocker.patch( + "alarms.models.branch_codes_map_alarms.build_redshift_branch_codes_duplicate_query" + ) + mocker.patch( + "alarms.models.branch_codes_map_alarms.build_redshift_branch_codes_hours_query" + ) + test_instance.redshift_client.execute_query.side_effect = [(["aa"], ["bb"]), ()] + + with caplog.at_level(logging.ERROR): + test_instance.run_checks() + + assert ( + "The following Sierra branch codes map to more than one Drupal branch " + "code: (['aa'], ['bb'])" + ) in caplog.text + + def test_run_mismatched_hours_alarms(self, test_instance, mocker, caplog): + mocker.patch( + "alarms.models.branch_codes_map_alarms.build_redshift_branch_codes_duplicate_query" + ) + mocker.patch( + "alarms.models.branch_codes_map_alarms.build_redshift_branch_codes_hours_query" + ) + test_instance.redshift_client.execute_query.side_effect = [ + (), + (["AA", None], [None, "bb"], ["CC", None], [None, "dd"]), + ] + + with caplog.at_level(logging.ERROR): + test_instance.run_checks() + + assert ( + "The following Drupal branch codes have location hours but do not have a " + "known Sierra branch mapping: ['AA', 'CC']" + ) in caplog.text + assert ( + "The following Sierra branch codes do not have known hours: ['bb', 'dd']" + ) in caplog.text diff --git a/tests/alarms/models/test_granular_location_visits_alarms.py b/tests/alarms/models/test_granular_location_visits_alarms.py index c32166b..540c1e8 100644 --- a/tests/alarms/models/test_granular_location_visits_alarms.py +++ b/tests/alarms/models/test_granular_location_visits_alarms.py @@ -8,12 +8,10 @@ class TestGranularLocationVisitsAlarms: @pytest.fixture def test_instance(self, mocker): - mock_redshift_client = mocker.MagicMock() - return GranularLocationVisitsAlarms(mock_redshift_client) + return GranularLocationVisitsAlarms(mocker.MagicMock()) def test_init(self, mocker): - mock_redshift_client = mocker.MagicMock() - location_visits_alarms = GranularLocationVisitsAlarms(mock_redshift_client) + location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock()) assert location_visits_alarms.redshift_suffix == "_test_redshift_db" assert location_visits_alarms.run_added_tests assert location_visits_alarms.yesterday_date == date(2023, 5, 31) From d5c1d2631ad59b2058561593232227522701ecc0 Mon Sep 17 00:00:00 2001 From: aaronfriedman Date: Wed, 30 Oct 2024 11:31:05 -0400 Subject: [PATCH 4/4] Fix local deployment --- Makefile | 2 +- README.md | 9 +++------ alarms/models/branch_codes_map_alarms.py | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index a4e48e3..6590ecf 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ help: run: export ENVIRONMENT=devel; \ - python -c 'import lambda_function; lambda_function.lambda_handler(None, None)' + python main.py test: pytest tests -W ignore::DeprecationWarning diff --git a/README.md b/README.md index 6f5517e..b6deb98 100644 --- a/README.md +++ b/README.md @@ -42,14 +42,11 @@ make run The application logs should output to your terminal. * Export your `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` and run `make run` -* Alternatively, to build and run a Docker container, copy the `configs` directory in the `Dockerfile` and run: +* Alternatively, to build and run a Docker container, run: ``` -docker build --platform linux/amd64 -t bic-alarms:local . +docker image build -t bic-alarms:local . -docker run --platform linux/amd64 -p 9000:8080 -e ENVIRONMENT=devel -e AWS_ACCESS_KEY_ID=<> -e AWS_SECRET_ACCESS_KEY=<> bic-alarms:local - -# From a new terminal tab -curl "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{}' +docker container run -e ENVIRONMENT= -e AWS_ACCESS_KEY_ID=<> -e AWS_SECRET_ACCESS_KEY=<> bic-alarms:local ``` ## Git workflow diff --git a/alarms/models/branch_codes_map_alarms.py b/alarms/models/branch_codes_map_alarms.py index f23b0ed..7e276ca 100644 --- a/alarms/models/branch_codes_map_alarms.py +++ b/alarms/models/branch_codes_map_alarms.py @@ -12,7 +12,7 @@ def __init__(self, redshift_client): self.logger = create_log("branch_codes_map_alarms") def run_checks(self): - self.logger.info("\BRANCH CODES MAP\n") + self.logger.info("\nBRANCH CODES MAP\n") branch_codes_table = "branch_codes_map" + self.redshift_suffix location_hours_table = "location_hours" + self.redshift_suffix