diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml
index 38961a2..06b0616 100644
--- a/.github/workflows/run-tests.yml
+++ b/.github/workflows/run-tests.yml
@@ -9,16 +9,16 @@ jobs:
     name: Updates changelog
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - uses: dangoslen/changelog-enforcer@v3
   test:
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repo
-        uses: actions/checkout@v3
+        uses: actions/checkout@v4
       - name: Set up Python 3.12
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.12'
           cache: 'pip'
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1960cd4..fd529f9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+## 2024-12-17
+### Added
+- Add DailyLocationVisits alarms checking that the Redshift daily_location_visits table has the right sites, has no duplicates, and contains mostly healthy data
+
 ## 2024-11-13
 ### Added
 - Add BranchCodesMap alarms checking that it's in sync with all branches with location hours
diff --git a/README.md b/README.md
index b6deb98..b217580 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,9 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo
 * When there are fewer than 10000 new location visits records for the previous day
 * When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows
 * When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows
+* When the sites from the aggregated location visits don't perfectly match the known sites
+* When there are duplicate aggregated location visits sites
+* When less than 50% of sites had a healthy day of location visits
 * When the number of active itype/location/stat group codes in Sierra and Redshift differs
 * When there are duplicate active itype/location/stat group codes in Redshift
 * When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated
diff --git a/alarm_controller.py b/alarm_controller.py
index bca137c..946e920 100644
--- a/alarm_controller.py
+++ b/alarm_controller.py
@@ -2,6 +2,7 @@
 
 from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms
 from alarms.models.circ_trans_alarms import CircTransAlarms
+from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
 from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms
 from alarms.models.holds_alarms import HoldsAlarms
 from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms
@@ -64,6 +65,7 @@ def _setup_alarms(self):
             BranchCodesMapAlarms(self.redshift_client),
             CircTransAlarms(self.redshift_client, self.sierra_client),
             GranularLocationVisitsAlarms(self.redshift_client),
+            DailyLocationVisitsAlarms(self.redshift_client),
             HoldsAlarms(self.redshift_client),
             OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials),
             PatronInfoAlarms(self.redshift_client, self.sierra_client),
diff --git a/alarms/models/daily_location_visits_alarms.py b/alarms/models/daily_location_visits_alarms.py
new file mode 100644
index 0000000..2b4ef29
--- /dev/null
+++ b/alarms/models/daily_location_visits_alarms.py
@@ -0,0 +1,84 @@
+import os
+
+from alarms.alarm import Alarm
+from datetime import timedelta
+from helpers.query_helper import build_redshift_daily_location_visits_query
+from nypl_py_utils.classes.s3_client import S3Client
+from nypl_py_utils.functions.log_helper import create_log
+
+
+class DailyLocationVisitsAlarms(Alarm):
+    def __init__(self, redshift_client):
+        super().__init__(redshift_client)
+        self.logger = create_log("daily_location_visits_alarms")
+
+    def run_checks(self):
+        date_to_test = (self.yesterday_date - timedelta(days=29)).isoformat()
+        self.logger.info(f"\nDAILY LOCATION VISITS: {date_to_test}\n")
+        s3_client = S3Client(
+            os.environ["SHOPPERTRAK_S3_BUCKET"], os.environ["SHOPPERTRAK_S3_RESOURCE"]
+        )
+        all_shoppertrak_sites = set(s3_client.fetch_cache())
+        s3_client.close()
+
+        redshift_table = "daily_location_visits" + self.redshift_suffix
+        redshift_query = build_redshift_daily_location_visits_query(
+            redshift_table, date_to_test
+        )
+
+        self.redshift_client.connect()
+        redshift_results = self.redshift_client.execute_query(redshift_query)
+        self.redshift_client.close_connection()
+
+        redshift_sites = []
+        redshift_healthy = []
+        for shoppertrak_site, is_all_healthy in redshift_results:
+            redshift_sites.append(shoppertrak_site)
+            redshift_healthy.append(int(is_all_healthy))
+
+        self.check_redshift_duplicate_sites_alarm(redshift_sites)
+        self.check_redshift_missing_sites_alarm(redshift_sites, all_shoppertrak_sites)
+        self.check_redshift_extra_sites_alarm(redshift_sites, all_shoppertrak_sites)
+        self.check_redshift_healthy_sites_alarm(redshift_healthy)
+
+    def check_redshift_duplicate_sites_alarm(self, redshift_sites):
+        seen_sites = set()
+        duplicate_sites = set()
+        for site in redshift_sites:
+            if site in seen_sites:
+                duplicate_sites.add(site)
+            seen_sites.add(site)
+
+        if duplicate_sites:
+            self.logger.error(
+                "The following ShopperTrak sites are duplicated: {}".format(
+                    sorted(list(duplicate_sites))
+                )
+            )
+
+    def check_redshift_missing_sites_alarm(self, redshift_sites, all_sites):
+        missing_sites = all_sites.difference(set(redshift_sites))
+        if missing_sites:
+            self.logger.error(
+                "The following ShopperTrak sites are missing: {}".format(
+                    sorted(list(missing_sites))
+                )
+            )
+
+    def check_redshift_extra_sites_alarm(self, redshift_sites, all_sites):
+        extra_sites = set(redshift_sites).difference(all_sites)
+        if extra_sites:
+            self.logger.error(
+                "The following unknown ShopperTrak site ids were found: {}".format(
+                    sorted(list(extra_sites))
+                )
+            )
+
+    def check_redshift_healthy_sites_alarm(self, redshift_healthy):
+        percent_healthy = sum(redshift_healthy) / len(redshift_healthy)
+        if percent_healthy < 0.5:
+            self.logger.error(
+                "Only {0:.2f}% of ShopperTrak sites were healthy".format(
+                    percent_healthy * 100
+                )
+            )
diff --git a/config/devel.yaml b/config/devel.yaml
index bc66979..ab7574a 100644
--- a/config/devel.yaml
+++ b/config/devel.yaml
@@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
     ENVISIONWARE_DB_PORT: 3306
     ENVISIONWARE_DB_NAME: lasttwodays
     REDSHIFT_DB_NAME: dev
+    SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
+    SHOPPERTRAK_S3_RESOURCE: site_ids.json
     LOG_LEVEL: info
     SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGkwZwYJKoZIhvcNAQcGoFowWAIBADBTBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDCu20jxZpTC9cf9V8QIBEIAmt4TZJ7JuFQ1C845HxG8wAXzC7SFHkMNe4U6rKlD1twveXygfiQc=
     SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
diff --git a/config/production.yaml b/config/production.yaml
index 5f5fd5c..dff523a 100644
--- a/config/production.yaml
+++ b/config/production.yaml
@@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
     ENVISIONWARE_DB_PORT: 3306
     ENVISIONWARE_DB_NAME: lasttwodays
     REDSHIFT_DB_NAME: production
+    SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
+    SHOPPERTRAK_S3_RESOURCE: site_ids.json
     LOG_LEVEL: info
     SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGwwagYJKoZIhvcNAQcGoF0wWwIBADBWBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDFrV5IoeP0tL98V0zgIBEIApBnZ9IWKJ/s6F++zu0rOeWwfB+Kkwh4aFt68vD7jv4LaO0zOppTOPycA=
     SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
diff --git a/config/qa.yaml b/config/qa.yaml
index 9209593..1146087 100644
--- a/config/qa.yaml
+++ b/config/qa.yaml
@@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
     ENVISIONWARE_DB_PORT: 3306
     ENVISIONWARE_DB_NAME: lasttwodays
     REDSHIFT_DB_NAME: qa
+    SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
+    SHOPPERTRAK_S3_RESOURCE: site_ids.json
     LOG_LEVEL: info
     SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGwwagYJKoZIhvcNAQcGoF0wWwIBADBWBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDL6zq0QQYBhdW4rz8gIBEIApxucxIVAb1Ec4uHUAwxRZcvC8OAZxuj/oJAkhCPh8sPJJ08w3ECoXoKk=
     SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
diff --git a/helpers/query_helper.py b/helpers/query_helper.py
index 5c2f59f..1ffaf7c 100755
--- a/helpers/query_helper.py
+++ b/helpers/query_helper.py
@@ -14,6 +14,11 @@
     "SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';"
 )
 
+_REDSHIFT_DAILY_LOCATION_VISITS_QUERY = (
+    "SELECT shoppertrak_site_id, is_all_healthy FROM {table} "
+    "WHERE visits_date = '{date}';"
+)
+
 _REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
     "SELECT COUNT(id) FROM {table} "
     "WHERE increment_start::DATE = '{date}' AND is_fresh;"
@@ -192,6 +197,10 @@ def build_redshift_circ_trans_query(table, date_field, date):
     )
 
 
+def build_redshift_daily_location_visits_query(table, date):
+    return _REDSHIFT_DAILY_LOCATION_VISITS_QUERY.format(table=table, date=date)
+
+
 def build_redshift_location_visits_count_query(table, date):
     return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date)
 
diff --git a/requirements.txt b/requirements.txt
index 6a389a9..e103358 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,2 @@
-nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.4.0
+nypl-py-utils[mysql-client,postgresql-client,redshift-client,s3-client,config-helper]==1.6.2
 selenium>=4.10.0
\ No newline at end of file
diff --git a/tests/alarms/models/test_daily_location_visits_alarms.py b/tests/alarms/models/test_daily_location_visits_alarms.py
new file mode 100644
index 0000000..4e87c8f
--- /dev/null
+++ b/tests/alarms/models/test_daily_location_visits_alarms.py
@@ -0,0 +1,125 @@
+import logging
+import pytest
+
+from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
+from datetime import date
+
+
+class TestDailyLocationVisitsAlarms:
+    @pytest.fixture
+    def test_instance(self, mocker):
+        mocker.patch(
+            "alarms.models.daily_location_visits_alarms.S3Client.fetch_cache",
+            return_value=["aa", "bb", "cc"],
+        )
+        return DailyLocationVisitsAlarms(mocker.MagicMock())
+
+    def test_init(self, mocker):
+        daily_location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
+        assert daily_location_visits_alarms.redshift_suffix == "_test_redshift_db"
+        assert daily_location_visits_alarms.run_added_tests
+        assert daily_location_visits_alarms.yesterday_date == date(2023, 5, 31)
+        assert daily_location_visits_alarms.yesterday == "2023-05-31"
+
+    def test_run_checks_no_alarm(self, mocker, caplog):
+        daily_location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
+        mock_s3_client = mocker.MagicMock()
+        mock_s3_constructor = mocker.patch(
+            "alarms.models.daily_location_visits_alarms.S3Client",
+            return_value=mock_s3_client,
+        )
+        mock_s3_client.fetch_cache.return_value = ["aa", "bb", "cc"]
+
+        mock_redshift_query = mocker.patch(
+            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query",
+            return_value="redshift query",
+        )
+        daily_location_visits_alarms.redshift_client.execute_query.return_value = (
+            ["aa", True],
+            ["bb", True],
+            ["cc", False],
+        )
+
+        with caplog.at_level(logging.ERROR):
+            daily_location_visits_alarms.run_checks()
+        assert caplog.text == ""
+
+        mock_s3_constructor.assert_called_once_with(
+            "test_shoppertrak_s3_bucket", "test_shoppertrak_s3_resource"
+        )
+        mock_s3_client.fetch_cache.assert_called_once()
+        mock_s3_client.close.assert_called_once()
+        daily_location_visits_alarms.redshift_client.connect.assert_called_once()
+        mock_redshift_query.assert_called_once_with(
+            "daily_location_visits_test_redshift_db", "2023-05-02"
+        )
+        daily_location_visits_alarms.redshift_client.execute_query.assert_called_once_with(
+            "redshift query"
+        )
+        daily_location_visits_alarms.redshift_client.close_connection.assert_called_once()
+
+    def test_run_checks_redshift_duplicate_sites_alarm(
+        self, test_instance, mocker, caplog
+    ):
+        mocker.patch(
+            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
+        )
+        test_instance.redshift_client.execute_query.return_value = (
+            ["aa", True],
+            ["bb", True],
+            ["bb", True],
+            ["cc", False],
+        )
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        assert ("The following ShopperTrak sites are duplicated: ['bb']") in caplog.text
+
+    def test_run_checks_redshift_missing_sites_alarm(
+        self, test_instance, mocker, caplog
+    ):
+        mocker.patch(
+            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
+        )
+        test_instance.redshift_client.execute_query.return_value = (
+            ["aa", True],
+            ["cc", True],
+        )
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        assert "The following ShopperTrak sites are missing: ['bb']" in caplog.text
+
+    def test_run_checks_redshift_extra_sites_alarm(self, test_instance, mocker, caplog):
+        mocker.patch(
+            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
+        )
+        test_instance.redshift_client.execute_query.return_value = (
+            ["aa", True],
+            ["bb", True],
+            ["cc", False],
+            ["ee", True],
+            ["dd", False],
+        )
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        assert (
+            "The following unknown ShopperTrak site ids were found: ['dd', 'ee']"
+        ) in caplog.text
+
+    def test_run_checks_redshift_healthy_sites_alarm(
+        self, test_instance, mocker, caplog
+    ):
+        mocker.patch(
+            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
+        )
+        test_instance.redshift_client.execute_query.return_value = (
+            ["aa", True],
+            ["bb", False],
+            ["cc", False],
+        )
+
+        with caplog.at_level(logging.ERROR):
+            test_instance.run_checks()
+        assert "Only 33.33% of ShopperTrak sites were healthy" in caplog.text
diff --git a/tests/alarms/models/test_granular_location_visits_alarms.py b/tests/alarms/models/test_granular_location_visits_alarms.py
index 540c1e8..1bf04a5 100644
--- a/tests/alarms/models/test_granular_location_visits_alarms.py
+++ b/tests/alarms/models/test_granular_location_visits_alarms.py
@@ -11,11 +11,11 @@ def test_instance(self, mocker):
         return GranularLocationVisitsAlarms(mocker.MagicMock())
 
     def test_init(self, mocker):
-        location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock())
-        assert location_visits_alarms.redshift_suffix == "_test_redshift_db"
-        assert location_visits_alarms.run_added_tests
-        assert location_visits_alarms.yesterday_date == date(2023, 5, 31)
-        assert location_visits_alarms.yesterday == "2023-05-31"
+        gran_location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock())
+        assert gran_location_visits_alarms.redshift_suffix == "_test_redshift_db"
+        assert gran_location_visits_alarms.run_added_tests
+        assert gran_location_visits_alarms.yesterday_date == date(2023, 5, 31)
+        assert gran_location_visits_alarms.yesterday == "2023-05-31"
 
     def test_run_checks_no_alarm(self, test_instance, mocker, caplog):
         mock_redshift_count_query = mocker.patch(
diff --git a/tests/conftest.py b/tests/conftest.py
index fcd3e9f..c5ba23a 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -22,6 +22,8 @@
     "ENVISIONWARE_DB_PASSWORD": "test_envisionware_password",
     "OVERDRIVE_USERNAME": "test_overdrive_username",
    "OVERDRIVE_PASSWORD": "test_overdrive_password",
+    "SHOPPERTRAK_S3_BUCKET": "test_shoppertrak_s3_bucket",
+    "SHOPPERTRAK_S3_RESOURCE": "test_shoppertrak_s3_resource",
 }
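
A minimal illustrative sketch (not part of the patch) of what the new query builder produces and how the 50% healthy-sites threshold behaves; the table suffix, date, and result rows below are hypothetical:

```python
# Sketch only: mirrors the query template and health threshold added in this PR.
# The "_qa" table suffix, the date, and the toy rows are made-up example inputs.
_REDSHIFT_DAILY_LOCATION_VISITS_QUERY = (
    "SELECT shoppertrak_site_id, is_all_healthy FROM {table} "
    "WHERE visits_date = '{date}';"
)


def build_redshift_daily_location_visits_query(table, date):
    return _REDSHIFT_DAILY_LOCATION_VISITS_QUERY.format(table=table, date=date)


print(build_redshift_daily_location_visits_query("daily_location_visits_qa", "2024-12-16"))
# SELECT shoppertrak_site_id, is_all_healthy FROM daily_location_visits_qa WHERE visits_date = '2024-12-16';

# The healthy-sites alarm fires when under half of the returned sites were fully healthy.
rows = [["aa", True], ["bb", False], ["cc", False]]  # (shoppertrak_site_id, is_all_healthy)
percent_healthy = sum(int(is_healthy) for _, is_healthy in rows) / len(rows)
print("Only {0:.2f}% of ShopperTrak sites were healthy".format(percent_healthy * 100))
# -> Only 33.33% of ShopperTrak sites were healthy (below 0.5, so an error would be logged)
```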