Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add DailyLocationVisitsAlarms #29

Merged
merged 2 commits into from
Dec 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/run-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@ jobs:
name: Updates changelog
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v4
- uses: dangoslen/changelog-enforcer@v3
test:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v3
uses: actions/checkout@v4

- name: Set up Python 3.12
uses: actions/setup-python@v4
uses: actions/setup-python@v5
with:
python-version: '3.12'
cache: 'pip'
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 2024-12-17
### Added
- Add DailyLocationVisits alarms checking that the Redshift daily_location_visits table has the right sites, has no duplicates, and contains mostly healthy data

## 2024-11-13
### Added
- Add BranchCodesMap alarms checking that it's in sync with all branches with location hours
Expand Down
3 changes: 3 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo
* When there are fewer than 10000 new location visits records for the previous day
* When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows
* When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows
* When the sites from the aggregated location visits don't perfectly match the known sites
* When there are duplicate aggregated location visits sites
* When less than 50% of sites had a healthy day of location visits
* When the number of active itype/location/stat group codes in Sierra and Redshift differs
* When there are duplicate active itype/location/stat group codes in Redshift
* When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated
Expand Down
2 changes: 2 additions & 0 deletions alarm_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms
from alarms.models.circ_trans_alarms import CircTransAlarms
from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms
from alarms.models.holds_alarms import HoldsAlarms
from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms
Expand Down Expand Up @@ -64,6 +65,7 @@ def _setup_alarms(self):
BranchCodesMapAlarms(self.redshift_client),
CircTransAlarms(self.redshift_client, self.sierra_client),
GranularLocationVisitsAlarms(self.redshift_client),
DailyLocationVisitsAlarms(self.redshift_client),
HoldsAlarms(self.redshift_client),
OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials),
PatronInfoAlarms(self.redshift_client, self.sierra_client),
Expand Down
84 changes: 84 additions & 0 deletions alarms/models/daily_location_visits_alarms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
import os

from alarms.alarm import Alarm
from datetime import timedelta
from helpers.query_helper import build_redshift_daily_location_visits_query
from nypl_py_utils.classes.s3_client import S3Client
from nypl_py_utils.functions.log_helper import create_log


class DailyLocationVisitsAlarms(Alarm):
    """Alarms that validate one day of the Redshift daily_location_visits table.

    For the tested date, the checks log an error (which triggers an alarm)
    when the table contains duplicate ShopperTrak sites, is missing known
    sites, contains unknown sites, or has fewer than 50% of its sites
    flagged as healthy.
    """

    def __init__(self, redshift_client):
        """Store the Redshift client via the base class and create a logger.

        Args:
            redshift_client: client used to query the Redshift table; it
                must provide connect(), execute_query() and
                close_connection().
        """
        super().__init__(redshift_client)
        self.logger = create_log("daily_location_visits_alarms")

    def run_checks(self):
        """Fetch the known sites and the Redshift rows, then run every check.

        The known site ids are read from the S3 cache named by the
        SHOPPERTRAK_S3_BUCKET and SHOPPERTRAK_S3_RESOURCE environment
        variables. Both the S3 client and the Redshift connection are
        released even if fetching or querying raises.
        """
        # The tested date is 29 days before `self.yesterday_date`.
        # NOTE(review): presumably this delay gives the aggregated data time
        # to settle before it is judged -- confirm with the poller's owners.
        date_to_test = (self.yesterday_date - timedelta(days=29)).isoformat()
        self.logger.info(f"\nDAILY LOCATION VISITS: {date_to_test}\n")
        s3_client = S3Client(
            os.environ["SHOPPERTRAK_S3_BUCKET"], os.environ["SHOPPERTRAK_S3_RESOURCE"]
        )
        try:
            all_shoppertrak_sites = set(s3_client.fetch_cache())
        finally:
            # Release the S3 client even when the cache cannot be fetched.
            s3_client.close()

        redshift_table = "daily_location_visits" + self.redshift_suffix
        redshift_query = build_redshift_daily_location_visits_query(
            redshift_table, date_to_test
        )

        self.redshift_client.connect()
        try:
            redshift_results = self.redshift_client.execute_query(redshift_query)
        finally:
            # Never leave the connection open when the query fails.
            self.redshift_client.close_connection()

        redshift_sites = []
        redshift_healthy = []
        for shoppertrak_site, is_all_healthy in redshift_results:
            redshift_sites.append(shoppertrak_site)
            # bool() first so that a NULL (None) flag counts as unhealthy
            # instead of raising TypeError in int().
            redshift_healthy.append(int(bool(is_all_healthy)))

        self.check_redshift_duplicate_sites_alarm(redshift_sites)
        self.check_redshift_missing_sites_alarm(redshift_sites, all_shoppertrak_sites)
        self.check_redshift_extra_sites_alarm(redshift_sites, all_shoppertrak_sites)
        self.check_redshift_healthy_sites_alarm(redshift_healthy)

    def check_redshift_duplicate_sites_alarm(self, redshift_sites):
        """Log an error listing every site that appears more than once.

        Args:
            redshift_sites: site ids returned by Redshift, one per row.
        """
        seen_sites = set()
        duplicate_sites = set()
        for site in redshift_sites:
            if site in seen_sites:
                duplicate_sites.add(site)
            seen_sites.add(site)

        if duplicate_sites:
            self.logger.error(
                "The following ShopperTrak sites are duplicated: {}".format(
                    sorted(duplicate_sites)
                )
            )

    def check_redshift_missing_sites_alarm(self, redshift_sites, all_sites):
        """Log an error listing known sites that have no Redshift row.

        Args:
            redshift_sites: site ids returned by Redshift, one per row.
            all_sites: set of all known ShopperTrak site ids.
        """
        missing_sites = all_sites.difference(redshift_sites)
        if missing_sites:
            self.logger.error(
                "The following ShopperTrak sites are missing: {}".format(
                    sorted(missing_sites)
                )
            )

    def check_redshift_extra_sites_alarm(self, redshift_sites, all_sites):
        """Log an error listing Redshift sites that are not known sites.

        Args:
            redshift_sites: site ids returned by Redshift, one per row.
            all_sites: set of all known ShopperTrak site ids.
        """
        extra_sites = set(redshift_sites).difference(all_sites)
        if extra_sites:
            self.logger.error(
                "The following unknown ShopperTrak site ids were found: {}".format(
                    sorted(extra_sites)
                )
            )

    def check_redshift_healthy_sites_alarm(self, redshift_healthy):
        """Log an error when fewer than half of the sites were healthy.

        Args:
            redshift_healthy: one 0/1 health flag per Redshift row.
        """
        if not redshift_healthy:
            # No rows means there is no ratio to compute; dividing would raise
            # ZeroDivisionError and abort the remaining alarms. The absence
            # of rows is surfaced by the missing-sites check instead.
            return

        healthy_fraction = sum(redshift_healthy) / len(redshift_healthy)
        if healthy_fraction < 0.5:
            self.logger.error(
                "Only {0:.2f}% of ShopperTrak sites were healthy".format(
                    healthy_fraction * 100
                )
            )
2 changes: 2 additions & 0 deletions config/devel.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
ENVISIONWARE_DB_PORT: 3306
ENVISIONWARE_DB_NAME: lasttwodays
REDSHIFT_DB_NAME: dev
SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
SHOPPERTRAK_S3_RESOURCE: site_ids.json
LOG_LEVEL: info
SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGkwZwYJKoZIhvcNAQcGoFowWAIBADBTBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDCu20jxZpTC9cf9V8QIBEIAmt4TZJ7JuFQ1C845HxG8wAXzC7SFHkMNe4U6rKlD1twveXygfiQc=
SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
Expand Down
2 changes: 2 additions & 0 deletions config/production.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
ENVISIONWARE_DB_PORT: 3306
ENVISIONWARE_DB_NAME: lasttwodays
REDSHIFT_DB_NAME: production
SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
SHOPPERTRAK_S3_RESOURCE: site_ids.json
LOG_LEVEL: info
SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGwwagYJKoZIhvcNAQcGoF0wWwIBADBWBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDFrV5IoeP0tL98V0zgIBEIApBnZ9IWKJ/s6F++zu0rOeWwfB+Kkwh4aFt68vD7jv4LaO0zOppTOPycA=
SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
Expand Down
2 changes: 2 additions & 0 deletions config/qa.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ PLAINTEXT_VARIABLES:
ENVISIONWARE_DB_PORT: 3306
ENVISIONWARE_DB_NAME: lasttwodays
REDSHIFT_DB_NAME: qa
SHOPPERTRAK_S3_BUCKET: shoppertrak-sites
SHOPPERTRAK_S3_RESOURCE: site_ids.json
LOG_LEVEL: info
SIERRA_DB_HOST: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGwwagYJKoZIhvcNAQcGoF0wWwIBADBWBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDL6zq0QQYBhdW4rz8gIBEIApxucxIVAb1Ec4uHUAwxRZcvC8OAZxuj/oJAkhCPh8sPJJ08w3ECoXoKk=
SIERRA_DB_USER: AQECAHh7ea2tyZ6phZgT4B9BDKwguhlFtRC6hgt+7HbmeFsrsgAAAGcwZQYJKoZIhvcNAQcGoFgwVgIBADBRBgkqhkiG9w0BBwEwHgYJYIZIAWUDBAEuMBEEDBMTB2dgzYV4nlEtwwIBEIAky8apvCdg3fDGqaXd06Vq9U59XxG8qlcShPfW9Jp/JiaURhIH
Expand Down
9 changes: 9 additions & 0 deletions helpers/query_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
"SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';"
)

# Selects the ShopperTrak site id and the is_all_healthy flag of every row in
# {table} whose visits_date equals {date}. Both placeholders are filled in with
# str.format by build_redshift_daily_location_visits_query.
_REDSHIFT_DAILY_LOCATION_VISITS_QUERY = (
    "SELECT shoppertrak_site_id, is_all_healthy FROM {table} "
    "WHERE visits_date = '{date}';"
)

_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
"SELECT COUNT(id) FROM {table} "
"WHERE increment_start::DATE = '{date}' AND is_fresh;"
Expand Down Expand Up @@ -192,6 +197,10 @@ def build_redshift_circ_trans_query(table, date_field, date):
)


def build_redshift_daily_location_visits_query(table, date):
    """Return the daily location visits query for `table` on `date`.

    Args:
        table: name of the Redshift table to read from.
        date: value compared against visits_date in the query.

    Returns:
        The SQL query string with both placeholders substituted.
    """
    placeholders = {"table": table, "date": date}
    return _REDSHIFT_DAILY_LOCATION_VISITS_QUERY.format(**placeholders)


def build_redshift_location_visits_count_query(table, date):
    """Return the fresh location visits count query for `table` on `date`.

    Args:
        table: name of the Redshift table to read from.
        date: value compared against increment_start::DATE in the query.

    Returns:
        The SQL query string with both placeholders substituted.
    """
    placeholders = {"table": table, "date": date}
    return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(**placeholders)

Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
nypl-py-utils[mysql-client,postgresql-client,redshift-client,config-helper]==1.4.0
nypl-py-utils[mysql-client,postgresql-client,redshift-client,s3-client,config-helper]==1.6.2
selenium>=4.10.0
125 changes: 125 additions & 0 deletions tests/alarms/models/test_daily_location_visits_alarms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import logging
import pytest

from alarms.models.daily_location_visits_alarms import DailyLocationVisitsAlarms
from datetime import date


class TestDailyLocationVisitsAlarms:
    """Tests for DailyLocationVisitsAlarms using mocked S3 and Redshift."""

    @pytest.fixture
    def test_instance(self, mocker):
        """Return an alarms instance whose known sites are aa, bb and cc.

        Only S3Client.fetch_cache is patched; the Redshift client is a
        MagicMock whose query results each test sets itself.
        """
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.S3Client.fetch_cache",
            return_value=["aa", "bb", "cc"],
        )
        return DailyLocationVisitsAlarms(mocker.MagicMock())

    def test_init(self, mocker):
        """Check the attributes available on a freshly built instance."""
        daily_location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
        assert daily_location_visits_alarms.redshift_suffix == "_test_redshift_db"
        assert daily_location_visits_alarms.run_added_tests
        assert daily_location_visits_alarms.yesterday_date == date(2023, 5, 31)
        assert daily_location_visits_alarms.yesterday == "2023-05-31"

    def test_run_checks_no_alarm(self, mocker, caplog):
        """Matching sites with a majority healthy must log no errors."""
        daily_location_visits_alarms = DailyLocationVisitsAlarms(mocker.MagicMock())
        # Replace the whole S3Client class so the constructor arguments and
        # the fetch_cache/close calls can be asserted on below.
        mock_s3_client = mocker.MagicMock()
        mock_s3_constructor = mocker.patch(
            "alarms.models.daily_location_visits_alarms.S3Client",
            return_value=mock_s3_client,
        )
        mock_s3_client.fetch_cache.return_value = ["aa", "bb", "cc"]

        mock_redshift_query = mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query",
            return_value="redshift query",
        )
        # Two of three sites healthy: above the 50% alarm threshold.
        daily_location_visits_alarms.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", True],
            ["cc", False],
        )

        with caplog.at_level(logging.ERROR):
            daily_location_visits_alarms.run_checks()
            assert caplog.text == ""

        mock_s3_constructor.assert_called_once_with(
            "test_shoppertrak_s3_bucket", "test_shoppertrak_s3_resource"
        )
        mock_s3_client.fetch_cache.assert_called_once()
        mock_s3_client.close.assert_called_once()
        daily_location_visits_alarms.redshift_client.connect.assert_called_once()
        # 2023-05-02 is 29 days before the yesterday date of 2023-05-31.
        mock_redshift_query.assert_called_once_with(
            "daily_location_visits_test_redshift_db", "2023-05-02"
        )
        daily_location_visits_alarms.redshift_client.execute_query.assert_called_once_with(
            "redshift query"
        )
        daily_location_visits_alarms.redshift_client.close_connection.assert_called_once()

    def test_run_checks_redshift_duplicate_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """A site returned twice by Redshift must be reported as duplicated."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", True],
            ["bb", True],
            ["cc", False],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert ("The following ShopperTrak sites are duplicated: ['bb']") in caplog.text

    def test_run_checks_redshift_missing_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """A known site absent from Redshift must be reported as missing."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["cc", True],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert "The following ShopperTrak sites are missing: ['bb']" in caplog.text

    def test_run_checks_redshift_extra_sites_alarm(self, test_instance, mocker, caplog):
        """Sites not in the known set must be reported, sorted, as unknown."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        # "ee" precedes "dd" here so the assertion also covers the sorting.
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", True],
            ["cc", False],
            ["ee", True],
            ["dd", False],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert (
            "The following unknown ShopperTrak site ids were found: ['dd', 'ee']"
        ) in caplog.text

    def test_run_checks_redshift_healthy_sites_alarm(
        self, test_instance, mocker, caplog
    ):
        """Fewer than half of the sites healthy must trigger the alarm."""
        mocker.patch(
            "alarms.models.daily_location_visits_alarms.build_redshift_daily_location_visits_query"
        )
        # One of three sites healthy: 33.33%, below the 50% threshold.
        test_instance.redshift_client.execute_query.return_value = (
            ["aa", True],
            ["bb", False],
            ["cc", False],
        )

        with caplog.at_level(logging.ERROR):
            test_instance.run_checks()
        assert "Only 33.33% of ShopperTrak sites were healthy" in caplog.text
10 changes: 5 additions & 5 deletions tests/alarms/models/test_granular_location_visits_alarms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@ def test_instance(self, mocker):
return GranularLocationVisitsAlarms(mocker.MagicMock())

def test_init(self, mocker):
location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock())
assert location_visits_alarms.redshift_suffix == "_test_redshift_db"
assert location_visits_alarms.run_added_tests
assert location_visits_alarms.yesterday_date == date(2023, 5, 31)
assert location_visits_alarms.yesterday == "2023-05-31"
gran_location_visits_alarms = GranularLocationVisitsAlarms(mocker.MagicMock())
assert gran_location_visits_alarms.redshift_suffix == "_test_redshift_db"
assert gran_location_visits_alarms.run_added_tests
assert gran_location_visits_alarms.yesterday_date == date(2023, 5, 31)
assert gran_location_visits_alarms.yesterday == "2023-05-31"

def test_run_checks_no_alarm(self, test_instance, mocker, caplog):
mock_redshift_count_query = mocker.patch(
Expand Down
2 changes: 2 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
"ENVISIONWARE_DB_PASSWORD": "test_envisionware_password",
"OVERDRIVE_USERNAME": "test_overdrive_username",
"OVERDRIVE_PASSWORD": "test_overdrive_password",
"SHOPPERTRAK_S3_BUCKET": "test_shoppertrak_s3_bucket",
"SHOPPERTRAK_S3_RESOURCE": "test_shoppertrak_s3_resource",
}


Expand Down
Loading