Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add BranchCodesMap alarms #27

Merged
merged 5 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
## 2024-10-30
### Added
- Add BranchCodesMap alarms checking that it's in sync with all branches with location hours

### Fixed
- Rename LocationVisits alarms to GranularLocationVisits alarms and remove unnecessary alarms
- Delete old chrome_installation file

## 2024-09-19
### Added
- Refactor code to run in an ECS cluster rather than as a Lambda
Expand Down
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ help:
@echo "make test"
@echo " run associated test suite with pytest"
@echo "make lint"
@echo " lint project files using the flake8 linter"
@echo " lint project files using the black linter"

run:
export ENVIRONMENT=devel; \
python -c 'import lambda_function; lambda_function.lambda_handler(None, None)'
python main.py

test:
pytest tests -W ignore::DeprecationWarning
Expand Down
15 changes: 8 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,12 @@ Currently, the code will log an error (triggering an alarm to fire) under the fo
* When there are no OverDrive checkout records online (via OverDrive Marketplace) for the previous day
* When the number of newly created/deleted patron records in Sierra and Redshift differs for any day in the previous week
* When there are no newly created patron records in Sierra for any day in the previous week
* When a single Sierra branch code maps to multiple Drupal branch codes
* When a Drupal branch code in location_hours does not contain a mapping to a Sierra branch code
* When a Sierra branch code with a mapping to a Drupal branch code does not appear in location_hours
* When there are fewer than 10000 new location visits records for the previous day
* When a given location visits (site id, orbit, increment start) combination does not map to exactly one fresh row
* When a given location visits (site id, orbit, increment start) combination from the previous day contains multiple fresh rows
* When a given location visits (site id, orbit, increment start) combination from the previous thirty days contains only stale rows
* When the number of active itype/location/stat group codes in Sierra and Redshift differs
* When there are duplicate active itype/location/stat group codes in Redshift
* When there are active itype/location/stat group codes in Redshift without the necessary additional fields populated
Expand All @@ -38,14 +42,11 @@ make run
The application logs should output to your terminal.

* Export your `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` and run `make run`
* Alternatively, to build and run a Docker container, copy the `configs` directory in the `Dockerfile` and run:
* Alternatively, to build and run a Docker container, run:
```
docker build --platform linux/amd64 -t bic-alarms:local .
docker image build -t bic-alarms:local .

docker run --platform linux/amd64 -p 9000:8080 -e ENVIRONMENT=devel -e AWS_ACCESS_KEY_ID=<> -e AWS_SECRET_ACCESS_KEY=<> bic-alarms:local

# From a new terminal tab
curl "http://localhost:9000/2015-03-31/functions/function/invocations" -d '{}'
docker container run -e ENVIRONMENT=<env> -e AWS_ACCESS_KEY_ID=<> -e AWS_SECRET_ACCESS_KEY=<> bic-alarms:local
```

## Git workflow
Expand Down
6 changes: 4 additions & 2 deletions alarm_controller.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import os

from alarms.models.branch_codes_map_alarms import BranchCodesMapAlarms
from alarms.models.circ_trans_alarms import CircTransAlarms
from alarms.models.granular_location_visits_alarms import GranularLocationVisitsAlarms
from alarms.models.holds_alarms import HoldsAlarms
from alarms.models.location_visits_alarms import LocationVisitsAlarms
from alarms.models.overdrive_checkouts_alarms import OverDriveCheckoutsAlarms
from alarms.models.patron_info_alarms import PatronInfoAlarms
from alarms.models.pc_reserve_alarms import PcReserveAlarms
Expand Down Expand Up @@ -60,9 +61,10 @@ def _setup_database_clients(self, kms_client):
def _setup_alarms(self):
self.logger.info("Setting up alarms...")
return [
BranchCodesMapAlarms(self.redshift_client),
CircTransAlarms(self.redshift_client, self.sierra_client),
GranularLocationVisitsAlarms(self.redshift_client),
HoldsAlarms(self.redshift_client),
LocationVisitsAlarms(self.redshift_client),
OverDriveCheckoutsAlarms(self.redshift_client, self.overdrive_credentials),
PatronInfoAlarms(self.redshift_client, self.sierra_client),
PcReserveAlarms(self.redshift_client, self.envisionware_client),
Expand Down
59 changes: 59 additions & 0 deletions alarms/models/branch_codes_map_alarms.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
from alarms.alarm import Alarm
from helpers.query_helper import (
build_redshift_branch_codes_duplicate_query,
build_redshift_branch_codes_hours_query,
)
from nypl_py_utils.functions.log_helper import create_log


class BranchCodesMapAlarms(Alarm):
    """Alarms checking the branch codes map against itself and location hours.

    Three conditions are logged as errors:

    * a Sierra branch code that appears more than once in the branch codes map,
    * a Drupal branch code present in location hours with no mapped Sierra code,
    * a mapped Sierra branch code whose Drupal code is absent from location hours.
    """

    def __init__(self, redshift_client):
        """Store the Redshift client (via the base class) and create the logger."""
        super().__init__(redshift_client)
        self.logger = create_log("branch_codes_map_alarms")

    def run_checks(self):
        """Query Redshift and run every branch codes map check.

        The location hours comparison is only queried when
        ``self.run_added_tests`` is truthy; otherwise the two hours-related
        checks receive empty lists and log nothing.
        """
        self.logger.info("\nBRANCH CODES MAP\n")
        # NOTE(review): redshift_suffix and run_added_tests are presumably set
        # by the Alarm base class -- confirm against alarms/alarm.py.
        branch_codes_table = "branch_codes_map" + self.redshift_suffix
        location_hours_table = "location_hours" + self.redshift_suffix

        mismatched_hours = []
        self.redshift_client.connect()
        try:
            duplicates = self.redshift_client.execute_query(
                build_redshift_branch_codes_duplicate_query(branch_codes_table)
            )
            if self.run_added_tests:
                mismatched_hours = self.redshift_client.execute_query(
                    build_redshift_branch_codes_hours_query(
                        location_hours_table, branch_codes_table
                    )
                )
        finally:
            # Always release the connection, even when a query raises, so a
            # failing check does not leave the Redshift connection open.
            self.redshift_client.close_connection()

        # Each row is (drupal_location_id, sierra_code); the query only returns
        # rows where one side of the FULL JOIN is NULL.
        ids_not_in_map = [el[0] for el in mismatched_hours if el[1] is None]
        ids_not_in_hours = [el[1] for el in mismatched_hours if el[0] is None]
        self.check_duplicate_sierra_codes(duplicates)
        self.check_hours_ids_without_mapping(ids_not_in_map)
        self.check_map_ids_without_hours(ids_not_in_hours)

    def check_duplicate_sierra_codes(self, duplicates):
        """Log an error if any Sierra code maps to multiple Drupal codes.

        Args:
            duplicates: rows returned by the duplicate-Sierra-code query.
        """
        if duplicates:
            self.logger.error(
                "The following Sierra branch codes map to more than one Drupal branch "
                "code: {}".format(duplicates)
            )

    def check_hours_ids_without_mapping(self, ids_not_in_map):
        """Log an error if any Drupal code with hours lacks a Sierra mapping.

        Args:
            ids_not_in_map: Drupal location ids found in location hours but
                not matched to a row of the branch codes map.
        """
        if ids_not_in_map:
            self.logger.error(
                "The following Drupal branch codes have location hours but do not have "
                "a known Sierra branch mapping: {}".format(ids_not_in_map)
            )

    def check_map_ids_without_hours(self, ids_not_in_hours):
        """Log an error if any mapped Sierra code has no location hours.

        Args:
            ids_not_in_hours: Sierra codes from the branch codes map whose
                Drupal code was not matched to a row of location hours.
        """
        if ids_not_in_hours:
            self.logger.error(
                "The following Sierra branch codes do not have known hours: {}".format(
                    ids_not_in_hours
                )
            )
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@
from nypl_py_utils.functions.log_helper import create_log


class LocationVisitsAlarms(Alarm):
class GranularLocationVisitsAlarms(Alarm):
def __init__(self, redshift_client):
super().__init__(redshift_client)
self.logger = create_log("location_visits_alarms")
self.logger = create_log("granular_location_visits_alarms")

def run_checks(self):
if not self.run_added_tests:
return

self.logger.info("\nLOCATION VISITS\n")
self.logger.info("\nGRANULAR LOCATION VISITS\n")
redshift_table = "location_visits" + self.redshift_suffix
stale_start_date = (self.yesterday_date - timedelta(days=30)).isoformat()
redshift_count_query = build_redshift_location_visits_count_query(
Expand All @@ -40,13 +40,13 @@ def run_checks(self):
redshift_stale_rows = self.redshift_client.execute_query(redshift_stale_query)
self.redshift_client.close_connection()

self.new_location_visits_less_than_ten_thousand_alarm(
self.check_location_visits_less_than_ten_thousand_alarm(
redshift_count, redshift_table
)
self.check_redshift_duplicates_alarm(redshift_duplicates)
self.check_redshift_stale_rows_alarm(redshift_stale_rows)

def new_location_visits_less_than_ten_thousand_alarm(
def check_location_visits_less_than_ten_thousand_alarm(
self, redshift_count, redshift_table
):
if redshift_count < 10000:
Expand Down
21 changes: 0 additions & 21 deletions chrome_installer.sh

This file was deleted.

99 changes: 60 additions & 39 deletions helpers/query_helper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,46 @@
### REDSHIFT QUERIES -- in order of _setup_alarms placement ###
# Returns every sierra_code that appears in more than one row of the given
# table. The single positional `{}` placeholder is the table name.
_REDSHIFT_BRANCH_CODES_DUPLICATE_QUERY = """
SELECT sierra_code FROM {}
GROUP BY sierra_code
HAVING COUNT(*) > 1;"""

# FULL JOINs location hours to the branch codes map on the Drupal code and
# keeps only the rows where one side is missing: a NULL sierra_code means a
# Drupal id with hours but no mapping, and a NULL drupal_location_id means a
# mapped Sierra code with no hours.
# NOTE(review): there is no DISTINCT, so if location_hours holds several rows
# per drupal_location_id an unmapped id would be returned once per row --
# confirm against the table's schema.
_REDSHIFT_BRANCH_CODES_HOURS_QUERY = """
SELECT drupal_location_id, sierra_code
FROM {location_hours_table} FULL JOIN {branch_codes_table}
ON {location_hours_table}.drupal_location_id = {branch_codes_table}.drupal_code
WHERE drupal_location_id IS NULL OR sierra_code IS NULL;"""

# Counts rows in {table} whose {date_field} equals {date}.
_REDSHIFT_CIRC_TRANS_QUERY = (
    "SELECT COUNT(*) FROM {table} WHERE {date_field} = '{date}';"
)

# Counts fresh rows whose increment_start falls on {date}.
_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
    "SELECT COUNT(id) FROM {table} "
    "WHERE increment_start::DATE = '{date}' AND is_fresh;"
)

# Returns each (shoppertrak_site_id, orbit, increment_start) combination on
# {date} that has more than one fresh row.
_REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY = """
SELECT shoppertrak_site_id, orbit, increment_start
FROM {table}
WHERE increment_start::DATE = '{date}' AND is_fresh
GROUP BY shoppertrak_site_id, orbit, increment_start
HAVING COUNT(*) > 1;"""

# Returns (shoppertrak_site_id, orbit, increment_start) combinations that have
# a stale row polled on or after {date} but no fresh row polled on or after
# {date}. Combinations are compared through a concatenated string key.
# NOTE(review): the key is concatenated without a delimiter, so two different
# combinations could in principle produce the same key -- confirm the value
# formats make that impossible.
_REDSHIFT_LOCATION_VISITS_STALE_QUERY = """
WITH stale_keys AS (
SELECT shoppertrak_site_id, orbit, increment_start,
CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start) AS key
FROM {table}
WHERE poll_date >= '{date}' AND NOT is_fresh
)
SELECT shoppertrak_site_id, orbit, increment_start
FROM stale_keys
WHERE key NOT IN (
SELECT CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start)
FROM {table}
WHERE poll_date >= '{date}' AND is_fresh
);"""

# Counts rows whose update_timestamp, truncated to a date, equals {date}.
_REDSHIFT_HOLDS_QUERY = (
    "SELECT COUNT(id) FROM {table} WHERE TRUNC(update_timestamp) = '{date}';"
)
Expand Down Expand Up @@ -51,33 +89,6 @@
)
);"""

_REDSHIFT_LOCATION_VISITS_COUNT_QUERY = (
"SELECT COUNT(id) FROM {table} "
"WHERE increment_start::DATE = '{date}' AND is_fresh;"
)

_REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY = """
SELECT shoppertrak_site_id, orbit, increment_start
FROM {table}
WHERE increment_start::DATE = '{date}' AND is_fresh
GROUP BY shoppertrak_site_id, orbit, increment_start
HAVING COUNT(*) > 1;"""

_REDSHIFT_LOCATION_VISITS_STALE_QUERY = """
WITH stale_keys AS (
SELECT shoppertrak_site_id, orbit, increment_start,
CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start) AS key
FROM {table}
WHERE poll_date >= '{date}' AND NOT is_fresh
)
SELECT shoppertrak_site_id, orbit, increment_start
FROM stale_keys
WHERE key NOT IN (
SELECT CONCAT(CONCAT(shoppertrak_site_id, orbit), increment_start)
FROM {table}
WHERE poll_date >= '{date}' AND is_fresh
);"""

# Counts rows in {table} whose transaction_et equals {date}.
_REDSHIFT_OVERDRIVE_QUERY = (
    "SELECT COUNT(*) FROM {table} WHERE transaction_et = '{date}';"
)
Expand Down Expand Up @@ -165,12 +176,34 @@
)


def build_redshift_branch_codes_duplicate_query(table):
    """Return the duplicate-Sierra-code query with ``table`` substituted in."""
    template = _REDSHIFT_BRANCH_CODES_DUPLICATE_QUERY
    # The template has a single positional placeholder for the table name.
    return template.format(table)


def build_redshift_branch_codes_hours_query(location_hours_table, branch_codes_table):
    """Return the hours/branch-codes mismatch query for the two given tables."""
    table_names = {
        "location_hours_table": location_hours_table,
        "branch_codes_table": branch_codes_table,
    }
    return _REDSHIFT_BRANCH_CODES_HOURS_QUERY.format(**table_names)


def build_redshift_circ_trans_query(table, date_field, date):
    """Return the circ trans count query for ``table``, ``date_field`` and ``date``."""
    substitutions = {"table": table, "date_field": date_field, "date": date}
    return _REDSHIFT_CIRC_TRANS_QUERY.format(**substitutions)


def build_redshift_location_visits_count_query(table, date):
    """Return the fresh location visits count query for ``table`` on ``date``."""
    substitutions = {"table": table, "date": date}
    return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(**substitutions)


def build_redshift_location_visits_duplicate_query(table, date):
    """Return the duplicate fresh location visits query for ``table`` on ``date``."""
    substitutions = {"table": table, "date": date}
    return _REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY.format(**substitutions)


def build_redshift_location_visits_stale_query(table, date):
    """Return the stale-only location visits query for ``table`` since ``date``."""
    substitutions = {"table": table, "date": date}
    return _REDSHIFT_LOCATION_VISITS_STALE_QUERY.format(**substitutions)


def build_redshift_holds_query(table, date):
    """Return the holds count query for ``table`` on ``date``."""
    substitutions = {"table": table, "date": date}
    return _REDSHIFT_HOLDS_QUERY.format(**substitutions)

Expand All @@ -187,18 +220,6 @@ def build_redshift_holds_null_query(table, date):
return _REDSHIFT_HOLDS_NULL_QUERY.format(table=table, date=date)


def build_redshift_location_visits_count_query(table, date):
return _REDSHIFT_LOCATION_VISITS_COUNT_QUERY.format(table=table, date=date)


def build_redshift_location_visits_duplicate_query(table, date):
return _REDSHIFT_LOCATION_VISITS_DUPLICATE_QUERY.format(table=table, date=date)


def build_redshift_location_visits_stale_query(table, date):
return _REDSHIFT_LOCATION_VISITS_STALE_QUERY.format(table=table, date=date)


def build_redshift_overdrive_query(table, date):
    """Return the OverDrive checkouts count query for ``table`` on ``date``."""
    substitutions = {"table": table, "date": date}
    return _REDSHIFT_OVERDRIVE_QUERY.format(**substitutions)

Expand Down
Loading
Loading