Skip to content

Commit

Permalink
Merge pull request #38 from ianco/master
Browse files Browse the repository at this point in the history
Check for mis-matched relationships
  • Loading branch information
WadeBarnes authored May 24, 2024
2 parents bd5812a + 0d82cca commit 21da366
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 5 deletions.
18 changes: 17 additions & 1 deletion scripts/detail_audit_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from orgbook_data_load import (
get_orgbook_all_corps,
get_orgbook_all_corps_csv,
get_orgbook_missing_relations,
get_orgbook_missing_relations_csv,
get_event_proc_future_corps,
get_event_proc_future_corps_csv,
get_bc_reg_corps,
Expand Down Expand Up @@ -51,6 +53,10 @@
(orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) = get_orgbook_all_corps_csv()
else:
(orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) = get_orgbook_all_corps(USE_LEAR=USE_LEAR)
if USE_CSV:
orgbook_corp_relations = get_orgbook_missing_relations_csv()
else:
orgbook_corp_relations = get_orgbook_missing_relations(USE_LEAR=USE_LEAR)

# corps that are still in the event processor queue waiting to be processed (won't be in orgbook yet)
if USE_CSV:
Expand All @@ -65,7 +71,17 @@
(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_infos) = get_bc_reg_corps(USE_LEAR=USE_LEAR)

# do the orgbook/bc reg compare
wrong_bus_num = compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_infos, orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos, future_corps, USE_LEAR=USE_LEAR)
wrong_bus_num = compare_bc_reg_orgbook(
bc_reg_corp_types,
bc_reg_corp_names,
bc_reg_corp_infos,
orgbook_corp_types,
orgbook_corp_names,
orgbook_corp_infos,
orgbook_corp_relations,
future_corps,
USE_LEAR=USE_LEAR,
)

if 0 < len(wrong_bus_num) and not USE_LEAR:
bn_requeue_sql = """
Expand Down
6 changes: 3 additions & 3 deletions scripts/detail_audit_report_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,7 @@
import csv

from config import get_connection, get_db_sql, get_sql_record_count, CORP_TYPES_IN_SCOPE, corp_num_with_prefix, bare_corp_num
from orgbook_data_load import get_orgbook_all_corps
from orgbook_data_audit import compare_bc_reg_orgbook
from orgbook_data_load import get_orgbook_all_corps, get_orgbook_missing_relations

USE_LEAR = (os.environ.get('USE_LEAR', 'false').lower() == 'true')

Expand All @@ -22,4 +21,5 @@
Reads from the orgbook database and compares:
"""
# read from orgbook database
orgbook_corp_types = get_orgbook_all_corps(USE_LEAR=USE_LEAR)
(orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) = get_orgbook_all_corps(USE_LEAR=USE_LEAR)
orgbook_corp_relations = get_orgbook_missing_relations(USE_LEAR=USE_LEAR)
32 changes: 31 additions & 1 deletion scripts/orgbook_data_audit.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,17 @@ def compare_dates(orgbook_reg_dt, bc_reg_reg_dt, USE_LEAR: bool = False):
else:
return compare_dates_colin(orgbook_reg_dt, bc_reg_reg_dt)

def compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_infos, orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos, future_corps, USE_LEAR: bool = False):
def compare_bc_reg_orgbook(
bc_reg_corp_types,
bc_reg_corp_names,
bc_reg_corp_infos,
orgbook_corp_types,
orgbook_corp_names,
orgbook_corp_infos,
orgbook_corp_relations,
future_corps,
USE_LEAR: bool = False,
):
missing_in_orgbook = []
missing_in_bcreg = []
wrong_corp_type = []
Expand Down Expand Up @@ -157,6 +167,24 @@ def compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_inf
error_msgs += "OrgBook corp not in BC Reg: " + orgbook_corp + "\n"
error_cmds += "./manage -p bc -e prod deleteTopic " + orgbook_corp + "\n"

# fixes for missing relationships
reln_hash = {}
reln_list = []
if not USE_LEAR:
for relation in orgbook_corp_relations:
reln = relation['s_2'] + ":" + relation['s_1']
if not reln in reln_hash:
reln_hash[reln] = reln
reln_list.append(reln)
error_msgs += "Missing relationship in OrgBook:" + reln + "\n"
reg_cmd = "queueOrgForRelnsUpdate"
corp_num = relation['s_2']
if corp_num.startswith('FM'):
reg_cmd = "queueOrgForRelnsUpdateLear"
elif corp_num.startswith('BC'):
corp_num = corp_num[2:]
error_cmds += "./manage -e prod " + reg_cmd + " " + corp_num + " " + relation['s_1'] + "\n"

corp_errors = (len(missing_in_orgbook) +
len(missing_in_bcreg) +
len(wrong_corp_type) +
Expand All @@ -179,6 +207,8 @@ def compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_inf
error_summary += "Wrong business number: " + str(len(wrong_bus_num)) + " " + str(wrong_bus_num) + "\n"
error_summary += "Wrong corp registration: " + str(len(wrong_corp_reg_dt)) + " " + str(wrong_corp_reg_dt) + "\n"
error_summary += "Wrong corp jurisdiction: " + str(len(wrong_corp_juris)) + " " + str(wrong_corp_juris) + "\n"
if not USE_LEAR:
error_summary += "Mis-matched OrgBook relationships: " + str(len(reln_list)) + " " + str(reln_list) + "\n"

if 0 < corp_errors:
log_error(error_summary)
Expand Down
78 changes: 78 additions & 0 deletions scripts/orgbook_data_load.py
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,84 @@ def get_orgbook_all_corps_csv():
return (orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos)


def get_orgbook_missing_relations(USE_LEAR: bool = False):
    """
    Checks orgbook for missing/mis-matched relationships.

    Queries the OrgBook database for `topic_relationship` rows that have no
    matching row in the reverse direction, writes them to
    'export/orgbook_corp_relations.csv', and returns the contents of that
    file as read back by `get_orgbook_missing_relations_csv()`.

    Args:
        USE_LEAR: when True the query is skipped, so only the csv header is
            written and the returned list is empty.

    Returns:
        A list of dicts, one per csv row, keyed by the csv column names
        ("tr1_topic_id", "tr1_related_topic_id", "id_1", "s_1", "id_2", "s_2").

    Raises:
        Exception: any error raised while connecting or querying is printed
            and then re-raised unchanged.
    """
    try:
        conn = get_connection('org_book')
    except Exception as error:
        print(error)
        raise

    # get all the mis-matched relationships from orgbook
    print("Get corp relationships from OrgBook DB", datetime.datetime.now())
    fieldnames = ["tr1_topic_id", "tr1_related_topic_id", "id_1", "s_1", "id_2", "s_2"]
    # newline='' is what the csv module expects for files it writes to;
    # without it, extra carriage returns are emitted on some platforms.
    with open('export/orgbook_corp_relations.csv', mode='w', newline='') as corp_file:
        corp_writer = csv.DictWriter(corp_file, fieldnames=fieldnames, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        corp_writer.writeheader()

        # The query is only defined for the non-LEAR case, so the whole
        # execute/write step is guarded by the same condition.
        if not USE_LEAR:
            # Self-join of topic_relationship against its own reverse
            # direction; rows whose reverse side is null are the
            # relationships that exist one way only. The join to `topic`
            # resolves both ends to their source_id.
            sql = """
                select tr1_topic_id, tr1_related_topic_id,
                t1.id id_1, t1.source_id s_1,
                t2.id id_2, t2.source_id s_2
                from (
                select * from (
                select
                tr1.topic_id tr1_topic_id,
                tr1.related_topic_id tr1_related_topic_id,
                tr2.topic_id tr2_topic_id,
                tr2.related_topic_id tr2_related_topic_id
                from topic_relationship tr1
                full outer join topic_relationship tr2
                on tr1.topic_id = tr2.related_topic_id
                and tr1.related_topic_id = tr2.topic_id
                order by tr1.topic_id, tr1.related_topic_id
                ) as related
                where (related.tr2_topic_id is null
                or related.tr2_related_topic_id is null)
                ) as unrelated,
                topic as t1,
                topic as t2
                where t1.id = tr1_topic_id
                and t2.id = tr1_related_topic_id
                order by tr1_topic_id desc;
                """

            cur = None
            try:
                cur = conn.cursor()
                cur.execute(sql)
                for row in cur:
                    # the select list is in the same order as `fieldnames`
                    corp_writer.writerow(dict(zip(fieldnames, row)))
            except Exception as error:
                print(error)
                raise
            finally:
                # release the cursor even when the query or the write fails
                if cur is not None:
                    cur.close()

    return get_orgbook_missing_relations_csv()


def get_orgbook_missing_relations_csv():
    """
    Reads the mis-matched OrgBook relationships from the csv export.

    Returns:
        A list with one dict per data row of
        'export/orgbook_corp_relations.csv', keyed by the column names in
        the file's header row. The list is empty when the file contains
        only the header.

    Raises:
        OSError: if the export file cannot be opened.
    """
    # newline='' hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are returned exactly as they were written.
    with open('export/orgbook_corp_relations.csv', mode='r', newline='') as corp_file:
        return list(csv.DictReader(corp_file))


def get_event_proc_future_corps(USE_LEAR: bool = False):
"""
Reads from the event processor database and writes to a csv file:
Expand Down

0 comments on commit 21da366

Please sign in to comment.