From 0d82cca6cf9226395aef8a302fd759f52d3cb911 Mon Sep 17 00:00:00 2001 From: Ian Costanzo Date: Thu, 16 May 2024 12:37:01 -0700 Subject: [PATCH] Check for mis-matched relationships Signed-off-by: Ian Costanzo --- scripts/detail_audit_report.py | 18 +++++++- scripts/detail_audit_report_2.py | 6 +-- scripts/orgbook_data_audit.py | 32 ++++++++++++- scripts/orgbook_data_load.py | 78 ++++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 5 deletions(-) diff --git a/scripts/detail_audit_report.py b/scripts/detail_audit_report.py index 1925593a..11ba3e2b 100644 --- a/scripts/detail_audit_report.py +++ b/scripts/detail_audit_report.py @@ -23,6 +23,8 @@ from orgbook_data_load import ( get_orgbook_all_corps, get_orgbook_all_corps_csv, + get_orgbook_missing_relations, + get_orgbook_missing_relations_csv, get_event_proc_future_corps, get_event_proc_future_corps_csv, get_bc_reg_corps, @@ -51,6 +53,10 @@ (orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) = get_orgbook_all_corps_csv() else: (orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) = get_orgbook_all_corps(USE_LEAR=USE_LEAR) + if USE_CSV: + orgbook_corp_relations = get_orgbook_missing_relations_csv() + else: + orgbook_corp_relations = get_orgbook_missing_relations(USE_LEAR=USE_LEAR) # corps that are still in the event processor queue waiting to be processed (won't be in orgbook yet) if USE_CSV: @@ -65,7 +71,17 @@ (bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_infos) = get_bc_reg_corps(USE_LEAR=USE_LEAR) # do the orgbook/bc reg compare - wrong_bus_num = compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_infos, orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos, future_corps, USE_LEAR=USE_LEAR) + wrong_bus_num = compare_bc_reg_orgbook( + bc_reg_corp_types, + bc_reg_corp_names, + bc_reg_corp_infos, + orgbook_corp_types, + orgbook_corp_names, + orgbook_corp_infos, + orgbook_corp_relations, + future_corps, + USE_LEAR=USE_LEAR, + ) if 0 < len(wrong_bus_num) and not USE_LEAR: bn_requeue_sql = """ diff --git a/scripts/detail_audit_report_2.py b/scripts/detail_audit_report_2.py index cf568d92..edf560fb 100644 --- a/scripts/detail_audit_report_2.py +++ b/scripts/detail_audit_report_2.py @@ -9,8 +9,7 @@ import csv from config import get_connection, get_db_sql, get_sql_record_count, CORP_TYPES_IN_SCOPE, corp_num_with_prefix, bare_corp_num -from orgbook_data_load import get_orgbook_all_corps -from orgbook_data_audit import compare_bc_reg_orgbook +from orgbook_data_load import get_orgbook_all_corps, get_orgbook_missing_relations USE_LEAR = (os.environ.get('USE_LEAR', 'false').lower() == 'true') @@ -22,4 +21,5 @@ Reads from the orgbook database and compares: """ # read from orgbook database - orgbook_corp_types = get_orgbook_all_corps(USE_LEAR=USE_LEAR) + (orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) = get_orgbook_all_corps(USE_LEAR=USE_LEAR) + orgbook_corp_relations = get_orgbook_missing_relations(USE_LEAR=USE_LEAR) diff --git a/scripts/orgbook_data_audit.py b/scripts/orgbook_data_audit.py index 7c3f1dc7..44e73fc9 100644 --- a/scripts/orgbook_data_audit.py +++ b/scripts/orgbook_data_audit.py @@ -82,7 +82,17 @@ def compare_dates(orgbook_reg_dt, bc_reg_reg_dt, USE_LEAR: bool = False): else: return compare_dates_colin(orgbook_reg_dt, bc_reg_reg_dt) -def compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_infos, orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos, future_corps, USE_LEAR: bool = False): +def compare_bc_reg_orgbook( + bc_reg_corp_types, + bc_reg_corp_names, + bc_reg_corp_infos, + orgbook_corp_types, + orgbook_corp_names, + orgbook_corp_infos, + orgbook_corp_relations, + future_corps, + USE_LEAR: bool = False, +): missing_in_orgbook = [] missing_in_bcreg = [] wrong_corp_type = [] @@ -157,6 +167,24 @@ def compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_inf error_msgs += "OrgBook corp not in BC Reg: " + orgbook_corp + "\n" error_cmds += "./manage -p bc -e prod deleteTopic " + orgbook_corp + "\n" + # fixes for missing relationships + reln_hash = {} + reln_list = [] + if not USE_LEAR: + for relation in orgbook_corp_relations: + reln = relation['s_2'] + ":" + relation['s_1'] + if not reln in reln_hash: + reln_hash[reln] = reln + reln_list.append(reln) + error_msgs += "Missing relationship in OrgBook:" + reln + "\n" + reg_cmd = "queueOrgForRelnsUpdate" + corp_num = relation['s_2'] + if corp_num.startswith('FM'): + reg_cmd = "queueOrgForRelnsUpdateLear" + elif corp_num.startswith('BC'): + corp_num = corp_num[2:] + error_cmds += "./manage -e prod " + reg_cmd + " " + corp_num + " " + relation['s_1'] + "\n" + corp_errors = (len(missing_in_orgbook) + len(missing_in_bcreg) + len(wrong_corp_type) + @@ -179,6 +207,8 @@ def compare_bc_reg_orgbook(bc_reg_corp_types, bc_reg_corp_names, bc_reg_corp_inf error_summary += "Wrong business number: " + str(len(wrong_bus_num)) + " " + str(wrong_bus_num) + "\n" error_summary += "Wrong corp registration: " + str(len(wrong_corp_reg_dt)) + " " + str(wrong_corp_reg_dt) + "\n" error_summary += "Wrong corp jurisdiction: " + str(len(wrong_corp_juris)) + " " + str(wrong_corp_juris) + "\n" + if not USE_LEAR: + error_summary += "Mis-matched OrgBook relationships: " + str(len(reln_list)) + " " + str(reln_list) + "\n" if 0 < corp_errors: log_error(error_summary) diff --git a/scripts/orgbook_data_load.py b/scripts/orgbook_data_load.py index 6643b0a0..5b8d6ec4 100644 --- a/scripts/orgbook_data_load.py +++ b/scripts/orgbook_data_load.py @@ -356,6 +356,84 @@ def get_orgbook_all_corps_csv(): return (orgbook_corp_types, orgbook_corp_names, orgbook_corp_infos) +def get_orgbook_missing_relations(USE_LEAR: bool = False): + """ + Checks orgbook for missing/mis-matched relationships. + """ + conn = None + try: + conn = get_connection('org_book') + except (Exception) as error: + print(error) + raise + + # get all the mis-matched relationships from orgbook + print("Get corp relationships from OrgBook DB", datetime.datetime.now()) + orgbook_corp_infos = {} + with open('export/orgbook_corp_relations.csv', mode='w') as corp_file: + fieldnames = ["tr1_topic_id", "tr1_related_topic_id", "id_1", "s_1", "id_2", "s_2"] + corp_writer = csv.DictWriter(corp_file, fieldnames=fieldnames, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL) + corp_writer.writeheader() + + if not USE_LEAR: + sql = """ + select tr1_topic_id, tr1_related_topic_id, + t1.id id_1, t1.source_id s_1, + t2.id id_2, t2.source_id s_2 + from ( + select * from ( + select + tr1.topic_id tr1_topic_id, + tr1.related_topic_id tr1_related_topic_id, + tr2.topic_id tr2_topic_id, + tr2.related_topic_id tr2_related_topic_id + from topic_relationship tr1 + full outer join topic_relationship tr2 + on tr1.topic_id = tr2.related_topic_id + and tr1.related_topic_id = tr2.topic_id + order by tr1.topic_id, tr1.related_topic_id + ) as related + where (related.tr2_topic_id is null + or related.tr2_related_topic_id is null) + ) as unrelated, + topic as t1, + topic as t2 + where t1.id = tr1_topic_id + and t2.id = tr1_related_topic_id + order by tr1_topic_id desc; + """ + + try: + cur = conn.cursor() + cur.execute(sql) + for row in cur: + write_corp = { + "tr1_topic_id": row[0], + "tr1_related_topic_id": row[1], + "id_1": row[2], + "s_1":row[3], + "id_2": row[4], + "s_2": row[5], + } + corp_writer.writerow(write_corp) + cur.close() + except (Exception) as error: + print(error) + raise + + return get_orgbook_missing_relations_csv() + + +def get_orgbook_missing_relations_csv(): + orgbook_corp_relations = [] + with open('export/orgbook_corp_relations.csv', mode='r') as corp_file: + corp_reader = csv.DictReader(corp_file) + for row in corp_reader: + orgbook_corp_relations.append(row) + + return orgbook_corp_relations + + def get_event_proc_future_corps(USE_LEAR: bool = False): """ Reads from the event processor database and writes to a csv file: