diff --git a/bin/ripe_download b/bin/ripe_download index 5536379..d43a551 100755 --- a/bin/ripe_download +++ b/bin/ripe_download @@ -5,7 +5,7 @@ set -e d=`date +%F` mkdir $d cd $d -for db in ripe.db.organisation.gz ripe.db.role.gz ripe.db.aut-num.gz ripe.db.inet6num.gz ripe.db.inetnum.gz +for db in ripe.db.organisation.gz ripe.db.role.gz ripe.db.aut-num.gz ripe.db.inet6num.gz ripe.db.inetnum.gz ripe.db.route.gz ripe.db.route6.gz do echo "Downloading: " $db curl -O "https://ftp.ripe.net/ripe/dbase/split/$db" diff --git a/intelmq_certbund_contact/ripe/ripe_data.py b/intelmq_certbund_contact/ripe/ripe_data.py index 41f1683..1324f4b 100644 --- a/intelmq_certbund_contact/ripe/ripe_data.py +++ b/intelmq_certbund_contact/ripe/ripe_data.py @@ -55,6 +55,14 @@ def add_common_args(parser): parser.add_argument("--inet6num-file", default='ripe.db.inet6num.gz', help=("Specify the inet6num data file.")) + parser.add_argument("--route-file", + default='ripe.db.route.gz', + help=("Specify the route data file.")) + parser.add_argument("--route6-file", + default='ripe.db.route6.gz', + help=("Specify the route6 data file.")) + parser.add_argument("--import-route-data", action='store_true', + help=("Whether to import/diff the route data.")) parser.add_argument("--ripe-delegated-file", default='delegated-ripencc-latest', help=("Name of the delegated-ripencc-latest file to" @@ -81,7 +89,7 @@ def load_ripe_files(options) -> tuple: Returns: tuple of (asn_list, organisation_list, role_list, abusec_to_org, - inetnum_list, inet6num_list) + inetnum_list, inet6num_list, route_list, route6_list) """ # Step 1: read all files @@ -123,6 +131,16 @@ def restrict_country(record): verbose=options.verbose) role_index = build_index(role_list, 'nic-hdl') + route_list = [] + route6_list = [] + if options.import_route_data: + route_list = parse_file(options.route_file, + ('route', 'origin'), + verbose=options.verbose) + route6_list = parse_file(options.route6_file, + ('route6', 'origin'), + 
verbose=options.verbose) + # Step 2: Prepare new data for insertion (asn_list, asn_list_u, organisation_list, organisation_index) \ @@ -166,7 +184,7 @@ def restrict_country(record): return (asn_list, organisation_list, role_list, abusec_to_org, - inetnum_list, inet6num_list) + inetnum_list, inet6num_list, route_list, route6_list) def read_delegated_file(filename, country, verbose=False): diff --git a/intelmq_certbund_contact/ripe/ripe_import.py b/intelmq_certbund_contact/ripe/ripe_import.py index c232b31..c5671f8 100755 --- a/intelmq_certbund_contact/ripe/ripe_import.py +++ b/intelmq_certbund_contact/ripe/ripe_import.py @@ -28,16 +28,19 @@ import sys import psycopg2 +import psycopg2.extras import argparse import collections +from datetime import datetime, timezone import intelmq_certbund_contact.ripe.ripe_data as ripe_data SOURCE_NAME = 'ripe' +BULK_PAGE_SIZE = 500 -def remove_old_entries(cur, verbose): +def remove_old_entries(cur, verbose, delete_route_data=False): """Remove the entries imported by previous runs.""" if verbose: print('** Removing old entries from database...') @@ -51,6 +54,8 @@ def remove_old_entries(cur, verbose): (SOURCE_NAME,)) cur.execute("DELETE FROM organisation_automatic WHERE import_source = %s;", (SOURCE_NAME,)) + cur.execute("DELETE FROM route_automatic WHERE import_source = %s;", + (SOURCE_NAME,)) def insert_new_network_entries(cur, network_list, key, verbose): @@ -125,25 +130,30 @@ def insert_new_organisations(cur, organisation_list, verbose): return mapping - -def insert_new_asn_org_entries(cur, asn_list, mapping): - # many-to-many table organisation <-> as number +def _generate_asn_entries(asn_list, mapping): + insert_time = datetime.now(tz=timezone.utc) for entry in asn_list: org_id = mapping[entry["org"][0]].get("org_id") if org_id is None: print("org_id None for AS organisation handle {!r}" .format(entry["org"][0])) continue + yield (org_id, entry['aut-num'][0][2:], SOURCE_NAME, insert_time) - cur.execute("""INSERT INTO 
organisation_to_asn_automatic - (organisation_automatic_id, asn, - import_source, import_time) - VALUES (%s, %s, %s, CURRENT_TIMESTAMP);""", - (org_id, entry['aut-num'][0][2:], SOURCE_NAME)) - - -def insert_new_network_org_entries(cur, org_net_mapping, mapping): - # many-to-many table organisation <-> network number +def insert_new_asn_org_entries(cur, asn_list, mapping): + # many-to-many table organisation <-> as number + psycopg2.extras.execute_values( + cur, + """INSERT INTO organisation_to_asn_automatic + (organisation_automatic_id, asn, + import_source, import_time) + VALUES %s;""", + _generate_asn_entries(asn_list, mapping), + page_size=BULK_PAGE_SIZE, + ) + +def _generate_network_entries(org_net_mapping, mapping): + insert_time = datetime.now(tz=timezone.utc) for org, networks in org_net_mapping.items(): org_id = mapping[org].get("org_id") if org_id is None: @@ -151,12 +161,20 @@ def insert_new_network_org_entries(cur, org_net_mapping, mapping): continue for network_id in networks: - cur.execute("""INSERT INTO organisation_to_network_automatic - (organisation_automatic_id, - network_automatic_id, - import_source, import_time) - VALUES (%s, %s, %s, CURRENT_TIMESTAMP);""", - (org_id, network_id, SOURCE_NAME)) + yield (org_id, network_id, SOURCE_NAME, insert_time) + +def insert_new_network_org_entries(cur, org_net_mapping, mapping): + # many-to-many table organisation <-> network number + psycopg2.extras.execute_values( + cur, + """INSERT INTO organisation_to_network_automatic + (organisation_automatic_id, + network_automatic_id, + import_source, import_time) + VALUES %s;""", + _generate_network_entries(org_net_mapping, mapping), + page_size=BULK_PAGE_SIZE, + ) def insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, verbose): @@ -186,6 +204,30 @@ def insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, verbose): (email, mapping[orh]['org_id'], SOURCE_NAME)) +def insert_new_routes(cur, route_list, key, verbose): + if verbose: + 
print('** Saving {} data to database...'.format(key)) + + insert_time = datetime.now(tz=timezone.utc) + + def _gen(): + for entry in route_list: + # 'origin' is the ASN. Some values carry what appear to be trailing + # comments (e.g. "origin: AS1234 # FOO"), which we need to strip, so + # keep only the first token and drop its "AS" prefix. + asn = entry['origin'][0].split()[0][2:] + yield (entry[key][0], asn, SOURCE_NAME, insert_time) + + psycopg2.extras.execute_values( + cur, + """INSERT INTO route_automatic + (address, asn, import_source, import_time) + VALUES %s;""", + _gen(), + page_size=BULK_PAGE_SIZE, + ) + + def main(): parser = argparse.ArgumentParser( description="" @@ -197,6 +239,12 @@ def main(): ripe_data.add_db_args(parser) ripe_data.add_common_args(parser) + parser.add_argument("--before-commit-command", + help=("SQL statement that is executed before committing" + " the changes. This can be used to e.g. cleanup" + " data that refers to the potentially changed" + " RIPE data.")) + args = parser.parse_args() if args.verbose: @@ -204,14 +252,14 @@ def main(): print('------------------------') (asn_list, organisation_list, role_list, abusec_to_org, inetnum_list, - inet6num_list) = ripe_data.load_ripe_files(args) + inet6num_list, route_list, route6_list) = ripe_data.load_ripe_files(args) con = None try: con = psycopg2.connect(dsn=args.conninfo) cur = con.cursor() - remove_old_entries(cur, args.verbose) + remove_old_entries(cur, args.verbose, args.import_route_data) # network addresses org_inet6_mapping = insert_new_network_entries( @@ -235,6 +283,20 @@ def main(): insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, args.verbose) + # + # Routing + # + if args.import_route_data: + insert_new_routes(cur, route_list, 'route', args.verbose) + insert_new_routes(cur, route6_list, 'route6', args.verbose) + + # run "before commit command" + if args.before_commit_command: + if args.verbose: + print('Running before commit command...') + print('------------------------') +
cur.execute(args.before_commit_command) + # Commit all data con.commit() except psycopg2.DatabaseError as e: diff --git a/sql/initdb.sql b/sql/initdb.sql index c8d7018..34e4145 100644 --- a/sql/initdb.sql +++ b/sql/initdb.sql @@ -246,6 +246,23 @@ CREATE INDEX fqdn_annotation_fqdn_idx ON fqdn_annotation (fqdn_id); +-- Routing information, useful as a mapping from network addresses to +-- ASNs. +CREATE TABLE route_automatic ( + route_automatic_id SERIAL PRIMARY KEY, + address CIDR NOT NULL, + asn BIGINT NOT NULL, + LIKE automatic_templ INCLUDING ALL, + + -- The data from ripe.db.route.gz and ripe.db.route6.gz has cases + -- where the same network address is associated with multiple ASNs, + -- so we cannot have a constraint on just (address, import_source). + UNIQUE (address, asn, import_source) +); + +CREATE INDEX route_automatic_cidr_gist_idx ON route_automatic + USING gist (address inet_ops); + -- Information about national CERTs diff --git a/sql/update-route.sql b/sql/update-route.sql new file mode 100644 index 0000000..ce064b6 --- /dev/null +++ b/sql/update-route.sql @@ -0,0 +1,18 @@ +-- Update script for the route_automatic table. + +CREATE TABLE route_automatic ( + route_automatic_id SERIAL PRIMARY KEY, + address CIDR NOT NULL, + asn BIGINT NOT NULL, + import_source VARCHAR(500) NOT NULL, + import_time TIMESTAMP NOT NULL, + + -- explicitly name the constraint to make sure it has the same name + -- as the constraint created by initdb.sql. + CONSTRAINT automatic_templ_import_source_check CHECK (import_source <> ''), + + UNIQUE (address, asn, import_source) +); + +CREATE INDEX route_automatic_cidr_gist_idx ON route_automatic + USING gist (address inet_ops);