Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optionally import RIPE routes #23

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion bin/ripe_download
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -e
d=`date +%F`
mkdir $d
cd $d
for db in ripe.db.organisation.gz ripe.db.role.gz ripe.db.aut-num.gz ripe.db.inet6num.gz ripe.db.inetnum.gz
for db in ripe.db.organisation.gz ripe.db.role.gz ripe.db.aut-num.gz ripe.db.inet6num.gz ripe.db.inetnum.gz ripe.db.route.gz ripe.db.route6.gz
do
echo "Downloading: " $db
curl -O "https://ftp.ripe.net/ripe/dbase/split/$db"
Expand Down
22 changes: 20 additions & 2 deletions intelmq_certbund_contact/ripe/ripe_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,14 @@ def add_common_args(parser):
parser.add_argument("--inet6num-file",
default='ripe.db.inet6num.gz',
help=("Specify the inet6num data file."))
parser.add_argument("--route-file",
default='ripe.db.route.gz',
help=("Specify the route data file."))
parser.add_argument("--route6-file",
default='ripe.db.route6.gz',
help=("Specify the route6 data file."))
parser.add_argument("--import-route-data", action='store_true',
help=("Whether to import/diff the route data."))
parser.add_argument("--ripe-delegated-file",
default='delegated-ripencc-latest',
help=("Name of the delegated-ripencc-latest file to"
Expand All @@ -81,7 +89,7 @@ def load_ripe_files(options) -> tuple:

Returns:
tuple of (asn_list, organisation_list, role_list, abusec_to_org,
inetnum_list, inet6num_list)
inetnum_list, inet6num_list, route_list, route6_list)
"""

# Step 1: read all files
Expand Down Expand Up @@ -123,6 +131,16 @@ def restrict_country(record):
verbose=options.verbose)
role_index = build_index(role_list, 'nic-hdl')

route_list = []
route6_list = []
if options.import_route_data:
route_list = parse_file(options.route_file,
('route', 'origin'),
verbose=options.verbose)
route6_list = parse_file(options.route6_file,
('route6', 'origin'),
verbose=options.verbose)

# Step 2: Prepare new data for insertion

(asn_list, asn_list_u, organisation_list, organisation_index) \
Expand Down Expand Up @@ -166,7 +184,7 @@ def restrict_country(record):


return (asn_list, organisation_list, role_list, abusec_to_org,
inetnum_list, inet6num_list)
inetnum_list, inet6num_list, route_list, route6_list)


def read_delegated_file(filename, country, verbose=False):
Expand Down
104 changes: 83 additions & 21 deletions intelmq_certbund_contact/ripe/ripe_import.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@

import sys
import psycopg2
import psycopg2.extras
import argparse
import collections
from datetime import datetime, timezone

import intelmq_certbund_contact.ripe.ripe_data as ripe_data


SOURCE_NAME = 'ripe'
BULK_PAGE_SIZE = 500


def remove_old_entries(cur, verbose):
def remove_old_entries(cur, verbose, delete_route_data=False):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this new parameter used?

"""Remove the entries imported by previous runs."""
if verbose:
print('** Removing old entries from database...')
Expand All @@ -51,6 +54,8 @@ def remove_old_entries(cur, verbose):
(SOURCE_NAME,))
cur.execute("DELETE FROM organisation_automatic WHERE import_source = %s;",
(SOURCE_NAME,))
cur.execute("DELETE FROM route_automatic WHERE import_source = %s;",
(SOURCE_NAME,))


def insert_new_network_entries(cur, network_list, key, verbose):
Expand Down Expand Up @@ -125,38 +130,51 @@ def insert_new_organisations(cur, organisation_list, verbose):

return mapping


def insert_new_asn_org_entries(cur, asn_list, mapping):
# many-to-many table organisation <-> as number
def _generate_asn_entries(asn_list, mapping):
insert_time = datetime.now(tz=timezone.utc)
Comment on lines -129 to +134
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this just refactoring or required for something else?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is to give all bulk inserted entries the same import time

for entry in asn_list:
org_id = mapping[entry["org"][0]].get("org_id")
if org_id is None:
print("org_id None for AS organisation handle {!r}"
.format(entry["org"][0]))
continue
yield (org_id, entry['aut-num'][0][2:], SOURCE_NAME, insert_time)

cur.execute("""INSERT INTO organisation_to_asn_automatic
(organisation_automatic_id, asn,
import_source, import_time)
VALUES (%s, %s, %s, CURRENT_TIMESTAMP);""",
(org_id, entry['aut-num'][0][2:], SOURCE_NAME))


def insert_new_network_org_entries(cur, org_net_mapping, mapping):
# many-to-many table organisation <-> network number
def insert_new_asn_org_entries(cur, asn_list, mapping):
# many-to-many table organisation <-> as number
psycopg2.extras.execute_values(
cur,
"""INSERT INTO organisation_to_asn_automatic
(organisation_automatic_id, asn,
import_source, import_time)
VALUES %s;""",
_generate_asn_entries(asn_list, mapping),
page_size=BULK_PAGE_SIZE,
)

def _generate_network_entries(org_net_mapping, mapping):
insert_time = datetime.now(tz=timezone.utc)
for org, networks in org_net_mapping.items():
org_id = mapping[org].get("org_id")
if org_id is None:
print("org_id None for network entry {!r}".format((org, networks)))
continue

for network_id in networks:
cur.execute("""INSERT INTO organisation_to_network_automatic
(organisation_automatic_id,
network_automatic_id,
import_source, import_time)
VALUES (%s, %s, %s, CURRENT_TIMESTAMP);""",
(org_id, network_id, SOURCE_NAME))
yield (org_id, network_id, SOURCE_NAME, insert_time)

def insert_new_network_org_entries(cur, org_net_mapping, mapping):
# many-to-many table organisation <-> network number
psycopg2.extras.execute_values(
cur,
"""INSERT INTO organisation_to_network_automatic
(organisation_automatic_id,
network_automatic_id,
import_source, import_time)
VALUES %s;""",
_generate_network_entries(org_net_mapping, mapping),
page_size=BULK_PAGE_SIZE,
)


def insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, verbose):
Expand Down Expand Up @@ -186,6 +204,30 @@ def insert_new_contact_entries(cur, role_list, abusec_to_org, mapping, verbose):
(email, mapping[orh]['org_id'], SOURCE_NAME))


def insert_new_routes(cur, route_list, key, verbose):
if verbose:
print('** Saving {} data to database...'.format(key))

insert_time = datetime.now(tz=timezone.utc)

def _gen():
for entry in route_list:
# 'origin' is the ASN. Some values contain what appears to be
# comments (e.g. "origin: # AS1234 # FOO") them which we need to
# strip.
asn = entry['origin'][0].split()[0][2:]
yield (entry[key][0], asn, SOURCE_NAME, insert_time)

psycopg2.extras.execute_values(
cur,
"""INSERT INTO route_automatic
(address, asn, import_source, import_time)
VALUES %s;""",
_gen(),
page_size=BULK_PAGE_SIZE,
)


def main():
parser = argparse.ArgumentParser(
description=""
Expand All @@ -197,21 +239,27 @@ def main():
ripe_data.add_db_args(parser)
ripe_data.add_common_args(parser)

parser.add_argument("--before-commit-command",
help=("SQL statement that is executed before committing"
" the changes. This can be used to e.g. cleanup"
" data that refers to the potentially changed"
" RIPE data."))

args = parser.parse_args()

if args.verbose:
print('Parsing RIPE database...')
print('------------------------')

(asn_list, organisation_list, role_list, abusec_to_org, inetnum_list,
inet6num_list) = ripe_data.load_ripe_files(args)
inet6num_list, route_list, route6_list) = ripe_data.load_ripe_files(args)

con = None
try:
con = psycopg2.connect(dsn=args.conninfo)
cur = con.cursor()

remove_old_entries(cur, args.verbose)
remove_old_entries(cur, args.verbose, args.import_route_data)

# network addresses
org_inet6_mapping = insert_new_network_entries(
Expand All @@ -235,6 +283,20 @@ def main():
insert_new_contact_entries(cur, role_list, abusec_to_org, mapping,
args.verbose)

#
# Routing
#
if args.import_route_data:
insert_new_routes(cur, route_list, 'route', args.verbose)
insert_new_routes(cur, route6_list, 'route6', args.verbose)

# run "before commit command"
if args.before_commit_command:
if args.verbose:
print('Running before commit command...')
print('------------------------')
cur.execute(args.before_commit_command)

# Commit all data
con.commit()
except psycopg2.DatabaseError as e:
Expand Down
17 changes: 17 additions & 0 deletions sql/initdb.sql
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,23 @@ CREATE INDEX fqdn_annotation_fqdn_idx
ON fqdn_annotation (fqdn_id);


-- Routing information, useful as a mapping from network addresses to
-- ASNs.
CREATE TABLE route_automatic (
route_automatic_id SERIAL PRIMARY KEY,
address CIDR NOT NULL,
asn BIGINT NOT NULL,
LIKE automatic_templ INCLUDING ALL,

-- The data from ripe.db.route.gz and ripe.db.route6.gz has cases
-- where the same network address is associated with multiple ASNs,
-- so we cannot have a constraint on just (address, import_source).
UNIQUE (address, asn, import_source)
);

CREATE INDEX route_automatic_cidr_gist_idx ON route_automatic
USING gist (address inet_ops);


-- Information about national CERTs

Expand Down
18 changes: 18 additions & 0 deletions sql/update-route.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Update script for the route_automatic table.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please mention this in UPDATE.md


CREATE TABLE route_automatic (
route_automatic_id SERIAL PRIMARY KEY,
address CIDR NOT NULL,
asn BIGINT NOT NULL,
import_source VARCHAR(500) NOT NULL,
import_time TIMESTAMP NOT NULL,

-- explicitly name the constraint to make sure it has the same name
-- as the constraint created by initdb.sql.
CONSTRAINT automatic_templ_import_source_check CHECK (import_source <> ''),

UNIQUE (address, asn, import_source)
);

CREATE INDEX route_automatic_cidr_gist_idx ON route_automatic
USING gist (address inet_ops);