diff --git a/tools/add_input_name_as_column/add_input_name_as_column.py b/tools/add_input_name_as_column/add_input_name_as_column.py
index 3ab71509d06..b8b8b71da3b 100644
--- a/tools/add_input_name_as_column/add_input_name_as_column.py
+++ b/tools/add_input_name_as_column/add_input_name_as_column.py
@@ -4,11 +4,28 @@
 
 def Parser():
     the_parser = argparse.ArgumentParser(description="add label to last column of file")
-    the_parser.add_argument('--input', required=True, action="store", type=str, help="input tabular file")
-    the_parser.add_argument('--output', required=True, action="store", type=str, help="output file path")
-    the_parser.add_argument('--label', required=True, action="store", type=str, help="label to add in last column")
-    the_parser.add_argument('--header', action="store", type=str, help="column label for last column")
-    the_parser.add_argument('--prepend', action='store_true', default=False, help='Prepend column instead of appending' )
+    the_parser.add_argument(
+        "--input", required=True, action="store", type=str, help="input tabular file"
+    )
+    the_parser.add_argument(
+        "--output", required=True, action="store", type=str, help="output file path"
+    )
+    the_parser.add_argument(
+        "--label",
+        required=True,
+        action="store",
+        type=str,
+        help="label to add in last column",
+    )
+    the_parser.add_argument(
+        "--header", action="store", type=str, help="column label for last column"
+    )
+    the_parser.add_argument(
+        "--prepend",
+        action="store_true",
+        default=False,
+        help="Prepend column instead of appending",
+    )
 
     args = the_parser.parse_args()
     return args
@@ -17,9 +34,11 @@ def Parser():
 args = Parser()
 
 
-with io.open(args.input, encoding="utf-8") as input, io.open(args.output, 'w', encoding="utf-8") as output:
+with io.open(args.input, encoding="utf-8") as input, io.open(
+    args.output, "w", encoding="utf-8"
+) as output:
     for i, line in enumerate(input):
-        line = line.strip('\n')
+        line = line.strip("\n")
         if (i == 0) and args.header:
             new_entry = args.header
         else:
diff --git a/tools/column_order_header_sort/column_order_header_sort.py b/tools/column_order_header_sort/column_order_header_sort.py
index 43254aad895..0fcdcc0b2a8 100644
--- a/tools/column_order_header_sort/column_order_header_sort.py
+++ b/tools/column_order_header_sort/column_order_header_sort.py
@@ -11,25 +11,35 @@
 key_column = sys.argv[4]
 
 try:
-    key_column = int( key_column ) - 1
+    key_column = int(key_column) - 1
 except Exception:
     key_column = None
 
 header = None
-with open( input_filename, 'r' ) as fh:
-    header = fh.readline().strip( '\r\n' )
-header = header.split( delimiter )
-assert len( header ) == len( set( header ) ), "Header values must be unique"
-sorted_header = list( header )
+with open(input_filename, "r") as fh:
+    header = fh.readline().strip("\r\n")
+header = header.split(delimiter)
+assert len(header) == len(set(header)), "Header values must be unique"
+sorted_header = list(header)
 if key_column is None:
     columns = []
 else:
-    columns = [ key_column ]
-    sorted_header.pop( key_column )
+    columns = [key_column]
+    sorted_header.pop(key_column)
 sorted_header.sort()
 
 for key in sorted_header:
-    columns.append( header.index( key ) )
+    columns.append(header.index(key))
 
-awk_cmd = AWK_CMD % ( delimiter, delimiter, ",".join( map( lambda x: "$%i" % ( x + 1 ), columns ) ) )
-sys.exit( subprocess.call( [ 'gawk', awk_cmd, input_filename ], stdout=open( output_filename, 'wb+' ), shell=False ) )
+awk_cmd = AWK_CMD % (
+    delimiter,
+    delimiter,
+    ",".join(map(lambda x: "$%i" % (x + 1), columns)),
+)
+sys.exit(
+    subprocess.call(
+        ["gawk", awk_cmd, input_filename],
+        stdout=open(output_filename, "wb+"),
+        shell=False,
+    )
+)
diff --git a/tools/cwpair2/cwpair2.py b/tools/cwpair2/cwpair2.py
index 1a50d75850d..0ea75ea084a 100644
--- a/tools/cwpair2/cwpair2.py
+++ b/tools/cwpair2/cwpair2.py
@@ -17,18 +17,63 @@
 
 import cwpair2_util
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--input', dest='inputs', action='append', nargs=2, help="Input datasets")
-    parser.add_argument('--method', dest='method', default='mode', help='Method of finding match.')
-    parser.add_argument('--up_distance', dest='up_distance', type=int, default=50, help='Distance upstream from a pair.')
-    parser.add_argument('--down_distance', dest='down_distance', type=int, default=100, help='Distance downstream of a pair.')
-    parser.add_argument('--binsize', dest='binsize', type=int, default=1, help='Width of bins for plots and mode.')
-    parser.add_argument('--threshold_format', dest='threshold_format', help='Percentage to filter the 95th percentile.')
-    parser.add_argument('--relative_threshold', dest='relative_threshold', type=float, default=0.0, help='Percentage to filter the 95th percentile.')
-    parser.add_argument('--absolute_threshold', dest='absolute_threshold', type=float, default=0.0, help='Absolute value to filter.')
-    parser.add_argument('--output_files', dest='output_files', default='matched_pair', help='Restrict output dataset collections.')
-    parser.add_argument('--statistics_output', dest='statistics_output', help='Statistics output file.')
+    parser.add_argument(
+        "--input", dest="inputs", action="append", nargs=2, help="Input datasets"
+    )
+    parser.add_argument(
+        "--method", dest="method", default="mode", help="Method of finding match."
+    )
+    parser.add_argument(
+        "--up_distance",
+        dest="up_distance",
+        type=int,
+        default=50,
+        help="Distance upstream from a pair.",
+    )
+    parser.add_argument(
+        "--down_distance",
+        dest="down_distance",
+        type=int,
+        default=100,
+        help="Distance downstream of a pair.",
+    )
+    parser.add_argument(
+        "--binsize",
+        dest="binsize",
+        type=int,
+        default=1,
+        help="Width of bins for plots and mode.",
+    )
+    parser.add_argument(
+        "--threshold_format",
+        dest="threshold_format",
+        help="Percentage to filter the 95th percentile.",
+    )
+    parser.add_argument(
+        "--relative_threshold",
+        dest="relative_threshold",
+        type=float,
+        default=0.0,
+        help="Percentage to filter the 95th percentile.",
+    )
+    parser.add_argument(
+        "--absolute_threshold",
+        dest="absolute_threshold",
+        type=float,
+        default=0.0,
+        help="Absolute value to filter.",
+    )
+    parser.add_argument(
+        "--output_files",
+        dest="output_files",
+        default="matched_pair",
+        help="Restrict output dataset collections.",
+    )
+    parser.add_argument(
+        "--statistics_output", dest="statistics_output", help="Statistics output file."
+    )
     args = parser.parse_args()
 
     cwpair2_util.create_directories()
@@ -41,14 +86,16 @@
     else:
         threshold = 0
     for (dataset_path, hid) in args.inputs:
-        stats = cwpair2_util.process_file(dataset_path,
-                                          hid,
-                                          args.method,
-                                          threshold,
-                                          args.up_distance,
-                                          args.down_distance,
-                                          args.binsize,
-                                          args.output_files)
+        stats = cwpair2_util.process_file(
+            dataset_path,
+            hid,
+            args.method,
+            threshold,
+            args.up_distance,
+            args.down_distance,
+            args.binsize,
+            args.output_files,
+        )
         statistics.extend(stats)
     # Accumulate statistics.
     by_file = {}
@@ -56,13 +103,15 @@
         # Skip "None" statistics from failed files
         if not stats:
             continue
-        path = stats['stats_path']
+        path = stats["stats_path"]
         if path not in by_file:
             by_file[path] = []
         by_file[path].append(stats)
     # Write tabular statistics file.
-    keys = ['fname', 'final_mode', 'preview_mode', 'perc95', 'paired', 'orphans']
-    statistics_out = csv.writer(open(args.statistics_output, 'wt'), delimiter='\t', lineterminator="\n")
+    keys = ["fname", "final_mode", "preview_mode", "perc95", "paired", "orphans"]
+    statistics_out = csv.writer(
+        open(args.statistics_output, "wt"), delimiter="\t", lineterminator="\n"
+    )
     statistics_out.writerow(keys)
     for file_path, statistics in by_file.items():
         for stats in statistics:
diff --git a/tools/cwpair2/cwpair2_util.py b/tools/cwpair2/cwpair2_util.py
index 0cd0ffb6a4e..c9fb2f17818 100644
--- a/tools/cwpair2/cwpair2_util.py
+++ b/tools/cwpair2/cwpair2_util.py
@@ -5,39 +5,39 @@
 import traceback
 
 import matplotlib
-matplotlib.use('Agg')
+
+matplotlib.use("Agg")
 from matplotlib import pyplot  # noqa: I202,E402
 
 # Data outputs
-DETAILS = 'D'
-MATCHED_PAIRS = 'MP'
-ORPHANS = 'O'
+DETAILS = "D"
+MATCHED_PAIRS = "MP"
+ORPHANS = "O"
 # Data output formats
-GFF_EXT = 'gff'
-TABULAR_EXT = 'tabular'
+GFF_EXT = "gff"
+TABULAR_EXT = "tabular"
 # Statistics histograms output directory.
-HISTOGRAM = 'H'
+HISTOGRAM = "H"
 # Statistics outputs
-FINAL_PLOTS = 'F'
-PREVIEW_PLOTS = 'P'
-STATS_GRAPH = 'C'
+FINAL_PLOTS = "F"
+PREVIEW_PLOTS = "P"
+STATS_GRAPH = "C"
 
 # Graph settings.
-COLORS = 'krg'
-Y_LABEL = 'Peak-pair counts'
-X_LABEL = 'Peak-pair distance (bp)'
+COLORS = "krg"
+Y_LABEL = "Peak-pair counts"
+X_LABEL = "Peak-pair distance (bp)"
 TICK_WIDTH = 3
 ADJUST = [0.140, 0.9, 0.9, 0.1]
-PLOT_FORMAT = 'pdf'
-pyplot.rc('xtick.major', size=10.00)
-pyplot.rc('ytick.major', size=10.00)
-pyplot.rc('lines', linewidth=4.00)
-pyplot.rc('axes', linewidth=3.00)
-pyplot.rc('font', family='Bitstream Vera Sans', size=32.0)
+PLOT_FORMAT = "pdf"
+pyplot.rc("xtick.major", size=10.00)
+pyplot.rc("ytick.major", size=10.00)
+pyplot.rc("lines", linewidth=4.00)
+pyplot.rc("axes", linewidth=3.00)
+pyplot.rc("font", family="Bitstream Vera Sans", size=32.0)
 
 
 class FrequencyDistribution(object):
-
     def __init__(self, start, end, binsize=10, d=None):
         self.start = start
         self.end = end
@@ -48,7 +48,11 @@ def get_bin(self, x):
         """
         Returns the centre of the bin in which a data point falls
         """
-        return self.start + (x - self.start) // self.binsize * self.binsize + self.binsize / 2.0
+        return (
+            self.start
+            + (x - self.start) // self.binsize * self.binsize
+            + self.binsize / 2.0
+        )
 
     def add(self, x):
         x = self.get_bin(x)
@@ -84,14 +88,16 @@ def distance(peak1, peak2):
     return (peak2[1] + peak2[2]) / 2.0 - (peak1[1] + peak1[2]) / 2.0
 
 
-def gff_row(cname, start, end, score, source, type='.', strand='.', phase='.', attrs={}):
+def gff_row(
+    cname, start, end, score, source, type=".", strand=".", phase=".", attrs={}
+):
     return (cname, source, type, start, end, score, strand, phase, gff_attrs(attrs))
 
 
 def gff_attrs(d):
     if not d:
-        return '.'
-    return ';'.join('%s=%s' % item for item in d.items())
+        return "."
+    return ";".join("%s=%s" % item for item in d.items())
 
 
 def parse_chromosomes(reader):
@@ -99,7 +105,7 @@ def parse_chromosomes(reader):
     chromosomes = {}
     for line in reader:
         line = line.rstrip("\r\n")
-        if not line or line.startswith('#'):
+        if not line or line.startswith("#"):
             continue
         cname, _, _, start, end, value, strand, _, _ = line.split("\t")
         start = int(start)
@@ -139,8 +145,8 @@ def peak_filter(chromosomes, threshold):
 
 
 def split_strands(chromosome):
-    watson = [peak for peak in chromosome if peak[0] == '+']
-    crick = [peak for peak in chromosome if peak[0] == '-']
+    watson = [peak for peak in chromosome if peak[0] == "+"]
+    crick = [peak for peak in chromosome if peak[0] == "-"]
     return watson, crick
 
 
@@ -192,6 +198,7 @@ def key(cpeak):
             # And then prefer less negative distances
             d = 10000 - d
         return d
+
     return min(window, key=key)
 
 
@@ -201,125 +208,167 @@ def match_mode(window, peak, mode):
     return min(window, key=lambda cpeak: abs(distance(peak, cpeak) - mode))
 
 
-METHODS = {'mode': match_mode, 'closest': match_closest, 'largest': match_largest}
+METHODS = {"mode": match_mode, "closest": match_closest, "largest": match_largest}
 
 
-def frequency_plot(freqs, fname, labels=[], title=''):
+def frequency_plot(freqs, fname, labels=[], title=""):
     pyplot.clf()
     pyplot.figure(figsize=(10, 10))
     for i, freq in enumerate(freqs):
         x, y = freq.graph_series()
-        pyplot.plot(x, y, '%s-' % COLORS[i])
+        pyplot.plot(x, y, "%s-" % COLORS[i])
     if len(freqs) > 1:
         pyplot.legend(labels)
     pyplot.xlim(freq.start, freq.end)
     pyplot.ylim(ymin=0)
     pyplot.ylabel(Y_LABEL)
     pyplot.xlabel(X_LABEL)
-    pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
+    pyplot.subplots_adjust(
+        left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3]
+    )
     # Get the current axes
     ax = pyplot.gca()
-    for l in ax.get_xticklines() + ax.get_yticklines():
-        l.set_markeredgewidth(TICK_WIDTH)
+    for line in ax.get_xticklines() + ax.get_yticklines():
+        line.set_markeredgewidth(TICK_WIDTH)
     pyplot.savefig(fname)
 
 
 def create_directories():
     # Output histograms in pdf.
     os.mkdir(HISTOGRAM)
-    os.mkdir('data_%s' % DETAILS)
-    os.mkdir('data_%s' % ORPHANS)
-    os.mkdir('data_%s' % MATCHED_PAIRS)
-
-
-def process_file(dataset_path, galaxy_hid, method, threshold, up_distance,
-                 down_distance, binsize, output_files):
-    if method == 'all':
+    os.mkdir("data_%s" % DETAILS)
+    os.mkdir("data_%s" % ORPHANS)
+    os.mkdir("data_%s" % MATCHED_PAIRS)
+
+
+def process_file(
+    dataset_path,
+    galaxy_hid,
+    method,
+    threshold,
+    up_distance,
+    down_distance,
+    binsize,
+    output_files,
+):
+    if method == "all":
         match_methods = METHODS.keys()
     else:
         match_methods = [method]
     statistics = []
     for match_method in match_methods:
-        stats = perform_process(dataset_path,
-                                galaxy_hid,
-                                match_method,
-                                threshold,
-                                up_distance,
-                                down_distance,
-                                binsize,
-                                output_files)
+        stats = perform_process(
+            dataset_path,
+            galaxy_hid,
+            match_method,
+            threshold,
+            up_distance,
+            down_distance,
+            binsize,
+            output_files,
+        )
         statistics.append(stats)
-    if output_files == 'all' and method == 'all':
-        frequency_plot([s['dist'] for s in statistics],
-                       statistics[0]['graph_path'],
-                       labels=list(METHODS.keys()))
+    if output_files == "all" and method == "all":
+        frequency_plot(
+            [s["dist"] for s in statistics],
+            statistics[0]["graph_path"],
+            labels=list(METHODS.keys()),
+        )
     return statistics
 
 
-def perform_process(dataset_path, galaxy_hid, method, threshold, up_distance,
-                    down_distance, binsize, output_files):
+def perform_process(
+    dataset_path,
+    galaxy_hid,
+    method,
+    threshold,
+    up_distance,
+    down_distance,
+    binsize,
+    output_files,
+):
     output_details = output_files in ["all", "matched_pair_orphan_detail"]
     output_plots = output_files in ["all"]
-    output_orphans = output_files in ["all", "matched_pair_orphan", "matched_pair_orphan_detail"]
+    output_orphans = output_files in [
+        "all",
+        "matched_pair_orphan",
+        "matched_pair_orphan_detail",
+    ]
     # Keep track of statistics for the output file
     statistics = {}
     fpath, fname = os.path.split(dataset_path)
-    statistics['fname'] = '%s: data %s' % (method, str(galaxy_hid))
-    statistics['dir'] = fpath
+    statistics["fname"] = "%s: data %s" % (method, str(galaxy_hid))
+    statistics["dir"] = fpath
     if threshold >= 1:
-        filter_string = 'fa%d' % threshold
+        filter_string = "fa%d" % threshold
     else:
-        filter_string = 'f%d' % (threshold * 100)
-    fname = '%s_%su%dd%d_on_data_%s' % (method, filter_string, up_distance, down_distance, galaxy_hid)
+        filter_string = "f%d" % (threshold * 100)
+    fname = "%s_%su%dd%d_on_data_%s" % (
+        method,
+        filter_string,
+        up_distance,
+        down_distance,
+        galaxy_hid,
+    )
 
     def make_histogram_path(output_type, fname):
-        return os.path.join(HISTOGRAM, 'histogram_%s_%s.%s' % (output_type, fname, PLOT_FORMAT))
+        return os.path.join(
+            HISTOGRAM, "histogram_%s_%s.%s" % (output_type, fname, PLOT_FORMAT)
+        )
 
     def make_path(output_type, extension, fname):
         # Returns the full path for an output.
-        return os.path.join(output_type, '%s_%s.%s' % (output_type, fname, extension))
+        return os.path.join(output_type, "%s_%s.%s" % (output_type, fname, extension))
 
     def td_writer(output_type, extension, fname):
         # Returns a tab-delimited writer for a specified output.
         output_file_path = make_path(output_type, extension, fname)
-        return csv.writer(open(output_file_path, 'wt'), delimiter='\t', lineterminator="\n")
+        return csv.writer(
+            open(output_file_path, "wt"), delimiter="\t", lineterminator="\n"
+        )
 
-    with open(dataset_path, 'rt') as input:
+    with open(dataset_path, "rt") as input:
         try:
             chromosomes = parse_chromosomes(input)
         except Exception:
-            stop_err('Unable to parse file "%s".\n%s' % (dataset_path, traceback.format_exc()))
+            stop_err(
+                'Unable to parse file "%s".\n%s'
+                % (dataset_path, traceback.format_exc())
+            )
     if output_details:
         # Details
-        detailed_output = td_writer('data_%s' % DETAILS, TABULAR_EXT, fname)
-        detailed_output.writerow(('chrom', 'start', 'end', 'value', 'strand') * 2 + ('midpoint', 'c-w reads sum', 'c-w distance (bp)'))
+        detailed_output = td_writer("data_%s" % DETAILS, TABULAR_EXT, fname)
+        detailed_output.writerow(
+            ("chrom", "start", "end", "value", "strand") * 2
+            + ("midpoint", "c-w reads sum", "c-w distance (bp)")
+        )
     if output_plots:
         # Final Plot
         final_plot_path = make_histogram_path(FINAL_PLOTS, fname)
     if output_orphans:
         # Orphans
-        orphan_output = td_writer('data_%s' % ORPHANS, TABULAR_EXT, fname)
-        orphan_output.writerow(('chrom', 'strand', 'start', 'end', 'value'))
+        orphan_output = td_writer("data_%s" % ORPHANS, TABULAR_EXT, fname)
+        orphan_output.writerow(("chrom", "strand", "start", "end", "value"))
     if output_plots:
         # Preview Plot
         preview_plot_path = make_histogram_path(PREVIEW_PLOTS, fname)
     # Matched Pairs.
-    matched_pairs_output = td_writer('data_%s' % MATCHED_PAIRS, GFF_EXT, fname)
-    statistics['stats_path'] = 'statistics.%s' % TABULAR_EXT
+    matched_pairs_output = td_writer("data_%s" % MATCHED_PAIRS, GFF_EXT, fname)
+    statistics["stats_path"] = "statistics.%s" % TABULAR_EXT
     if output_plots:
-        statistics['graph_path'] = make_histogram_path(STATS_GRAPH, fname)
-    statistics['perc95'] = perc95(chromosomes)
+        statistics["graph_path"] = make_histogram_path(STATS_GRAPH, fname)
+    statistics["perc95"] = perc95(chromosomes)
     if threshold > 0:
         # Apply peak_filter
         peak_filter(chromosomes, threshold)
-    if method == 'mode':
+    if method == "mode":
         freq = all_pair_distribution(chromosomes, up_distance, down_distance, binsize)
         mode = freq.mode()
-        statistics['preview_mode'] = mode
+        statistics["preview_mode"] = mode
         if output_plots:
-            frequency_plot([freq], preview_plot_path, title='Preview frequency plot')
+            frequency_plot([freq], preview_plot_path, title="Preview frequency plot")
     else:
-        statistics['preview_mode'] = 'NA'
+        statistics["preview_mode"] = "NA"
     dist = FrequencyDistribution(-up_distance, down_distance, binsize=binsize)
     orphans = 0
     # x will be used to archive the summary dataset
@@ -334,7 +383,7 @@ def td_writer(output_type, extension, fname):
         keys = make_keys(crick)
         for peak in watson:
             window = get_window(crick, peak, up_distance, down_distance, keys)
-            if method == 'mode':
+            if method == "mode":
                 match = match_mode(window, peak, mode)
             else:
                 match = METHODS[method](window, peak)
@@ -343,25 +392,34 @@ def td_writer(output_type, extension, fname):
                 d = distance(peak, match)
                 dist.add(d)
                 # Simple output in gff format.
-                x.append(gff_row(cname,
-                                 source='cwpair',
-                                 start=midpoint,
-                                 end=midpoint + 1,
-                                 score=peak[3] + match[3],
-                                 attrs={'cw_distance': d}))
+                x.append(
+                    gff_row(
+                        cname,
+                        source="cwpair",
+                        start=midpoint,
+                        end=midpoint + 1,
+                        score=peak[3] + match[3],
+                        attrs={"cw_distance": d},
+                    )
+                )
                 if output_details:
-                    detailed_output.writerow((cname,
-                                              peak[1],
-                                              peak[2],
-                                              peak[3],
-                                              '+',
-                                              cname,
-                                              match[1],
-                                              match[2],
-                                              match[3], '-',
-                                              midpoint,
-                                              peak[3] + match[3],
-                                              d))
+                    detailed_output.writerow(
+                        (
+                            cname,
+                            peak[1],
+                            peak[2],
+                            peak[3],
+                            "+",
+                            cname,
+                            match[1],
+                            match[2],
+                            match[3],
+                            "-",
+                            midpoint,
+                            peak[3] + match[3],
+                            d,
+                        )
+                    )
                 i = bisect.bisect_left(keys, (match[1] + match[2]) / 2)
                 del crick[i]
                 del keys[i]
@@ -384,19 +442,19 @@ def td_writer(output_type, extension, fname):
         # Dataset in tuple cannot be modified in Python, so row will
         # be converted to list format to add 'chr'.
         if row_tmp[0] == "999":
-            row_tmp[0] = 'chrM'
+            row_tmp[0] = "chrM"
         elif row_tmp[0] == "998":
-            row_tmp[0] = 'chrY'
+            row_tmp[0] = "chrY"
         elif row_tmp[0] == "997":
-            row_tmp[0] = 'chrX'
+            row_tmp[0] = "chrX"
         else:
             row_tmp[0] = row_tmp[0]
         # Print row_tmp.
         matched_pairs_output.writerow(row_tmp)
-    statistics['paired'] = dist.size() * 2
-    statistics['orphans'] = orphans
-    statistics['final_mode'] = dist.mode()
+    statistics["paired"] = dist.size() * 2
+    statistics["orphans"] = orphans
+    statistics["final_mode"] = dist.mode()
     if output_plots:
-        frequency_plot([dist], final_plot_path, title='Frequency distribution')
-    statistics['dist'] = dist
+        frequency_plot([dist], final_plot_path, title="Frequency distribution")
+    statistics["dist"] = dist
     return statistics
diff --git a/tools/ebi_tools/ebeye_urllib.py b/tools/ebi_tools/ebeye_urllib.py
index 7f1b5eb8cda..c7e3207b399 100644
--- a/tools/ebi_tools/ebeye_urllib.py
+++ b/tools/ebi_tools/ebeye_urllib.py
@@ -22,7 +22,7 @@
 from xmltramp2 import xmltramp
 
 # Service base URL
-baseUrl = 'https://www.ebi.ac.uk/ebisearch/ws/rest'
+baseUrl = "https://www.ebi.ac.uk/ebisearch/ws/rest"
 
 # Debug level
 debugLevel = 0
@@ -30,66 +30,63 @@
 
 # Debug print
 def printDebugMessage(functionName, message, level):
-    if(level <= debugLevel):
-        print('[' + functionName + '] ' + message)
+    if level <= debugLevel:
+        print("[" + functionName + "] " + message)
 
 
 # User-agent for request.
 def getUserAgent():
-    printDebugMessage('getUserAgent', 'Begin', 11)
-    urllib_agent = 'Python-urllib/%s' % platform.python_version()
-    clientRevision = '$Revision: 2468 $'
-    clientVersion = '0'
+    printDebugMessage("getUserAgent", "Begin", 11)
+    urllib_agent = "Python-urllib/%s" % platform.python_version()
+    clientRevision = "$Revision: 2468 $"
+    clientVersion = "0"
     if len(clientRevision) > 11:
         clientVersion = clientRevision[11:-2]
-    user_agent = 'EBI-Sample-Client/%s (%s; Python %s; %s) %s' % (
-        clientVersion, os.path.basename(__file__),
-        platform.python_version(), platform.system(),
-        urllib_agent
+    user_agent = "EBI-Sample-Client/%s (%s; Python %s; %s) %s" % (
+        clientVersion,
+        os.path.basename(__file__),
+        platform.python_version(),
+        platform.system(),
+        urllib_agent,
     )
-    printDebugMessage('getUserAgent', 'user_agent: %s' % user_agent, 12)
-    printDebugMessage('getUserAgent', 'End', 11)
+    printDebugMessage("getUserAgent", "user_agent: %s" % user_agent, 12)
+    printDebugMessage("getUserAgent", "End", 11)
     return user_agent
 
 
 # Wrapper for a REST (HTTP GET) request
 def restRequest(url):
-    printDebugMessage('restRequest', 'Begin', 11)
-    printDebugMessage('restRequest', 'url: %s' % url, 11)
+    printDebugMessage("restRequest", "Begin", 11)
+    printDebugMessage("restRequest", "url: %s" % url, 11)
     url = quote(url, safe="%/:=&?~#+!$,;'@()*[]")
 
     try:
         user_agent = getUserAgent()
-        http_headers = {
-            'User-Agent': user_agent,
-            'Accept-Encoding': 'gzip'
-        }
+        http_headers = {"User-Agent": user_agent, "Accept-Encoding": "gzip"}
         req = Request(url, None, http_headers)
         resp = urlopen(req)
-        encoding = resp.info().get('Content-Encoding')
+        encoding = resp.info().get("Content-Encoding")
         result = None
-        if encoding is None or encoding == 'identity':
-            result = text_type(resp.read(), 'utf-8')
-        elif encoding == 'gzip':
+        if encoding is None or encoding == "identity":
+            result = text_type(resp.read(), "utf-8")
+        elif encoding == "gzip":
             result = resp.read()
-            printDebugMessage('restRequest', 'result: %s' % result, 21)
-            gz = GzipFile(
-                fileobj=BytesIO(result),
-                mode="r")
-            result = text_type(gz.read(), 'utf-8')
+            printDebugMessage("restRequest", "result: %s" % result, 21)
+            gz = GzipFile(fileobj=BytesIO(result), mode="r")
+            result = text_type(gz.read(), "utf-8")
         else:
-            raise Exception('Unsupported Content-Encoding')
+            raise Exception("Unsupported Content-Encoding")
         resp.close()
     except HTTPError as ex:
         raise ex
-    printDebugMessage('restRequest', 'result: %s' % result, 11)
-    printDebugMessage('restRequest', 'End', 11)
+    printDebugMessage("restRequest", "result: %s" % result, 11)
+    printDebugMessage("restRequest", "End", 11)
     return result
 
 
 def hasSubdomains(domainInfo):
     for dir in domainInfo._dir:
-        if dir._name == 'subdomains':
+        if dir._name == "subdomains":
             return True
     return False
 
@@ -99,30 +96,31 @@ def extractUsefulFields(fieldInfos):
     retrievable = []
 
     for fieldInfo in fieldInfos:
-        if fieldInfo('id') == "$facets":
+        if fieldInfo("id") == "$facets":
             continue
 
-        options = fieldInfo['options']['option':]
+        options = fieldInfo["options"]["option":]
         for option in options:
             if option("name") == "searchable" and str(option) == "true":
-                searchable.append(fieldInfo('id'))
+                searchable.append(fieldInfo("id"))
             if option("name") == "retrievable" and str(option) == "true":
-                retrievable.append(fieldInfo('id'))
+                retrievable.append(fieldInfo("id"))
     return searchable, retrievable
 
 
 def extractLowerLevelDomains(domainInfo, domains):
     if hasSubdomains(domainInfo):
-        subdomains = domainInfo['subdomains']['domain':]
+        subdomains = domainInfo["subdomains"]["domain":]
         for subdomain in subdomains:
-            domains = extractLowerLevelDomains( subdomain, domains)
+            domains = extractLowerLevelDomains(subdomain, domains)
     else:
         searchable, retrievable = extractUsefulFields(
-            domainInfo['fieldInfos']['fieldInfo':])
+            domainInfo["fieldInfos"]["fieldInfo":]
+        )
 
-        domain_id = domainInfo('id')
+        domain_id = domainInfo("id")
         domains.setdefault(domain_id, {})
-        domains[domain_id]["name"] = domainInfo('name')
+        domains[domain_id]["name"] = domainInfo("name")
         domains[domain_id]["searchable_fields"] = sorted(searchable)
         domains[domain_id]["retrievable_fields"] = sorted(retrievable)
     return domains
@@ -130,18 +128,18 @@ def extractLowerLevelDomains(domainInfo, domains):
 
 # Get domain Hierarchy
 def getDomainHierarchy():
-    requestUrl = baseUrl + '/allebi'
+    requestUrl = baseUrl + "/allebi"
     xmlDoc = restRequest(requestUrl)
     doc = xmltramp.parse(xmlDoc)
-    allebi = doc['domains']['domain']
+    allebi = doc["domains"]["domain"]
     lower_level_domains = extractLowerLevelDomains(allebi, {})
-    printDebugMessage('getDomainHierarchy', 'End', 1)
+    printDebugMessage("getDomainHierarchy", "End", 1)
     return lower_level_domains
 
 
 # Check if a databaseInfo matches a database name.
 def is_database(dbInfo, dbName):
-    printDebugMessage('is_database', 'Begin', 11)
+    printDebugMessage("is_database", "Begin", 11)
     retVal = False
     if str(dbInfo.name) == dbName:
         retVal = True
@@ -149,27 +147,27 @@ def is_database(dbInfo, dbName):
         for dbAlias in dbInfo.aliasList:
             if str(dbAlias) == dbName:
                 retVal = True
-    printDebugMessage('is_database', 'retVal: %s' % retVal, 11)
-    printDebugMessage('is_database', 'End', 11)
+    printDebugMessage("is_database", "retVal: %s" % retVal, 11)
+    printDebugMessage("is_database", "End", 11)
     return retVal
 
 
 # Get number of results
 def getNumberOfResults(domain, query):
-    printDebugMessage('getNumberOfResults', 'Begin', 1)
-    requestUrl = baseUrl + '/' + domain + '?query=' + query + '&size=0'
-    printDebugMessage('getNumberOfResults', requestUrl, 2)
+    printDebugMessage("getNumberOfResults", "Begin", 1)
+    requestUrl = baseUrl + "/" + domain + "?query=" + query + "&size=0"
+    printDebugMessage("getNumberOfResults", requestUrl, 2)
     xmlDoc = restRequest(requestUrl)
     doc = xmltramp.parse(xmlDoc)
-    numberOfResults = int(str(doc['hitCount']))
-    printDebugMessage('getNumberOfResults', 'End', 1)
+    numberOfResults = int(str(doc["hitCount"]))
+    printDebugMessage("getNumberOfResults", "End", 1)
     return numberOfResults
 
 
 def makeRequest(requestUrl):
     xmlDoc = restRequest(requestUrl)
     doc = xmltramp.parse(xmlDoc)
-    entries = doc['entries']['entry':]
+    entries = doc["entries"]["entry":]
     formatted_output = printEntries(entries)
     return formatted_output
 
@@ -181,21 +179,21 @@ def getResults(domain, query, fields):
     quotient = numberOfResults // maximum_size
     start = 0
 
-    printDebugMessage('getResults', 'Begin', 1)
+    printDebugMessage("getResults", "Begin", 1)
     request_output = "%s\tlink\n" % (fields.replace(",", "\t"))
     for i in range(quotient):
         start = maximum_size * i
-        requestUrl = baseUrl + '/' + domain + '?query=' + query
-        requestUrl += '&fields=' + fields + '&size=' + str(maximum_size)
-        requestUrl += '&start=' + str(start) + '&fieldurl=true'
+        requestUrl = baseUrl + "/" + domain + "?query=" + query
+        requestUrl += "&fields=" + fields + "&size=" + str(maximum_size)
+        requestUrl += "&start=" + str(start) + "&fieldurl=true"
         request_output += makeRequest(requestUrl)
 
     if (numberOfResults % 100) > 0:
         start = maximum_size * quotient
         remainder = numberOfResults - start
-        requestUrl = baseUrl + '/' + domain + '?query=' + query
-        requestUrl += '&fields=' + fields + '&size=' + str(remainder)
-        requestUrl += '&start=' + str(start) + '&fieldurl=true'
+        requestUrl = baseUrl + "/" + domain + "?query=" + query
+        requestUrl += "&fields=" + fields + "&size=" + str(remainder)
+        requestUrl += "&start=" + str(start) + "&fieldurl=true"
         request_output += makeRequest(requestUrl)
 
     print(request_output)
@@ -203,15 +201,15 @@ def getResults(domain, query, fields):
 
 def printEntries(entries):
     output = ""
-    printDebugMessage('printEntries', 'Begin', 1)
+    printDebugMessage("printEntries", "Begin", 1)
     for entry in entries:
         sep = ""
-        for field in entry['fields']['field':]:
+        for field in entry["fields"]["field":]:
             output += "%s" % (sep)
-            fields = field['values']['value':]
+            fields = field["values"]["value":]
             if len(fields) > 0:
                 sub_sep = ""
-                for value in field['values']['value':]:
+                for value in field["values"]["value":]:
                     output += "%s%s" % (sub_sep, value)
                     sub_sep = ","
             sep = "\t"
@@ -219,53 +217,53 @@ def printEntries(entries):
         if hasFieldUrls(entry):
             output += "%s" % (sep)
             sub_sep = ""
-            for fieldurl in entry['fieldURLs']['fieldURL':]:
+            for fieldurl in entry["fieldURLs"]["fieldURL":]:
                 output += "%s%s" % (sub_sep, str(fieldurl))
                 sub_sep = ","
             sep = "\t"
         if hasViewUrls(entry):
             output += "%s" % (sep)
             sub_sep = ""
-            for viewurl in entry['viewURLs']['viewURL':]:
+            for viewurl in entry["viewURLs"]["viewURL":]:
                 output += "%s%s" % (sub_sep, str(viewurl))
                 sub_sep = ","
         output += "\n"
-    printDebugMessage('printEntries', 'End', 1)
+    printDebugMessage("printEntries", "End", 1)
     return output
 
 
 def hasFieldUrls(entry):
     for dir in entry._dir:
-        if dir._name == 'fieldURLs':
+        if dir._name == "fieldURLs":
             return True
     return False
 
 
 def hasViewUrls(entry):
     for dir in entry._dir:
-        if dir._name == 'viewURLs':
+        if dir._name == "viewURLs":
             return True
     return False
 
 
 def getRunLink(run_id):
-    printDebugMessage('getEntries', 'Begin', 1)
-    requestUrl = baseUrl + '/metagenomics_runs/entry/' + run_id + '?fieldurl=true'
-    printDebugMessage('getEntries', requestUrl, 2)
+    printDebugMessage("getEntries", "Begin", 1)
+    requestUrl = baseUrl + "/metagenomics_runs/entry/" + run_id + "?fieldurl=true"
+    printDebugMessage("getEntries", requestUrl, 2)
     xmlDoc = restRequest(requestUrl)
     doc = xmltramp.parse(xmlDoc)
-    entries = doc['entries']['entry':]
-    fieldURL = ''
+    entries = doc["entries"]["entry":]
+    fieldURL = ""
     for entry in entries:
-        for fieldurl in entry['fieldURLs']['fieldURL':]:
+        for fieldurl in entry["fieldURLs"]["fieldURL":]:
             fieldURL += str(fieldurl)
-    printDebugMessage('getEntries', 'End', 1)
-    p = re.compile('http')
-    fieldURL = p.sub('https', fieldURL)
+    printDebugMessage("getEntries", "End", 1)
+    p = re.compile("http")
+    fieldURL = p.sub("https", fieldURL)
     print(fieldURL)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # Usage message
     usage = """
       %prog getDomainHierarchy
@@ -277,10 +275,7 @@ def getRunLink(run_id):
     description += "The searching tools are using the EB-eye search engine. "
     description += "http://www.ebi.ac.uk/ebisearch/"
     # Process command-line options
-    parser = OptionParser(
-        usage=usage,
-        description=description,
-        version='1.0')
+    parser = OptionParser(usage=usage, description=description, version="1.0")
     (options, args) = parser.parse_args()
 
     # No arguments, print usage
@@ -288,24 +283,24 @@ def getRunLink(run_id):
         parser.print_help()
 
     # Get domain hierarchy
-    elif args[0] == 'getDomainHierarchy':
+    elif args[0] == "getDomainHierarchy":
         getDomainHierarchy()
 
     # Get search results
-    elif args[0] == 'getResults':
+    elif args[0] == "getResults":
         if len(args) < 4:
-            print('domain, query and fields should be given.')
+            print("domain, query and fields should be given.")
         else:
             getResults(args[1], args[2], args[3])
 
     # Get run link results
-    elif args[0] == 'getRunLink':
+    elif args[0] == "getRunLink":
         if len(args) < 2:
-            print('run id should be given.')
+            print("run id should be given.")
         else:
             getRunLink(args[1])
 
     # Unknown argument combination, display usage
     else:
-        print('Error: unrecognised argument combination')
+        print("Error: unrecognised argument combination")
         parser.print_help()
diff --git a/tools/extract_genomic_dna/extract_genomic_dna.py b/tools/extract_genomic_dna/extract_genomic_dna.py
index 67efa7482cf..01a44925481 100644
--- a/tools/extract_genomic_dna/extract_genomic_dna.py
+++ b/tools/extract_genomic_dna/extract_genomic_dna.py
@@ -11,24 +11,47 @@
 import extract_genomic_dna_utils as egdu  # noqa: I100,I202
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--input_format', dest='input_format', help="Input dataset format")
-parser.add_argument('--input', dest='input', help="Input dataset")
-parser.add_argument('--genome', dest='genome', help="Input dataset genome build")
-parser.add_argument('--interpret_features', dest='interpret_features', default=None, help="Interpret features if input format is gff")
-parser.add_argument('--columns', dest='columns', help="Columns to use in input file")
-parser.add_argument('--reference_genome_source', dest='reference_genome_source', help="Source of reference genome file")
-parser.add_argument('--reference_genome', dest='reference_genome', help="Reference genome file")
-parser.add_argument('--output_format', dest='output_format', help="Output format")
-parser.add_argument('--fasta_header_type', dest='fasta_header_type', default=None, help="Fasta header format")
-parser.add_argument('--fasta_header_delimiter', dest='fasta_header_delimiter', default=None, help="Fasta header field delimiter")
-parser.add_argument('--output', dest='output', help="Output dataset")
+parser.add_argument("--input_format", dest="input_format", help="Input dataset format")
+parser.add_argument("--input", dest="input", help="Input dataset")
+parser.add_argument("--genome", dest="genome", help="Input dataset genome build")
+parser.add_argument(
+    "--interpret_features",
+    dest="interpret_features",
+    default=None,
+    help="Interpret features if input format is gff",
+)
+parser.add_argument("--columns", dest="columns", help="Columns to use in input file")
+parser.add_argument(
+    "--reference_genome_source",
+    dest="reference_genome_source",
+    help="Source of reference genome file",
+)
+parser.add_argument(
+    "--reference_genome", dest="reference_genome", help="Reference genome file"
+)
+parser.add_argument("--output_format", dest="output_format", help="Output format")
+parser.add_argument(
+    "--fasta_header_type",
+    dest="fasta_header_type",
+    default=None,
+    help="Fasta header format",
+)
+parser.add_argument(
+    "--fasta_header_delimiter",
+    dest="fasta_header_delimiter",
+    default=None,
+    help="Fasta header field delimiter",
+)
+parser.add_argument("--output", dest="output", help="Output dataset")
 args = parser.parse_args()
 
-input_is_gff = args.input_format == 'gff'
+input_is_gff = args.input_format == "gff"
 interpret_features = input_is_gff and args.interpret_features == "yes"
-if len(args.columns.split(',')) == 5:
+if len(args.columns.split(",")) == 5:
     # Bed file.
-    chrom_col, start_col, end_col, strand_col, name_col = egdu.parse_cols_arg(args.columns)
+    chrom_col, start_col, end_col, strand_col, name_col = egdu.parse_cols_arg(
+        args.columns
+    )
 else:
     # Gff file.
     chrom_col, start_col, end_col, strand_col = egdu.parse_cols_arg(args.columns)
@@ -47,13 +70,13 @@
 first_invalid_line = 0
 invalid_lines = []
 warnings = []
-warning = ''
+warning = ""
 twobitfile = None
 line_count = 1
 file_iterator = open(args.input)
 if interpret_features:
     file_iterator = egdu.GFFReaderWrapper(file_iterator, fix_strand=False)
-out = open(args.output, 'wt')
+out = open(args.output, "wt")
 
 for feature in file_iterator:
     # Ignore comments, headers.
@@ -70,9 +93,9 @@
         strand = feature.strand
     else:
         # Processing lines, either interval or GFF format.
-        line = feature.rstrip('\r\n')
+        line = feature.rstrip("\r\n")
         if line and not line.startswith("#"):
-            fields = line.split('\t')
+            fields = line.split("\t")
             try:
                 chrom = fields[chrom_col]
                 start = int(fields[start_col])
@@ -99,9 +122,9 @@
                     first_invalid_line = line_count
                 skipped_lines += len(invalid_lines)
                 continue
-            if strand not in ['+', '-']:
-                strand = '+'
-            sequence = ''
+            if strand not in ["+", "-"]:
+                strand = "+"
+            sequence = ""
         else:
             continue
     # Open sequence file and get sequence for feature/interval.
@@ -109,11 +132,16 @@
         if chrom in nibs:
             nib = nibs[chrom]
         else:
-            nibs[chrom] = nib = bx.seq.nib.NibFile(open("%s/%s.nib" % (seq_path, chrom)))
+            nibs[chrom] = nib = bx.seq.nib.NibFile(
+                open("%s/%s.nib" % (seq_path, chrom))
+            )
         try:
             sequence = nib.get(start, end - start)
         except Exception:
-            warning = "Unable to fetch the sequence from '%d' to '%d' for build '%s'. " % (start, end - start, args.genome)
+            warning = (
+                "Unable to fetch the sequence from '%d' to '%d' for build '%s'. "
+                % (start, end - start, args.genome)
+            )
             warnings.append(warning)
             if not invalid_lines:
                 invalid_lines = egdu.get_lines(feature)
@@ -121,18 +149,23 @@
             skipped_lines += len(invalid_lines)
             continue
     elif os.path.isfile(seq_path):
-        if not(twobitfile):
+        if not (twobitfile):
             twobitfile = bx.seq.twobit.TwoBitFile(open(seq_path))
         try:
             if interpret_features:
                 # Create sequence from intervals within a feature.
-                sequence = ''
+                sequence = ""
                 for interval in feature.intervals:
-                    sequence += twobitfile[interval.chrom][interval.start:interval.end]
+                    sequence += twobitfile[interval.chrom][
+                        interval.start: interval.end
+                    ]
             else:
                 sequence = twobitfile[chrom][start:end]
         except Exception:
-            warning = "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. " % (start, end - start, chrom)
+            warning = (
+                "Unable to fetch the sequence from '%d' to '%d' for chrom '%s'. "
+                % (start, end - start, chrom)
+            )
             warnings.append(warning)
             if not invalid_lines:
                 invalid_lines = egdu.get_lines(feature)
@@ -140,15 +173,21 @@
             skipped_lines += len(invalid_lines)
             continue
     else:
-        warning = "Chromosome by name '%s' was not found for build '%s'. " % (chrom, args.genome)
+        warning = "Chromosome by name '%s' was not found for build '%s'. " % (
+            chrom,
+            args.genome,
+        )
         warnings.append(warning)
         if not invalid_lines:
             invalid_lines = egdu.get_lines(feature)
             first_invalid_line = line_count
         skipped_lines += len(invalid_lines)
         continue
-    if sequence == '':
-        warning = "Chrom: '%s', start: '%d', end: '%d' is either invalid or not present in build '%s'. " % (chrom, start, end, args.genome)
+    if sequence == "":
+        warning = (
+            "Chrom: '%s', start: '%d', end: '%d' is either invalid or not present in build '%s'. "
+            % (chrom, start, end, args.genome)
+        )
         warnings.append(warning)
         if not invalid_lines:
             invalid_lines = egdu.get_lines(feature)
@@ -161,15 +200,18 @@
         if input_is_gff:
             start, end = egdu.convert_bed_coords_to_gff([start, end])
         if args.fasta_header_type == "bedtools_getfasta_default":
-            out.write(">%s\n" % egdu.get_bedtools_getfasta_default_header(str(chrom),
-                                                                          str(start),
-                                                                          str(end),
-                                                                          strand,
-                                                                          includes_strand_col))
+            out.write(
+                ">%s\n"
+                % egdu.get_bedtools_getfasta_default_header(
+                    str(chrom), str(start), str(end), strand, includes_strand_col
+                )
+            )
         else:
             # args.fasta_header_type == "char_delimited":
             fields = [args.genome, str(chrom), str(start), str(end), strand]
-            field_delimiter = egdu.get_fasta_header_delimiter(args.fasta_header_delimiter)
+            field_delimiter = egdu.get_fasta_header_delimiter(
+                args.fasta_header_delimiter
+            )
             meta_data = field_delimiter.join(fields)
             if name.strip():
                 out.write(">%s %s\n" % (meta_data, name))
@@ -184,20 +226,24 @@
     else:
         # output_format == "interval".
         if interpret_features:
-            meta_data = "\t".join([feature.chrom,
-                                   "galaxy_extract_genomic_dna",
-                                   "interval",
-                                   str(feature.start),
-                                   str(feature.end),
-                                   feature.score,
-                                   feature.strand,
-                                   ".",
-                                   egdu.gff_attributes_to_str(feature.attributes, "GTF")])
+            meta_data = "\t".join(
+                [
+                    feature.chrom,
+                    "galaxy_extract_genomic_dna",
+                    "interval",
+                    str(feature.start),
+                    str(feature.end),
+                    feature.score,
+                    feature.strand,
+                    ".",
+                    egdu.gff_attributes_to_str(feature.attributes, "GTF"),
+                ]
+            )
         else:
             # Here fields was set up around line 73.
             meta_data = "\t".join(fields)
         if input_is_gff:
-            format_str = "%s seq \"%s\";\n"
+            format_str = '%s seq "%s";\n'
         else:
             format_str = "%s\t%s\n"
         out.write(format_str % (meta_data, str(sequence)))
@@ -214,7 +260,10 @@
     print(warn_msg)
 if skipped_lines:
     # Error message includes up to the first 10 skipped lines.
-    print('Skipped %d invalid lines, 1st is #%d, "%s"' % (skipped_lines, first_invalid_line, '\n'.join(invalid_lines[:10])))
+    print(
+        'Skipped %d invalid lines, 1st is #%d, "%s"'
+        % (skipped_lines, first_invalid_line, "\n".join(invalid_lines[:10]))
+    )
 
 if args.reference_genome_source == "history":
     os.remove(seq_path)
diff --git a/tools/repmatch_gff3/repmatch_gff3_util.py b/tools/repmatch_gff3/repmatch_gff3_util.py
index 4d765c37775..c2d2ba75243 100644
--- a/tools/repmatch_gff3/repmatch_gff3_util.py
+++ b/tools/repmatch_gff3/repmatch_gff3_util.py
@@ -230,18 +230,18 @@ def frequency_histogram(freqs, dataset_path, labels=[], title=''):
     pyplot.ylabel(Y_LABEL)
     pyplot.subplots_adjust(left=ADJUST[0], right=ADJUST[1], top=ADJUST[2], bottom=ADJUST[3])
     ax = pyplot.gca()
-    for l in ax.get_xticklines() + ax.get_yticklines():
-        l.set_markeredgewidth(TICK_WIDTH)
+    for line in ax.get_xticklines() + ax.get_yticklines():
+        line.set_markeredgewidth(TICK_WIDTH)
     pyplot.savefig(dataset_path)
 
 
 METHODS = {'closest': match_closest, 'largest': match_largest}
 
 
-def gff_attrs(l):
-    if len(l) == 0:
+def gff_attrs(attrs):
+    if len(attrs) == 0:
         return '.'
-    return ';'.join('%s=%s' % (tup[0], tup[1]) for tup in l)
+    return ';'.join('%s=%s' % (tup[0], tup[1]) for tup in attrs)
 
 
 def parse_gff_attrs(s):
diff --git a/tools/tn93/tn93_cluster.py b/tools/tn93/tn93_cluster.py
index 2e177ab8831..ebb2ae51cc9 100755
--- a/tools/tn93/tn93_cluster.py
+++ b/tools/tn93/tn93_cluster.py
@@ -12,29 +12,34 @@ def cluster_to_fasta(json_file, fasta_file, reference_name=None):
         with open(fasta_file, "w") as fh2:
             for c in cluster_json:
                 if reference_name is not None:
-                    if reference_name in c['members']:
-                        cc = c['centroid'].split('\n')
+                    if reference_name in c["members"]:
+                        cc = c["centroid"].split("\n")
                         cc[0] = ">" + reference_name
                         print("\n".join(cc), file=fh2)
                         continue
-                print(c['centroid'], file=fh2)
+                print(c["centroid"], file=fh2)
 
-    return(os.path.getmtime(fasta_file), len(cluster_json))
+    return (os.path.getmtime(fasta_file), len(cluster_json))
 
 
 def run_command(command):
-    proc = subprocess.Popen(shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    proc = subprocess.Popen(
+        shlex.split(command), stdout=subprocess.PIPE, stderr=subprocess.PIPE
+    )
     stdout, stderr = proc.communicate()
     result = proc.returncode
     if result != 0:
-        print('Command `%s` failed with exit code %s\n' % (command, result), file=sys.stderr)
-        print('--------------------- STDOUT ---------------------')
-        print(stdout.decode().replace('\\n', '\n'))
-        print('------------------- END STDOUT -------------------')
-        print('--------------------- STDERR ---------------------', file=sys.stderr)
-        print(stderr.decode().replace('\\n', '\n'), file=sys.stderr)
-        print('------------------- END STDERR -------------------', file=sys.stderr)
-    return(int(result))
+        print(
+            "Command `%s` failed with exit code %s\n" % (command, result),
+            file=sys.stderr,
+        )
+        print("--------------------- STDOUT ---------------------")
+        print(stdout.decode().replace("\\n", "\n"))
+        print("------------------- END STDOUT -------------------")
+        print("--------------------- STDERR ---------------------", file=sys.stderr)
+        print(stderr.decode().replace("\\n", "\n"), file=sys.stderr)
+        print("------------------- END STDERR -------------------", file=sys.stderr)
+    return int(result)
 
 
 def main(arguments):
@@ -42,34 +47,55 @@ def main(arguments):
     step = threshold * 0.25
     with open(arguments.reference) as fh:
         for line in fh:
-            if line[0] == '>':
-                _ref_seq_name = line[1:].split(' ')[0].strip()
+            if line[0] == ">":
+                _ref_seq_name = line[1:].split(" ")[0].strip()
                 break
     while threshold <= 1:
-        command = 'tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s' % (threshold, arguments.ambigs, arguments.cluster_type, arguments.overlap, arguments.fraction, arguments.input)
+        command = (
+            "tn93-cluster -o clusters.json -t %g -a %s -c %s -m json -l %d -g %f %s"
+            % (
+                threshold,
+                arguments.ambigs,
+                arguments.cluster_type,
+                arguments.overlap,
+                arguments.fraction,
+                arguments.input,
+            )
+        )
         return_code = run_command(command)
         if return_code != 0:
             return return_code
-        input_stamp, cluster_count = cluster_to_fasta('clusters.json', 'clusters.fa', _ref_seq_name)
+        input_stamp, cluster_count = cluster_to_fasta(
+            "clusters.json", "clusters.fa", _ref_seq_name
+        )
         if cluster_count <= arguments.cluster_count:
             break
         else:
             threshold += step
-        print('Found %d clusters at threshold %f' % (cluster_count, threshold))
+        print("Found %d clusters at threshold %f" % (cluster_count, threshold))
     return 0
 
 
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Combine alignments into a single file, adding a reference sequence as well')
-    parser.add_argument('--input', help='Input MSA', required=True, type=str)
-    parser.add_argument('--reference', help='Reference sequence', required=True, type=str)
-    parser.add_argument('--output', help='Input MSA', required=True, type=str)
-    parser.add_argument('--threshold', help='Threshold', required=True, type=float)
-    parser.add_argument('--ambigs', help='Handle ambigs', required=True, type=str)
-    parser.add_argument('--cluster-type', help='Cluster type', required=True, type=str)
-    parser.add_argument('--overlap', help='Overlap', required=True, type=int)
-    parser.add_argument('--fraction', help='Fraction', required=True, type=float)
-    parser.add_argument('--cluster-count', help='Max query', required=True, type=int)
-    parser.add_argument('--compressed', help='File to write compressed clusters to', required=True, type=str)
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="Combine alignments into a single file, adding a reference sequence as well"
+    )
+    parser.add_argument("--input", help="Input MSA", required=True, type=str)
+    parser.add_argument(
+        "--reference", help="Reference sequence", required=True, type=str
+    )
+    parser.add_argument("--output", help="Input MSA", required=True, type=str)
+    parser.add_argument("--threshold", help="Threshold", required=True, type=float)
+    parser.add_argument("--ambigs", help="Handle ambigs", required=True, type=str)
+    parser.add_argument("--cluster-type", help="Cluster type", required=True, type=str)
+    parser.add_argument("--overlap", help="Overlap", required=True, type=int)
+    parser.add_argument("--fraction", help="Fraction", required=True, type=float)
+    parser.add_argument("--cluster-count", help="Max query", required=True, type=int)
+    parser.add_argument(
+        "--compressed",
+        help="File to write compressed clusters to",
+        required=True,
+        type=str,
+    )
     arguments = parser.parse_args()
     exit(main(arguments))
diff --git a/tools/vsnp/vsnp_determine_ref_from_data.py b/tools/vsnp/vsnp_determine_ref_from_data.py
index 8261c7fe09c..4f118ae97d9 100755
--- a/tools/vsnp/vsnp_determine_ref_from_data.py
+++ b/tools/vsnp/vsnp_determine_ref_from_data.py
@@ -8,8 +8,8 @@
 import yaml
 from Bio.SeqIO.QualityIO import FastqGeneralIterator
 
-OUTPUT_DBKEY_DIR = 'output_dbkey'
-OUTPUT_METRICS_DIR = 'output_metrics'
+OUTPUT_DBKEY_DIR = "output_dbkey"
+OUTPUT_METRICS_DIR = "output_metrics"
 
 
 def get_sample_name(file_path):
@@ -66,7 +66,15 @@ def get_dnaprints_dict(dnaprint_fields):
     return dnaprints_dict
 
 
-def get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum):
+def get_group_and_dbkey(
+    dnaprints_dict,
+    brucella_string,
+    brucella_sum,
+    bovis_string,
+    bovis_sum,
+    para_string,
+    para_sum,
+):
     if brucella_sum > 3:
         group = "Brucella"
         dbkey = get_dbkey(dnaprints_dict, "brucella", brucella_string)
@@ -118,14 +126,14 @@ def get_seq_counts(value, fastq_list, gzipped):
     count = 0
     for fastq_file in fastq_list:
         if gzipped:
-            with gzip.open(fastq_file, 'rt') as fh:
+            with gzip.open(fastq_file, "rt") as fh:
                 for title, seq, qual in FastqGeneralIterator(fh):
                     count += seq.count(value)
         else:
-            with open(fastq_file, 'r') as fh:
+            with open(fastq_file, "r") as fh:
                 for title, seq, qual in FastqGeneralIterator(fh):
                     count += seq.count(value)
-    return(value, count)
+    return (value, count)
 
 
 def get_species_counts(fastq_list, gzipped):
@@ -157,11 +165,11 @@ def get_species_strings(count_summary):
     for v in binary_dictionary.values():
         binary_list.append(v)
     brucella_binary = binary_list[:16]
-    brucella_string = ''.join(str(e) for e in brucella_binary)
+    brucella_string = "".join(str(e) for e in brucella_binary)
     bovis_binary = binary_list[16:24]
-    bovis_string = ''.join(str(e) for e in bovis_binary)
+    bovis_string = "".join(str(e) for e in bovis_binary)
     para_binary = binary_list[24:]
-    para_string = ''.join(str(e) for e in para_binary)
+    para_string = "".join(str(e) for e in para_binary)
     return brucella_string, bovis_string, para_string
 
 
@@ -194,15 +202,42 @@ def output_metrics(file_name, count_list, group, dbkey, output_file):
         fh.write("\ndbkey: %s\n" % dbkey)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
 
-    parser.add_argument('--dnaprint_fields', action='append', dest='dnaprint_fields', nargs=2, help="List of dnaprints data table value, name and path fields")
-    parser.add_argument('--read1', action='store', dest='read1', help='Required: single read')
-    parser.add_argument('--read2', action='store', dest='read2', required=False, default=None, help='Optional: paired read')
-    parser.add_argument('--gzipped', action='store_true', dest='gzipped', help='Input files are gzipped')
-    parser.add_argument('--output_dbkey', action='store', dest='output_dbkey', help='Output reference file')
-    parser.add_argument('--output_metrics', action='store', dest='output_metrics', help='Output metrics file')
+    parser.add_argument(
+        "--dnaprint_fields",
+        action="append",
+        dest="dnaprint_fields",
+        nargs=2,
+        help="List of dnaprints data table value, name and path fields",
+    )
+    parser.add_argument(
+        "--read1", action="store", dest="read1", help="Required: single read"
+    )
+    parser.add_argument(
+        "--read2",
+        action="store",
+        dest="read2",
+        required=False,
+        default=None,
+        help="Optional: paired read",
+    )
+    parser.add_argument(
+        "--gzipped", action="store_true", dest="gzipped", help="Input files are gzipped"
+    )
+    parser.add_argument(
+        "--output_dbkey",
+        action="store",
+        dest="output_dbkey",
+        help="Output reference file",
+    )
+    parser.add_argument(
+        "--output_metrics",
+        action="store",
+        dest="output_metrics",
+        help="Output metrics file",
+    )
 
     args = parser.parse_args()
 
@@ -219,7 +254,24 @@ def output_metrics(file_name, count_list, group, dbkey, output_file):
 
     # Here fastq_list consists of either a single read
     # or a set of paired reads, producing single outputs.
-    count_summary, count_list, brucella_sum, bovis_sum, para_sum = get_species_counts(fastq_list, args.gzipped)
+    count_summary, count_list, brucella_sum, bovis_sum, para_sum = get_species_counts(
+        fastq_list, args.gzipped
+    )
     brucella_string, bovis_string, para_string = get_species_strings(count_summary)
-    group, dbkey = get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum)
-    output_files(args.read1, count_list, group, dbkey, dbkey_file=args.output_dbkey, metrics_file=args.output_metrics)
+    group, dbkey = get_group_and_dbkey(
+        dnaprints_dict,
+        brucella_string,
+        brucella_sum,
+        bovis_string,
+        bovis_sum,
+        para_string,
+        para_sum,
+    )
+    output_files(
+        args.read1,
+        count_list,
+        group,
+        dbkey,
+        dbkey_file=args.output_dbkey,
+        metrics_file=args.output_metrics,
+    )