diff --git a/.gitignore b/.gitignore
index 01de547a..c6161e94 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,4 +18,3 @@ out
 scripts/helper_scripts/parser/output
 scripts/helper_scripts/parser/src/META-INF
 .idea/
-
diff --git a/scripts/build_binaries.sh b/scripts/build_binaries.sh
new file mode 100755
index 00000000..adfb5c93
--- /dev/null
+++ b/scripts/build_binaries.sh
@@ -0,0 +1,19 @@
+#! /usr/bin/env bash
+
+# All references to an external script should be relative to the location of this script.
+# See: http://mywiki.wooledge.org/BashFAQ/028
+CURRENT_LOCATION="${BASH_SOURCE%/*}"
+
+checkdep() {
+    which $1 > /dev/null 2>&1 || hash $1 > /dev/null 2>&1 || {
+        echo "Unipept database builder requires ${2:-$1} to be installed." >&2
+        exit 1
+    }
+}
+
+checkdep cargo "Rust toolchain"
+
+# Build binaries and copy them to the /helper_scripts folder
+cd $CURRENT_LOCATION/helper_scripts/unipept-database-rs
+cargo build --release
+find ./target/release -maxdepth 1 -type f -executable -exec cp {} .. \;
diff --git a/scripts/build_database.sh b/scripts/build_database.sh
index a44d13a0..08f40152 100755
--- a/scripts/build_database.sh
+++ b/scripts/build_database.sh
@@ -36,7 +36,7 @@ Required parameters:
     - swissprot: https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz
     - trembl: https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.xml.gz
 
-  * OUTPUT_DIR: Directory in which the tsv.gz-files that are produced by this script will be stored.
+  * OUTPUT_DIR: Directory in which the tsv.lz4-files that are produced by this script will be stored.
 
 Options:
   * -h
@@ -71,14 +71,12 @@ Dependencies:
 
 This script requires some non-standard dependencies to be installed before it can be used.
 This is a list of these items (which can normally be installed through your package manager):
 
-  * maven
-  * node-js
   * curl
   * pv
   * pigz
-  * java
   * uuidgen
   * parallel
+  * lz4
 END
 }
@@ -245,12 +243,10 @@ checkDirectoryAndCreate "$4"
 
 ### Check that all dependencies required for this script to function are met.
 checkdep curl
-checkdep java
-checkdep mvn "Maven"
 checkdep uuidgen
 checkdep pv
-checkdep node
 checkdep pigz
+checkdep lz4
 
 ### Default configuration for this script
 PEPTIDE_MIN_LENGTH=5 # What is the minimum length (inclusive) for tryptic peptides?"
@@ -258,9 +254,11 @@ PEPTIDE_MAX_LENGTH=50 # What is the maximum length (inclusive) for tryptic pepti
 TABDIR="$OUTPUT_DIR" # Where should I store the final TSV files (large, single-write)?
 INTDIR="$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT" # Where should I store intermediate TSV files (large, single-write, multiple-read?
 KMER_LENGTH=9 # What is the length (k) of the K-mer peptides?
-JAVA_MEM="2g" # How much memory should Java use?
 CMD_SORT="sort --buffer-size=$SORT_MEMORY --parallel=4" # Which sort command should I use?
-CMD_GZIP="gzip -" # Which pipe compression command should I use?
+CMD_GZIP="pigz -" # Which pipe compression command should I use for .gz files?
+CMD_ZCAT="pigz -dc" # Which decompression command should I use for .gz files?
+CMD_LZ4="lz4 -c" # Which pipe compression command should I use for .lz4 files?
+CMD_LZ4CAT="lz4 -dc" # Which decompression command should I use for .lz4 files?
 ENTREZ_BATCH_SIZE=1000 # Which batch size should I use for communication with Entrez?
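A minimal sketch (not from the patch itself) of how the CMD_GZIP/CMD_ZCAT/CMD_LZ4/CMD_LZ4CAT variables defined above are meant to compose; the file names are illustrative and the only assumption is that pigz and lz4 are on the PATH:

# Sketch only: decompress a .gz input, transform it, and re-compress the result as .lz4.
CMD_ZCAT="pigz -dc"   # decompress .gz to stdout
CMD_LZ4="lz4 -c"      # compress stdin to .lz4 on stdout
CMD_LZ4CAT="lz4 -dc"  # decompress .lz4 to stdout

$CMD_ZCAT input.tsv.gz | cut -f1,2 | $CMD_LZ4 - > output.tsv.lz4
$CMD_LZ4CAT output.tsv.lz4 | head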
TAXON_FALLBACK_URL="https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip" @@ -301,7 +299,22 @@ guz() { fifo="$(uuidgen)-$(basename "$1")" mkfifo "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" echo "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" - { zcat "$1" > "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" && rm "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" || kill "$self"; } > /dev/null & + { $CMD_ZCAT "$1" > "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" && rm "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" || kill "$self"; } > /dev/null & +} + +lz() { + fifo="$(uuidgen)-$(basename "$1")" + mkfifo "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" + echo "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" + mkdir -p "$(dirname "$1")" + { $CMD_LZ4 - < "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" > "$1" && rm "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" || kill "$self"; } > /dev/null & +} + +luz() { + fifo="$(uuidgen)-$(basename "$1")" + mkfifo "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" + echo "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" + { $CMD_LZ4CAT "$1" > "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" && rm "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/$fifo" || kill "$self"; } > /dev/null & } have() { @@ -350,10 +363,10 @@ create_taxon_tables() { -e 's/parvorder/no rank/' "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/nodes.dmp" mkdir -p "$OUTPUT_DIR" - java -Xms"$JAVA_MEM" -Xmx"$JAVA_MEM" -jar "$CURRENT_LOCATION/helper_scripts/NamesNodes2TaxonsLineages.jar" \ + $CURRENT_LOCATION/helper_scripts/taxons-lineages \ --names "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/names.dmp" --nodes "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/nodes.dmp" \ - --taxons "$(gz "$OUTPUT_DIR/taxons.tsv.gz")" \ - --lineages "$(gz "$OUTPUT_DIR/lineages.tsv.gz")" + --taxons "$(lz "$OUTPUT_DIR/taxons.tsv.lz4")" \ + --lineages "$(lz "$OUTPUT_DIR/lineages.tsv.lz4")" rm "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/names.dmp" "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/nodes.dmp" log "Finished creating the taxon tables." @@ -368,7 +381,8 @@ download_and_convert_all_sources() { DB_TYPES_ARRAY=($DB_TYPES) DB_SOURCES_ARRAY=($DB_SOURCES) - IFS="$OLDIFS" + # Set IFS to newline to properly split the $CHUNKS variable for folders with newlines + IFS=$'\n' while [[ "$IDX" -ne "${#DB_TYPES_ARRAY}" ]] && [[ -n $(echo "${DB_TYPES_ARRAY[$IDX]}" | sed "s/\s//g") ]] do @@ -396,7 +410,7 @@ download_and_convert_all_sources() { reportProgress -1 "Downloading database index for $DB_TYPE." 3 - curl --continue-at - --create-dirs "$DB_SOURCE" --silent | zcat | java -jar "$CURRENT_LOCATION/helper_scripts/XmlToTabConverter.jar" 5 50 "$DB_TYPE" "$VERBOSE" | node "$CURRENT_LOCATION/helper_scripts/WriteToChunk.js" "$DB_INDEX_OUTPUT" "$VERBOSE" + curl --continue-at - --create-dirs "$DB_SOURCE" --silent | pv -i 5 -n -s "$SIZE" 2> >(reportProgress - "Downloading database index for $DB_TYPE." 3 >&2) | $CMD_ZCAT | $CURRENT_LOCATION/helper_scripts/xml-parser -t "$DB_TYPE" | $CURRENT_LOCATION/helper_scripts/write-to-chunk --output-dir "$DB_INDEX_OUTPUT" # Now, compress the different chunks CHUNKS=$(find "$DB_INDEX_OUTPUT" -name "*.chunk") @@ -407,7 +421,7 @@ download_and_convert_all_sources() { for CHUNK in $CHUNKS do echo "Compressing $CHUNK_IDX of $TOTAL_CHUNKS for $DB_TYPE" - pv -i 5 -n "$CHUNK" 2> >(reportProgress - "Processing chunk $CHUNK_IDX of $TOTAL_CHUNKS for $DB_TYPE index." 4 >&2) | pigz > "$CHUNK.gz" + pv -i 5 -n "$CHUNK" 2> >(reportProgress - "Processing chunk $CHUNK_IDX of $TOTAL_CHUNKS for $DB_TYPE index." 
4 >&2) | lz4 -c > "$CHUNK.lz4" # Remove the chunk that was just compressed rm "$CHUNK" CHUNK_IDX=$((CHUNK_IDX + 1)) @@ -440,7 +454,7 @@ download_and_convert_all_sources() { SIZE="$(curl -I "$DB_SOURCE" -s | grep -i content-length | tr -cd '[0-9]')" - curl --continue-at - --create-dirs "$DB_SOURCE" --silent | pv -i 5 -n -s "$SIZE" 2> >(reportProgress - "Downloading database index for $DB_TYPE." 3 >&2) | zcat | java -jar "$CURRENT_LOCATION/helper_scripts/XmlToTabConverter.jar" 5 50 "$DB_TYPE" "$VERBOSE" | node "$CURRENT_LOCATION/helper_scripts/WriteToChunk.js" "$DB_INDEX_OUTPUT" "$VERBOSE" + curl --continue-at - --create-dirs "$DB_SOURCE" --silent | pv -i 5 -n -s "$SIZE" 2> >(reportProgress - "Downloading database index for $DB_TYPE." 3 >&2) | $CMD_ZCAT | $CURRENT_LOCATION/helper_scripts/xml-parser -t "$DB_TYPE" | $CURRENT_LOCATION/helper_scripts/write-to-chunk --output-dir "$DB_INDEX_OUTPUT" # Now, compress the different chunks CHUNKS=$(find "$DB_INDEX_OUTPUT" -name "*.chunk") @@ -451,7 +465,7 @@ download_and_convert_all_sources() { for CHUNK in $CHUNKS do echo "Compressing $CHUNK_IDX of $TOTAL_CHUNKS for $DB_TYPE" - pv -i 5 -n "$CHUNK" 2> >(reportProgress - "Processing chunk $CHUNK_IDX of $TOTAL_CHUNKS for $DB_TYPE index." 4 >&2) | pigz > "$CHUNK.gz" + pv -i 5 -n "$CHUNK" 2> >(reportProgress - "Processing chunk $CHUNK_IDX of $TOTAL_CHUNKS for $DB_TYPE index." 4 >&2) | lz4 -c > "$CHUNK.lz4" # Remove the chunk that was just compressed rm "$CHUNK" CHUNK_IDX=$((CHUNK_IDX + 1)) @@ -465,6 +479,8 @@ download_and_convert_all_sources() { IDX=$((IDX + 1)) done + + IFS="$OLDIFS" } filter_sources_by_taxa() { @@ -491,176 +507,142 @@ filter_sources_by_taxa() { mkdir -p "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/filter" - $CURRENT_LOCATION/helper_scripts/filter_taxa.sh "$TAXA" "$DB_INDEX_OUTPUT" "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/filter" "$OUTPUT_DIR/lineages.tsv.gz" + $CURRENT_LOCATION/helper_scripts/filter_taxa.sh "$TAXA" "$DB_INDEX_OUTPUT" "$TEMP_DIR/$UNIPEPT_TEMP_CONSTANT/filter" "$OUTPUT_DIR/lineages.tsv.lz4" IDX=$((IDX + 1)) done } create_most_tables() { - have "$OUTPUT_DIR/taxons.tsv.gz" || return + have "$OUTPUT_DIR/taxons.tsv.lz4" || return log "Started calculation of most tables." reportProgress "-1" "Started building main database tables." 5 mkdir -p "$OUTPUT_DIR" "$INTDIR" - if [ $VERBOSE = "true" ] - then - $VERBOSE_FLAG="--verbose" - fi - - cat - | java -Xms"$JAVA_MEM" -Xmx"$JAVA_MEM" -jar "$CURRENT_LOCATION/helper_scripts/TaxonsUniprots2Tables.jar" \ + cat - | $CURRENT_LOCATION/helper_scripts/taxons-uniprots-tables \ --peptide-min "$PEPTIDE_MIN_LENGTH" \ --peptide-max "$PEPTIDE_MAX_LENGTH" \ - --taxons "$(guz "$OUTPUT_DIR/taxons.tsv.gz")" \ - --peptides "$(gz "$INTDIR/peptides.tsv.gz")" \ - --uniprot-entries "$(gz "$OUTPUT_DIR/uniprot_entries.tsv.gz")" \ - --ec "$(gz "$OUTPUT_DIR/ec_cross_references.tsv.gz")" \ - --go "$(gz "$OUTPUT_DIR/go_cross_references.tsv.gz")" \ - --interpro "$(gz "$OUTPUT_DIR/interpro_cross_references.tsv.gz")" \ - $VERBOSE_FLAG - - log "Finished calculation of most tables with status $?" 
+ --taxons "$(luz "$OUTPUT_DIR/taxons.tsv.lz4")" \ + --peptides "$(lz "$INTDIR/peptides-out.tsv.lz4")" \ + --uniprot-entries "$(lz "$OUTPUT_DIR/uniprot_entries.tsv.lz4")" \ + --ec "$(lz "$OUTPUT_DIR/ec_cross_references.tsv.lz4")" \ + --go "$(lz "$OUTPUT_DIR/go_cross_references.tsv.lz4")" \ + --interpro "$(lz "$OUTPUT_DIR/interpro_cross_references.tsv.lz4")" + + log "Started sorting peptides table" + + $CMD_LZ4CAT $INTDIR/peptides-out.tsv.lz4 \ + | LC_ALL=C $CMD_SORT -k2 \ + | $CMD_LZ4 > $INTDIR/peptides-equalized.tsv.lz4 + + rm $INTDIR/peptides-out.tsv.lz4 + log "Finished calculation of most tables with status $?" } create_tables_and_filter() { filter_sources_by_taxa | create_most_tables } -join_equalized_pepts_and_entries() { - echo "Test if files for joining peptides are available." - have "$INTDIR/peptides.tsv.gz" "$OUTPUT_DIR/uniprot_entries.tsv.gz" || return - log "Started the joining of equalized peptides and uniprot entries." - mkfifo "peptides_eq" "entries_eq" - zcat "$INTDIR/peptides.tsv.gz" | gawk '{ printf("%012d\t%s\n", $4, $2) }' > "peptides_eq" & - zcat "$OUTPUT_DIR/uniprot_entries.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $4) }' > "entries_eq" & - join -t ' ' -o '1.2,2.2' -j 1 "peptides_eq" "entries_eq" \ - | LC_ALL=C $CMD_SORT -k1 \ - | $CMD_GZIP - > "$INTDIR/aa_sequence_taxon_equalized.tsv.gz" - rm "peptides_eq" "entries_eq" - log "Finished the joining of equalized peptides and uniprot entries with status $?." -} +number_sequences() { + have "$INTDIR/peptides-equalized.tsv.lz4" || return + log "Started the numbering of sequences." -join_original_pepts_and_entries() { - have "$INTDIR/peptides.tsv.gz" "$OUTPUT_DIR/uniprot_entries.tsv.gz" || return - log "Started the joining of original peptides and uniprot entries." - mkfifo "peptides_orig" "entries_orig" - zcat "$INTDIR/peptides.tsv.gz" | gawk '{ printf("%012d\t%s\n", $4, $3) }' > "peptides_orig" & - zcat "$OUTPUT_DIR/uniprot_entries.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $4) }' > "entries_orig" & - join -t ' ' -o '1.2,2.2' -j 1 "peptides_orig" "entries_orig" \ - | LC_ALL=C $CMD_SORT -k1 \ - | $CMD_GZIP - > "$INTDIR/aa_sequence_taxon_original.tsv.gz" - rm "peptides_orig" "entries_orig" - log "Finished the joining of original peptides and uniprot entries with status $?." -} + mkfifo "p_eq" + mkfifo "p_or" + $CMD_LZ4CAT $INTDIR/peptides-equalized.tsv.lz4 | cut -f 3 | sort | uniq > "p_or" & + $CMD_LZ4CAT $INTDIR/peptides-equalized.tsv.lz4 | cut -f 2 | uniq > "p_eq" & + + sort -u -m "p_or" "p_eq" | cat -n \ + | sed 's/^ *//' | $CMD_LZ4 - > "$INTDIR/sequences.tsv.lz4" + + rm "p_eq" "p_or" -number_sequences() { - have "$INTDIR/aa_sequence_taxon_equalized.tsv.gz" "$INTDIR/aa_sequence_taxon_original.tsv.gz" || return - log "Started the numbering of sequences." - mkfifo "equalized" "original" - zcat "$INTDIR/aa_sequence_taxon_equalized.tsv.gz" | cut -f1 | uniq > "equalized" & - zcat "$INTDIR/aa_sequence_taxon_original.tsv.gz" | cut -f1 | uniq > "original" & - LC_ALL=C $CMD_SORT -m "equalized" "original" | uniq | cat -n \ - | sed 's/^ *//' | $CMD_GZIP - > "$INTDIR/sequences.tsv.gz" - rm "equalized" "original" log "Finished the numbering of sequences with status $?." } +substitute_aas() { + have "$INTDIR/peptides-equalized.tsv.lz4" "$INTDIR/sequences.tsv.lz4" + + log "Started the substitution of equalized AA's by ID's for the peptides." 
+ $CMD_LZ4CAT $INTDIR/peptides-equalized.tsv.lz4 \ + | join -t ' ' -o '1.1,2.1,1.3,1.4,1.5,1.6' -1 2 -2 2 - "$(luz "$INTDIR/sequences.tsv.lz4")" \ + | $CMD_LZ4 - > "$INTDIR/peptides_by_equalized.tsv.lz4" + + rm "$INTDIR/peptides-equalized.tsv.lz4" + log "Finished the substitution of equalized AA's by ID's for the peptides with status $?." + + log "Started the substitution of original AA's by ID's for the peptides." + $CMD_LZ4CAT "$INTDIR/peptides_by_equalized.tsv.lz4" \ + | LC_ALL=C $CMD_SORT -k 3b,3 \ + | join -t ' ' -o '1.1,1.2,2.1,1.4,1.5,1.6' -1 3 -2 2 - "$(luz "$INTDIR/sequences.tsv.lz4")" \ + | $CMD_LZ4 - > "$INTDIR/peptides_by_original.tsv.lz4" + + log "Finished the substitution of original AA's by ID's for the peptides with status $?." +} calculate_equalized_lcas() { - have "$INTDIR/sequences.tsv.gz" "$INTDIR/aa_sequence_taxon_equalized.tsv.gz" "$OUTPUT_DIR/lineages.tsv.gz" || return - log "Started the calculation of equalized LCA's (after substituting AA's by ID's)." - join -t ' ' -o '1.1,2.2' -1 2 -2 1 \ - "$(guz "$INTDIR/sequences.tsv.gz")" \ - "$(guz "$INTDIR/aa_sequence_taxon_equalized.tsv.gz")" \ - | java -Xms"$JAVA_MEM" -Xmx"$JAVA_MEM" -jar "$CURRENT_LOCATION/helper_scripts/LineagesSequencesTaxons2LCAs.jar" "$(guz "$OUTPUT_DIR/lineages.tsv.gz")" \ - | $CMD_GZIP - > "$INTDIR/LCAs_equalized.tsv.gz" + have "$INTDIR/peptides_by_equalized.tsv.lz4" || return + log "Started the calculation of equalized LCA's." + $CMD_LZ4CAT $INTDIR/peptides_by_equalized.tsv.lz4 | cut -f 2,6 \ + | $CURRENT_LOCATION/helper_scripts/lcas --infile "$(luz "$OUTPUT_DIR/lineages.tsv.lz4")" \ + | $CMD_LZ4 - > "$INTDIR/LCAs_equalized.tsv.lz4" log "Finished the calculation of equalized LCA's (after substituting AA's by ID's) with status $?." } calculate_original_lcas() { - have "$INTDIR/sequences.tsv.gz" "$INTDIR/aa_sequence_taxon_original.tsv.gz" "$OUTPUT_DIR/lineages.tsv.gz" || return - log "Started the calculation of original LCA's (after substituting AA's by ID's)." - join -t ' ' -o '1.1,2.2' -1 2 -2 1 \ - "$(guz "$INTDIR/sequences.tsv.gz")" \ - "$(guz "$INTDIR/aa_sequence_taxon_original.tsv.gz")" \ - | java -Xms"$JAVA_MEM" -Xmx"$JAVA_MEM" -jar "$CURRENT_LOCATION/helper_scripts/LineagesSequencesTaxons2LCAs.jar" "$(guz "$OUTPUT_DIR/lineages.tsv.gz")" \ - | $CMD_GZIP - > "$INTDIR/LCAs_original.tsv.gz" + have "$INTDIR/peptides_by_original.tsv.lz4" || return + log "Started the calculation of original LCA's" + $CMD_LZ4CAT $INTDIR/peptides_by_original.tsv.lz4 | cut -f 3,6 \ + | $CURRENT_LOCATION/helper_scripts/lcas --infile "$(luz "$OUTPUT_DIR/lineages.tsv.lz4")" \ + | $CMD_LZ4 - > "$INTDIR/LCAs_original.tsv.lz4" log "Finished the calculation of original LCA's (after substituting AA's by ID's) with status $?." } -substitute_equalized_aas() { - have "$INTDIR/peptides.tsv.gz" "$INTDIR/sequences.tsv.gz" || return - log "Started the substitution of equalized AA's by ID's for the peptides." - zcat "$INTDIR/peptides.tsv.gz" \ - | LC_ALL=C $CMD_SORT -k 2b,2 \ - | join -t ' ' -o '1.1,2.1,1.3,1.4,1.5' -1 2 -2 2 - "$(guz "$INTDIR/sequences.tsv.gz")" \ - | $CMD_GZIP - > "$INTDIR/peptides_by_equalized.tsv.gz" - log "Finished the substitution of equalized AA's by ID's for the peptides with status $?." -} - - calculate_equalized_fas() { - have "$INTDIR/peptides_by_equalized.tsv.gz" || return + have "$INTDIR/peptides_by_equalized.tsv.lz4" || return log "Started the calculation of equalized FA's." 
mkfifo "peptides_eq" - zcat "$INTDIR/peptides_by_equalized.tsv.gz" | cut -f2,5 > "peptides_eq" & - node "$CURRENT_LOCATION/helper_scripts/FunctionalAnalysisPeptides.js" "peptides_eq" "$(gz "$INTDIR/FAs_equalized.tsv.gz")" + $CMD_LZ4CAT "$INTDIR/peptides_by_equalized.tsv.lz4" | cut -f2,5 > "peptides_eq" & + $CURRENT_LOCATION/helper_scripts/functional-analysis -i "peptides_eq" -o "$(lz "$INTDIR/FAs_equalized.tsv.lz4")" rm "peptides_eq" log "Finished the calculation of equalized FA's with status $?." } -substitute_original_aas() { - have "$INTDIR/peptides_by_equalized.tsv.gz" "$INTDIR/sequences.tsv.gz" || return - log "Started the substitution of original AA's by ID's for the peptides." - zcat "$INTDIR/peptides_by_equalized.tsv.gz" \ - | LC_ALL=C $CMD_SORT -k 3b,3 \ - | join -t ' ' -o '1.1,1.2,2.1,1.4,1.5' -1 3 -2 2 - "$(guz "$INTDIR/sequences.tsv.gz")" \ - | $CMD_GZIP - > "$INTDIR/peptides_by_original.tsv.gz" - log "Finished the substitution of equalized AA's by ID's for the peptides with status $?." -} - calculate_original_fas() { - have "$INTDIR/peptides_by_original.tsv.gz" || return + have "$INTDIR/peptides_by_original.tsv.lz4" || return log "Started the calculation of original FA's." mkfifo "peptides_orig" - zcat "$INTDIR/peptides_by_original.tsv.gz" | cut -f3,5 > "peptides_orig" & - node "$CURRENT_LOCATION/helper_scripts/FunctionalAnalysisPeptides.js" "peptides_orig" "$(gz "$INTDIR/FAs_original.tsv.gz")" + $CMD_LZ4CAT "$INTDIR/peptides_by_original.tsv.lz4" | cut -f3,5 > "peptides_orig" & + $CURRENT_LOCATION/helper_scripts/functional-analysis -i "peptides_orig" -o "$(lz "$INTDIR/FAs_original.tsv.lz4")" rm "peptides_orig" log "Finished the calculation of original FA's." } -sort_peptides() { - have "$INTDIR/peptides_by_original.tsv.gz" || return - log "Started sorting the peptides table." - mkdir -p "$OUTPUT_DIR" - zcat "$INTDIR/peptides_by_original.tsv.gz" \ - | LC_ALL=C $CMD_SORT -n \ - | $CMD_GZIP - > "$OUTPUT_DIR/peptides.tsv.gz" - log "Finished sorting the peptides table." -} create_sequence_table() { - have "$INTDIR/LCAs_original.tsv.gz" "$INTDIR/LCAs_equalized.tsv.gz" "$INTDIR/FAs_original.tsv.gz" "$INTDIR/FAs_equalized.tsv.gz" "$INTDIR/sequences.tsv.gz" || return + have "$INTDIR/LCAs_original.tsv.lz4" "$INTDIR/LCAs_equalized.tsv.lz4" "$INTDIR/FAs_original.tsv.lz4" "$INTDIR/FAs_equalized.tsv.lz4" "$INTDIR/sequences.tsv.lz4" || return log "Started the creation of the sequences table." 
mkdir -p "$OUTPUT_DIR" mkfifo "olcas" "elcas" "ofas" "efas" - zcat "$INTDIR/LCAs_original.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "olcas" & - zcat "$INTDIR/LCAs_equalized.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "elcas" & - zcat "$INTDIR/FAs_original.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "ofas" & - zcat "$INTDIR/FAs_equalized.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "efas" & - zcat "$INTDIR/sequences.tsv.gz" | gawk '{ printf("%012d\t%s\n", $1, $2) }' \ + $CMD_LZ4CAT "$INTDIR/LCAs_original.tsv.lz4" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "olcas" & + $CMD_LZ4CAT "$INTDIR/LCAs_equalized.tsv.lz4" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "elcas" & + $CMD_LZ4CAT "$INTDIR/FAs_original.tsv.lz4" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "ofas" & + $CMD_LZ4CAT "$INTDIR/FAs_equalized.tsv.lz4" | gawk '{ printf("%012d\t%s\n", $1, $2) }' > "efas" & + $CMD_LZ4CAT "$INTDIR/sequences.tsv.lz4" | gawk '{ printf("%012d\t%s\n", $1, $2) }' \ | join --nocheck-order -a1 -e '\N' -t ' ' -o "1.1 1.2 2.2" - "olcas" \ | join --nocheck-order -a1 -e '\N' -t ' ' -o "1.1 1.2 1.3 2.2" - "elcas" \ | join --nocheck-order -a1 -e '\N' -t ' ' -o '1.1 1.2 1.3 1.4 2.2' - "ofas" \ | join --nocheck-order -a1 -e '\N' -t ' ' -o '1.1 1.2 1.3 1.4 1.5 2.2' - "efas" \ - | sed 's/^0*//' | $CMD_GZIP - > "$OUTPUT_DIR/sequences.tsv.gz" + | sed 's/^0*//' \ + | awk -F'\t' 'BEGIN {OFS="\t"} {gsub(/Z/, "K", $2); print}' \ + | $CMD_LZ4 - > "$OUTPUT_DIR/sequences.tsv.lz4" rm "olcas" "elcas" "ofas" "efas" log "Finished the creation of the sequences table." } @@ -679,7 +661,7 @@ fetch_ec_numbers() { /^DE/ { gsub(/.$/, "", $2) name = name $2 } END { print id, name }' - } | cat -n | sed 's/^ *//' | $CMD_GZIP - > "$OUTPUT_DIR/ec_numbers.tsv.gz" + } | cat -n | sed 's/^ *//' | $CMD_LZ4 - > "$OUTPUT_DIR/ec_numbers.tsv.lz4" log "Finished creating EC numbers." } @@ -711,14 +693,14 @@ fetch_go_terms() { id++ } } - type = "" }' | $CMD_GZIP - > "$OUTPUT_DIR/go_terms.tsv.gz" + type = "" }' | $CMD_LZ4 - > "$OUTPUT_DIR/go_terms.tsv.lz4" log "Finished creating GO terms." } fetch_interpro_entries() { log "Started creating InterPro Entries." mkdir -p "$OUTPUT_DIR" - curl -s "$INTERPRO_URL" | grep '^IPR' | cat -n | sed 's/^ *//' | $CMD_GZIP - > "$OUTPUT_DIR/interpro_entries.tsv.gz" + curl -s "$INTERPRO_URL" | grep '^IPR' | cat -n | sed 's/^ *//' | $CMD_LZ4 - > "$OUTPUT_DIR/interpro_entries.tsv.lz4" log "Finished creating InterPro Entries." } @@ -728,10 +710,10 @@ fetch_interpro_entries() { #dot: create_kmer_index -> kmer_index #dot: kmer_index [color="#f28e2b"] create_kmer_index() { - have "$OUTPUT_DIR/uniprot_entries.tsv.gz" "$OUTPUT_DIR/taxons.tsv.gz" || return + have "$OUTPUT_DIR/uniprot_entries.tsv.lz4" "$OUTPUT_DIR/taxons.tsv.lz4" || return log "Started the construction of the $KMER_LENGTH-mer index." 
for PREFIX in A C D E F G H I K L M N P Q R S T V W Y; do - pv -N $PREFIX "$OUTPUT_DIR/uniprot_entries.tsv.gz" \ + pv -N $PREFIX "$OUTPUT_DIR/uniprot_entries.tsv.lz4" \ | gunzip \ | cut -f4,7 \ | grep "^[0-9]* [ACDEFGHIKLMNPQRSTVWY]*$" \ @@ -740,7 +722,7 @@ create_kmer_index() { | LC_ALL=C $CMD_SORT \ | sed "s/^/$PREFIX/" done \ - | umgap joinkmers "$(guz "$OUTPUT_DIR/taxons.tsv.gz")" \ + | umgap joinkmers "$(guz "$OUTPUT_DIR/taxons.tsv.lz4")" \ | cut -d' ' -f1,2 \ | umgap buildindex \ > "$OUTPUT_DIR/$KMER_LENGTH-mer.index" @@ -752,9 +734,9 @@ create_kmer_index() { #dot: create_tryptic_index -> tryptic_index #dot: tryptic_index [color="#f28e2b"] create_tryptic_index() { - have "$TABDIR/sequences.tsv.gz" || return + have "$TABDIR/sequences.tsv.lz4" || return log "Started the construction of the tryptic index." - pv "$TABDIR/sequences.tsv.gz" \ + pv "$TABDIR/sequences.tsv.lz4" \ | gunzip \ | cut -f2,3 \ | grep -v "\\N" \ @@ -771,43 +753,31 @@ database) download_and_convert_all_sources create_tables_and_filter echo "Created tables!" - join_equalized_pepts_and_entries & - pid1=$! - join_original_pepts_and_entries & - pid2=$! - wait $pid1 - wait $pid2 number_sequences - reportProgress "-1" "Calculating lowest common ancestors." 6 + substitute_aas + reportProgress "-1" "Calculating lowest common ancestors and functional annotations." 6 calculate_equalized_lcas & pid1=$! calculate_original_lcas & pid2=$! - wait $pid1 - wait $pid2 - rm "$INTDIR/aa_sequence_taxon_equalized.tsv.gz" - rm "$INTDIR/aa_sequence_taxon_original.tsv.gz" - substitute_equalized_aas - rm "$INTDIR/peptides.tsv.gz" - substitute_original_aas - reportProgress "-1" "Calculating functional annotations." 7 calculate_equalized_fas & - pid1=$! + pid3=$! calculate_original_fas & - pid2=$! + pid4=$! wait $pid1 wait $pid2 - rm "$INTDIR/peptides_by_equalized.tsv.gz" - reportProgress "-1" "Sorting peptides." 8 - sort_peptides - rm "$INTDIR/peptides_by_original.tsv.gz" + wait $pid3 + wait $pid4 reportProgress "-1" "Creating sequence table." 9 create_sequence_table - rm "$INTDIR/LCAs_original.tsv.gz" - rm "$INTDIR/LCAs_equalized.tsv.gz" - rm "$INTDIR/FAs_original.tsv.gz" - rm "$INTDIR/FAs_equalized.tsv.gz" - rm "$INTDIR/sequences.tsv.gz" + rm "$INTDIR/LCAs_original.tsv.lz4" + rm "$INTDIR/LCAs_equalized.tsv.lz4" + rm "$INTDIR/FAs_original.tsv.lz4" + rm "$INTDIR/FAs_equalized.tsv.lz4" + rm "$INTDIR/sequences.tsv.lz4" + rm "$INTDIR/peptides_by_equalized.tsv.lz4" + # Use the original sort as the result + mv "$INTDIR/peptides_by_original.tsv.lz4" "$OUTPUT_DIR/peptides.tsv.lz4" reportProgress "-1" "Fetching EC numbers." 10 fetch_ec_numbers reportProgress "-1" "Fetching GO terms." 11 @@ -815,11 +785,11 @@ database) reportProgress "-1" "Fetching InterPro entries." 12 fetch_interpro_entries reportProgress "-1" "Computing database indices" 13 - ENTRIES=$(zcat "$OUTPUT_DIR/uniprot_entries.tsv.gz" | wc -l) + ENTRIES=$($CMD_LZ4CAT "$OUTPUT_DIR/uniprot_entries.tsv.lz4" | wc -l) echo "Database contains: ##$ENTRIES##" ;; static-database) - if ! have "$TABDIR/taxons.tsv.gz"; then + if ! have "$TABDIR/taxons.tsv.lz4"; then create_taxon_tables fi fetch_ec_numbers @@ -830,10 +800,10 @@ kmer-index) checkdep pv checkdep umgap "umgap crate (for umgap buildindex)" - if ! have "$OUTPUT_DIR/taxons.tsv.gz"; then + if ! have "$OUTPUT_DIR/taxons.tsv.lz4"; then create_taxon_tables fi - if ! have "$OUTPUT_DIR/uniprot_entries.tsv.gz"; then + if ! 
have "$OUTPUT_DIR/uniprot_entries.tsv.lz4"; then download_and_convert_all_sources create_tables_and_filter fi @@ -843,22 +813,20 @@ tryptic-index) checkdep pv checkdep umgap "umgap crate (for umgap buildindex)" - if ! have "$TABDIR/taxons.tsv.gz"; then + if ! have "$TABDIR/taxons.tsv.lz4"; then create_taxon_tables fi - if ! have "$TABDIR/sequences.tsv.gz"; then + if ! have "$TABDIR/sequences.tsv.lz4"; then download_and_convert_all_sources create_tables_and_filter - join_equalized_pepts_and_entries - join_original_pepts_and_entries number_sequences + substitute_aas calculate_equalized_lcas calculate_original_lcas - substitute_equalized_aas calculate_equalized_fas - substitute_original_aas calculate_original_fas create_sequence_table + # TODO remove temp files fi create_tryptic_index ;; diff --git a/scripts/helper_scripts/.gitignore b/scripts/helper_scripts/.gitignore new file mode 100644 index 00000000..38df1f8a --- /dev/null +++ b/scripts/helper_scripts/.gitignore @@ -0,0 +1,9 @@ +# Ignore the compiled binaries that get moved here +dat-parser +functional-analysis +lcas +taxa-by-chunk +taxons-lineages +taxons-uniprots-tables +write-to-chunk +xml-parser diff --git a/scripts/helper_scripts/FunctionalAnalysisPeptides.js b/scripts/helper_scripts/FunctionalAnalysisPeptides.js deleted file mode 100755 index 2e635468..00000000 --- a/scripts/helper_scripts/FunctionalAnalysisPeptides.js +++ /dev/null @@ -1,73 +0,0 @@ -const readline = require('readline'); -const fs = require('fs'); -const start = new Date().getTime(); -const args = process.argv; -if (args.length !== 4) { - console.log("Please provide 2 parameters: input and output."); - process.exit(1); -} -const inputFile = args[2]; -const outputFile = args[3]; -const readInterface = readline.createInterface({ - input: fs.createReadStream(inputFile) -}); -const writer = fs.createWriteStream(outputFile); -let row = null; -let curPept = null; -let numProt = 0; -let numAnnotatedGO = 0; -let numAnnotatedEC = 0; -let numAnnotatedInterPro = 0; -let done = 0; -let m = new Map(); -readInterface.on('line', function (line) { - row = line.split("\t"); - if (row[0] !== curPept) { - if (curPept !== null) { - if (m.size !== 0) { - writer.write(`${curPept}\t{"num":{"all":${numProt},"EC":${numAnnotatedEC},"GO":${numAnnotatedGO},"IPR":${numAnnotatedInterPro}},"data":{${Array.from(m.entries(), ([k, v]) => `"${k}":${v}`).join(",")}}}\n`); - } - } - m.clear(); - numProt = 0; - numAnnotatedGO = 0; - numAnnotatedEC = 0; - numAnnotatedInterPro = 0; - curPept = row[0]; - } - numProt++; - if (row.length > 1) { - const terms = row[1].split(";"); - let hasEC = false; - let hasGO = false; - let hasInterPro = false; - for (const term of terms) { - if (!term) { - continue; - } - if (term.startsWith("G")) { - hasGO = true; - } else if (term.startsWith("E")) { - hasEC = true; - } else { - hasInterPro = true; - } - m.set(term, (m.get(term) || 0) + 1); - } - numAnnotatedGO += hasGO ? 1 : 0; - numAnnotatedEC += hasEC ? 1 : 0; - numAnnotatedInterPro += hasInterPro ? 
1 : 0; - } - done++; - if (done % 1000000 === 0) { - console.log("FA " + done + " rows"); - } -}); -readInterface.on('close', function () { - if (m.size !== 0) { - writer.write(`${curPept}\t{"num":{"all":${numProt},"EC":${numAnnotatedEC},"GO":${numAnnotatedGO},"IPR":${numAnnotatedInterPro}},"data":{${Array.from(m.entries(), ([k, v]) => `"${k}":${v}`).join(",")}}}\n`); - } - writer.end(); - const end = new Date().getTime(); - console.log("Took " + (end - start) / 1000 + "s"); -}); diff --git a/scripts/helper_scripts/LineagesSequencesTaxons2LCAs.jar b/scripts/helper_scripts/LineagesSequencesTaxons2LCAs.jar deleted file mode 100755 index fee277c5..00000000 Binary files a/scripts/helper_scripts/LineagesSequencesTaxons2LCAs.jar and /dev/null differ diff --git a/scripts/helper_scripts/NamesNodes2TaxonsLineages.jar b/scripts/helper_scripts/NamesNodes2TaxonsLineages.jar deleted file mode 100644 index 1f3b817d..00000000 Binary files a/scripts/helper_scripts/NamesNodes2TaxonsLineages.jar and /dev/null differ diff --git a/scripts/helper_scripts/ParallelXmlToTab.js b/scripts/helper_scripts/ParallelXmlToTab.js deleted file mode 100755 index dd1557eb..00000000 --- a/scripts/helper_scripts/ParallelXmlToTab.js +++ /dev/null @@ -1,10 +0,0 @@ -const readline = require('readline'); -const fs = require('fs'); - -const rl = readline.createInterface({ - input: process.stdin -}); - -let buffer = ""; - - diff --git a/scripts/helper_scripts/TaxaByChunk.js b/scripts/helper_scripts/TaxaByChunk.js deleted file mode 100755 index fff59f75..00000000 --- a/scripts/helper_scripts/TaxaByChunk.js +++ /dev/null @@ -1,50 +0,0 @@ -/** - * This script looks for which taxa should be looked up in which chunk. The list of taxa that need to be looked up is - * read from stdin. A list of files, taxa (thus the taxa that need to be looked up in the corresponding file) are - * provided through stdout. - * - * The script requires two command line arguments: the folder in which all Unipept DB chunks are present and a - * temporary folder that can be used by the script to store temporary files. - */ - -const readline = require("readline"); -const fs = require("fs"); -const path = require("path"); - -const args = process.argv; - -if (args.length !== 4) { - console.error("This script expects exactly two parameters: unipept_db_chunk_folder temporary_folder"); - process.exit(1); -} - -const rl = readline.createInterface({ - input: process.stdin -}); - -const allTaxa = []; - -rl.on("line", (line) => { - allTaxa.push(parseInt(line.trim())); -}); - -// In this hook we should start to link input files with the taxa that need to be looked up in there. 
-rl.on("close", () => { - for (const file of fs.readdirSync(args[2])) { - const baseFile = path.basename(file); - if (baseFile.match(/unipept\..*\.gz/)) { - const range = baseFile.replace(/unipept\.|\.gz/g, '').split("-"); - const startRange = parseInt(range[0]); - const endRange = parseInt(range[1]); - - const matchedTaxa = allTaxa.filter(t => startRange <= t && t <= endRange); - - if (matchedTaxa && matchedTaxa.length > 0) { - fs.writeFileSync(path.join(args[3], baseFile + ".pattern"), matchedTaxa.map(t => "\t" + t + "$").join("\n")); - - console.log(path.join(args[3], baseFile + ".pattern")); - console.log(path.join(args[2], file)); - } - } - } -}); diff --git a/scripts/helper_scripts/TaxonsUniprots2Tables.jar b/scripts/helper_scripts/TaxonsUniprots2Tables.jar deleted file mode 100644 index bf9f13e8..00000000 Binary files a/scripts/helper_scripts/TaxonsUniprots2Tables.jar and /dev/null differ diff --git a/scripts/helper_scripts/WriteToChunk.js b/scripts/helper_scripts/WriteToChunk.js deleted file mode 100755 index 594ae4a5..00000000 --- a/scripts/helper_scripts/WriteToChunk.js +++ /dev/null @@ -1,49 +0,0 @@ -const readline = require("readline"); -const fs = require("fs"); -const path = require("path"); - -const outputDir = process.argv[2]; - -const verbose = process.argv[3] === "true"; - -const rl = readline.createInterface({ - input: process.stdin -}); - -const taxaBounds = [ - 0, 550, 1352, 3047, 5580, 8663, 11676, 32473, 40214, 52774, 66656, 86630, 116960, 162147, 210225, 267979, 334819, - 408172, 470868, 570509, 673318, 881260, 1046115, 1136135, 1227077, 1300307, 1410620, 1519492, 1650438, 1756149, - 1820614, 1871070, 1898104, 1922217, 1978231, 2024617, 2026757, 2035430, 2070414, 2202732, 2382165, 2527964, 2601669, - 2706029, 10000000 -]; - -const fileObjects = [...Object.keys(taxaBounds)].slice(0, -1).map(idx => Number.parseInt(idx)).map( - idx => fs.createWriteStream(path.join(outputDir, `unipept.${taxaBounds[idx]}-${taxaBounds[idx + 1]}.chunk`)) -); - -let headerSkipped = false; - -rl.on("line", (line) => { - if (verbose) { - console.error("INFO VERBOSE: writing line to chunk: " + line); - } - - if (!headerSkipped) { - headerSkipped = true; - const writeStream = fs.createWriteStream(path.join(outputDir, 'db.header')); - writeStream.write(line + "\n"); - writeStream.close(); - return; - } - - const taxonId = Number.parseInt(line.split("\t")[8].trim()); - - let idx = 0; - while (taxonId > taxaBounds[idx]) { - idx++; - } - - fileObjects[idx - 1].write(line + "\n"); -}); - -rl.on("close", () => fileObjects.map(o => o.close())); diff --git a/scripts/helper_scripts/XmlToTabConverter.jar b/scripts/helper_scripts/XmlToTabConverter.jar deleted file mode 100644 index 734e2498..00000000 Binary files a/scripts/helper_scripts/XmlToTabConverter.jar and /dev/null differ diff --git a/scripts/helper_scripts/filter_taxa.sh b/scripts/helper_scripts/filter_taxa.sh index 017a512d..b9b4e76a 100755 --- a/scripts/helper_scripts/filter_taxa.sh +++ b/scripts/helper_scripts/filter_taxa.sh @@ -14,7 +14,7 @@ mkdir -p "$TMP_DIR" filter_taxa() { QUERY=$(echo "\s$1\s" | sed "s/,/\\\s\\\|\\\s/g") - RESULT=$(cat "$LINEAGE_ARCHIVE" | zcat | grep "$QUERY" | cut -f1 | sort -n | uniq | tr '\n' ',') + RESULT=$(lz4 -dc "$LINEAGE_ARCHIVE" | grep "$QUERY" | cut -f1 | sort -n | uniq | tr '\n' ',') echo "$RESULT" } @@ -23,16 +23,16 @@ then TAXA=$(filter_taxa "$TAXA") # This associative array maps a filename upon the taxa that should be queried within this file - QUERIES=( $(echo "$TAXA" | tr "," "\n" | node 
"$CURRENT_LOCATION/TaxaByChunk.js" "$DATABASE_INDEX" "$TMP_DIR") ) + QUERIES=( $(echo "$TAXA" | tr "," "\n" | $CURRENT_LOCATION/taxa-by-chunk --chunk-dir "$DATABASE_INDEX" --temp-dir "$TMP_DIR") ) if [[ ${#QUERIES[@]} -gt 0 ]] then - parallel --jobs 8 --max-args 2 "cat {2} | zcat | sed 's/$/$/' | grep -F -f {1} | sed 's/\$$//'" ::: "${QUERIES[@]}" + parallel --jobs 8 --max-args 2 "lz4 -dc {2} | sed 's/$/$/' | grep -F -f {1} | sed 's/\$$//'" ::: "${QUERIES[@]}" fi else # If the root ID has been passed to this script, we simply print out all database items (without filtering). - find "$DATABASE_INDEX" -name "*.chunk.gz" | xargs zcat + find "$DATABASE_INDEX" -name "*.chunk.lz4" -exec lz4 -mdc {} + fi # Remove temporary files diff --git a/scripts/helper_scripts/parser/pom.xml b/scripts/helper_scripts/parser/pom.xml deleted file mode 100644 index a8fa31d7..00000000 --- a/scripts/helper_scripts/parser/pom.xml +++ /dev/null @@ -1,107 +0,0 @@ - - 4.0.0 - unipept - unipept - 0.0.1-SNAPSHOT - - UTF-8 - - - - - ${basedir}/src/main/java - - - - - ${basedir}/src/test/java - - - ${basedir}/src/test/resources - - **/*.* - - - - - - maven-compiler-plugin - 3.1 - - 1.8 - 1.8 - - - - org.apache.maven.plugins - maven-dependency-plugin - - - copy-dependencies - prepare-package - - copy-dependencies - - - ${project.build.directory}/lib - false - false - true - - - - - - org.apache.maven.plugins - maven-jar-plugin - 2.4 - - - - true - lib/ - - - - - - - - - junit - junit - 4.11 - - - com.beust - jcommander - 1.48 - - - javax.json - javax.json-api - 1.1 - - - - org.glassfish - javax.json - 1.1 - - - - - oracleReleases - Oracle Released Java Packages - http://download.oracle.com/maven - default - - - Unipept - https://github.ugent.be/bmesuere/unipept - The Unipept web application supports biodiversity analysis of large and complex metaproteome samples. 
- - https://github.ugent.be/bmesuere/unipept.git - - diff --git a/scripts/helper_scripts/parser/src/manifests/LineagesSequencesTaxons2LCAs/META-INF/MANIFEST.MF b/scripts/helper_scripts/parser/src/manifests/LineagesSequencesTaxons2LCAs/META-INF/MANIFEST.MF deleted file mode 100755 index d6c5d70b..00000000 --- a/scripts/helper_scripts/parser/src/manifests/LineagesSequencesTaxons2LCAs/META-INF/MANIFEST.MF +++ /dev/null @@ -1,3 +0,0 @@ -Manifest-Version: 1.0 -Main-Class: tools.LineagesSequencesTaxons2LCAs - diff --git a/scripts/helper_scripts/parser/src/manifests/NamesNodes2TaxonsLineages/META-INF/MANIFEST.MF b/scripts/helper_scripts/parser/src/manifests/NamesNodes2TaxonsLineages/META-INF/MANIFEST.MF deleted file mode 100755 index 143dcc30..00000000 --- a/scripts/helper_scripts/parser/src/manifests/NamesNodes2TaxonsLineages/META-INF/MANIFEST.MF +++ /dev/null @@ -1,3 +0,0 @@ -Manifest-Version: 1.0 -Main-Class: tools.NamesNodes2TaxonsLineages - diff --git a/scripts/helper_scripts/parser/src/manifests/TaxonsUniprots2Tables/META-INF/MANIFEST.MF b/scripts/helper_scripts/parser/src/manifests/TaxonsUniprots2Tables/META-INF/MANIFEST.MF deleted file mode 100755 index 1f0472ba..00000000 --- a/scripts/helper_scripts/parser/src/manifests/TaxonsUniprots2Tables/META-INF/MANIFEST.MF +++ /dev/null @@ -1,3 +0,0 @@ -Manifest-Version: 1.0 -Main-Class: tools.TaxonsUniprots2Tables - diff --git a/scripts/helper_scripts/parser/src/manifests/XmlToTabConverter/META-INF/MANIFEST.MF b/scripts/helper_scripts/parser/src/manifests/XmlToTabConverter/META-INF/MANIFEST.MF deleted file mode 100755 index 87a3f9e2..00000000 --- a/scripts/helper_scripts/parser/src/manifests/XmlToTabConverter/META-INF/MANIFEST.MF +++ /dev/null @@ -1,3 +0,0 @@ -Manifest-Version: 1.0 -Main-Class: tools.XmlToTabConverter - diff --git a/scripts/helper_scripts/parser/src/storage/CSV.java b/scripts/helper_scripts/parser/src/storage/CSV.java deleted file mode 100755 index 1e5e9587..00000000 --- a/scripts/helper_scripts/parser/src/storage/CSV.java +++ /dev/null @@ -1,93 +0,0 @@ -package storage; - -import java.io.IOException; -import java.io.BufferedReader; -import java.io.BufferedWriter; -import java.io.InputStreamReader; -import java.io.OutputStreamWriter; -import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; -import java.io.FileInputStream; -import java.io.FileOutputStream; - -public class CSV { - - private static final int MB4 = 4194304; - - public static class Reader { - private BufferedReader buffer; - - public Reader(String file) throws IOException { - buffer = new BufferedReader( - new InputStreamReader( - new FileInputStream(file) - ) - ); - } - - public String[] read() throws IOException { - String line = buffer.readLine(); - if(line == null) return null; - return line.split(" "); - } - - public void close() throws IOException { - buffer.close(); - } - } - - public static class Writer { - protected BufferedWriter buffer; - - public Writer(String file) throws IOException { - buffer = new BufferedWriter( - new OutputStreamWriter( - new FileOutputStream(file) - ), MB4 - ); - } - - public void write(String... values) throws IOException { - buffer.write(values[0]); - for(int i = 1; i < values.length; i++) { - buffer.write(" " + (values[i] == null ? 
"\\N" : values[i])); - } - buffer.newLine(); - } - - public void close() throws IOException { - buffer.close(); - } - } - - public static class IndexedWriter extends Writer { - private long index; - - public IndexedWriter(String file) throws IOException { - super(file); - index = 0; - } - - @Override - public void write(String... values) throws IOException { - buffer.write(Long.toString(++index)); - for(int i = 0; i < values.length; i++) { - buffer.write(" " + (values[i] == null ? "\\N" : values[i])); - } - buffer.newLine(); - } - - public long index() { - return index; - } - } - - public static String toString(boolean b) { - return b ? "\1" : "\0"; - } - - public static boolean toBoolean(String b) { - return b.charAt(0) == (char) 1; - } - -} diff --git a/scripts/helper_scripts/parser/src/storage/TabWriter.java b/scripts/helper_scripts/parser/src/storage/TabWriter.java deleted file mode 100755 index 191ff47e..00000000 --- a/scripts/helper_scripts/parser/src/storage/TabWriter.java +++ /dev/null @@ -1,66 +0,0 @@ -package storage; - -import xml.*; - -import java.io.*; -import java.util.stream.Collectors; - -public class TabWriter implements UniprotObserver { - private final BufferedWriter out; - private final boolean verbose; - - public TabWriter( - OutputStream out, - boolean verbose - ) throws IOException { - this.out = new BufferedWriter(new OutputStreamWriter(out)); - this.verbose = verbose; - - // Write header to output file - this.out.write(String.join("\t", new String[]{ - "Entry", - "Sequence", - "Protein names", - "Version (entry)", - "EC number", - "Gene ontology IDs", - "Cross-reference (InterPro)", - "Status", - "Organism ID" - }) + "\n"); - } - - @Override - public void handleEntry(UniprotEntry entry) { - try { - String line = String.join("\t", new String[]{ - entry.getUniprotAccessionNumber(), - entry.getSequence(), - entry.getName(), - String.valueOf(entry.getVersion()), - entry.getECReferences().stream().map(UniprotECRef::getId).collect(Collectors.joining(";")), - entry.getGOReferences().stream().map(UniprotGORef::getId).collect(Collectors.joining(";")), - entry.getInterProReferences().stream().map(UniprotInterProRef::getId).collect(Collectors.joining(";")), - "swissprot", - String.valueOf(entry.getTaxonId()), - }); - - if (verbose) { - System.err.println("INFO VERBOSE: Writing tabular line: " + line); - } - - this.out.write(line + "\n"); - } catch (IOException e) { - System.err.println("Could not write to output stream."); - } - } - - @Override - public void close() { - try { - this.out.close(); - } catch (IOException e) { - System.err.println("Could not correctly close output stream."); - } - } -} diff --git a/scripts/helper_scripts/parser/src/storage/TableWriter.java b/scripts/helper_scripts/parser/src/storage/TableWriter.java deleted file mode 100755 index 98de7a3f..00000000 --- a/scripts/helper_scripts/parser/src/storage/TableWriter.java +++ /dev/null @@ -1,249 +0,0 @@ -package storage; - -import taxons.TaxonList; -import tools.TaxonsUniprots2Tables; -import xml.*; - -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.ArrayList; -import java.util.HashMap; -import java.io.IOException; -import java.io.File; -import java.sql.Timestamp; - - -/** - * Intermediate class to add PeptideData to the database - * - * @author Bart Mesuere - * @author Felix Van der Jeugt - * - */ -public class TableWriter implements UniprotObserver { - - public static 
final String[] ranks = new String[]{"taxon_id", "superkingdom", "kingdom", "subkingdom", "superphylum", "phylum", "subphylum","superclass", "class", "subclass", "superorder", "order", "suborder", "infraorder", "superfamily", "family", "subfamily", "tribe", "subtribe", "genus", "subgenus", "species_group", "species_subgroup", "species", "subspecies", "strain", "varietas", "forma"}; - private static final Map rankIndices = new HashMap<>(); - - static { - for(int i = 0; i < ranks.length; i++) { - rankIndices.put(ranks[i], i); - } - } - - private TaxonList taxonList; - private Set wrongTaxonIds; - - // csv files - private CSV.IndexedWriter peptides; - private CSV.IndexedWriter uniprotEntries; - private CSV.IndexedWriter goCrossReferences; - private CSV.IndexedWriter ecCrossReferences; - private CSV.IndexedWriter interProCrossReferences; - - /** - * Creates a new data object - */ - public TableWriter(TaxonsUniprots2Tables args) { - wrongTaxonIds = new HashSet(); - - /* Opening CSV files for writing. */ - try { - taxonList = TaxonList.loadFromFile(args.taxonsFile); - peptides = new CSV.IndexedWriter(args.peptidesFile); - uniprotEntries = new CSV.IndexedWriter(args.uniprotEntriesFile); - ecCrossReferences = new CSV.IndexedWriter(args.ecCrossReferencesFile); - goCrossReferences = new CSV.IndexedWriter(args.goCrossReferencesFile); - interProCrossReferences = new CSV.IndexedWriter(args.interProCrossReferencesFile); - } catch(IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error creating tsv files"); - e.printStackTrace(); - System.exit(1); - } - - } - - /** - * Stores a complete UniprotEntry in the database - * - * @param entry - * the UniprotEntry to store - */ - public void store(UniprotEntry entry) { - long uniprotEntryId = addUniprotEntry(entry.getUniprotAccessionNumber(), entry.getVersion(), - entry.getTaxonId(), entry.getType(), entry.getName(), entry.getSequence()); - if (uniprotEntryId != -1) { // failed to add entry - String faSummary = Stream.of( - entry.getGOReferences().stream().map(UniprotGORef::getId), - entry.getECReferences().stream().filter(x -> !x.getId().isEmpty()).map(x->"EC:"+x.getId()), - entry.getInterProReferences().stream().filter(x -> !x.getId().isEmpty()).map(x->"IPR:"+x.getId()) - ).flatMap(i -> i).collect(Collectors.joining(";")); - - for(String sequence : entry.digest()) { - addData(sequence.replace('I', 'L'), uniprotEntryId, sequence, faSummary); - } - for (UniprotGORef ref : entry.getGOReferences()) - addGORef(ref, uniprotEntryId); - for (UniprotECRef ref : entry.getECReferences()) - addECRef(ref, uniprotEntryId); - for (UniprotInterProRef ref : entry.getInterProReferences()) - addInterProRef(ref, uniprotEntryId); - } - } - - /** - * - * Inserts the entry info of a uniprot entry into the database and returns - * the generated id. - * - * @param uniprotAccessionNumber - * The accession number of the entry - * @param version - * The version of the entry - * @param taxonId - * The taxonId of the organism of the entry - * @param type - * The type of the entry. Can be swissprot or trembl - * @param sequence - * The full sequence of the peptide. - * @return The database ID of the uniprot entry. 
- */ - public long addUniprotEntry(String uniprotAccessionNumber, int version, int taxonId, - String type, String name, String sequence) { - if(0 <= taxonId && taxonId < taxonList.size() && taxonList.get(taxonId) != null) { - try { - uniprotEntries.write( - uniprotAccessionNumber, - Integer.toString(version), - Integer.toString(taxonId), - type, - name, - sequence - ); - return uniprotEntries.index(); - } catch(IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error writing to CSV."); - e.printStackTrace(); - } - } else { - if (!wrongTaxonIds.contains(taxonId)) { - wrongTaxonIds.add(taxonId); - System.err.println(new Timestamp(System.currentTimeMillis()) + " " + taxonId - + " added to the list of " + wrongTaxonIds.size() + " invalid taxonIds."); - } - } - return -1; - } - - /** - * Adds peptide data to the database - * - * @param unifiedSequence - * The sequence of the peptide with AA's I and L the - * same. - * @param uniprotEntryId - * The id of the uniprot entry from which the peptide data was - * retrieved. - * @param originalSequence - * The original sequence of the peptide. - * @param functionalAnnotations - * A semicollon separated list of allocated functional analysis terms - */ - public void addData(String unifiedSequence, long uniprotEntryId, String originalSequence, String functionalAnnotations) { - try { - peptides.write( - unifiedSequence, - originalSequence, - Long.toString(uniprotEntryId), - functionalAnnotations - ); - } catch(IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error adding this peptide to the database: " + unifiedSequence); - e.printStackTrace(); - } - } - - /** - * Adds a uniprot entry GO reference to the database - * - * @param ref - * The uniprot GO reference to add - * @param uniprotEntryId - * The uniprotEntry of the cross reference - */ - public void addGORef(UniprotGORef ref, long uniprotEntryId) { - try { - goCrossReferences.write(Long.toString(uniprotEntryId), ref.getId()); - } catch (IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error adding this GO reference to the database."); - e.printStackTrace(); - } - - } - - /** - * Adds a uniprot entry EC reference to the database - * - * @param ref - * The uniprot EC reference to add - * @param uniprotEntryId - * The uniprotEntry of the cross reference - */ - public void addECRef(UniprotECRef ref, long uniprotEntryId) { - try { - ecCrossReferences.write(Long.toString(uniprotEntryId), ref.getId()); - } catch (IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error adding this EC reference to the database."); - e.printStackTrace(); - } - - } - - /** - * Adds a uniprot entry InterPro reference to the database - * - * @param ref - * The uniprot InterPro reference to add - * @param uniprotEntryId - * The uniprotEntry of the cross reference - */ - public void addInterProRef(UniprotInterProRef ref, long uniprotEntryId) { - try { - interProCrossReferences.write(Long.toString(uniprotEntryId), ref.getId()); - } catch (IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Error adding this InterPro reference to the database."); - e.printStackTrace(); - } - } - - @Override - public void handleEntry(UniprotEntry entry) { - store(entry); - } - - @Override - public void close() { - try { - uniprotEntries.close(); - peptides.close(); - goCrossReferences.close(); - ecCrossReferences.close(); - interProCrossReferences.close(); - } 
catch(IOException e) { - System.err.println(new Timestamp(System.currentTimeMillis()) - + " Something closing the csv files."); - e.printStackTrace(); - } - } - -} diff --git a/scripts/helper_scripts/parser/src/taxons/Taxon.java b/scripts/helper_scripts/parser/src/taxons/Taxon.java deleted file mode 100755 index 43eeb376..00000000 --- a/scripts/helper_scripts/parser/src/taxons/Taxon.java +++ /dev/null @@ -1,55 +0,0 @@ -package taxons; - -import java.util.EnumMap; -import java.util.Map; - -public class Taxon { - - final public String name; - final public Rank rank; - final public int parent; - - public boolean valid; - - public Taxon(String name, Rank rank, int parent) { - this.name = name; - this.rank = rank; - this.parent = parent; - this.valid = true; - } - - public void invalidate() { - this.valid = false; - } - - public boolean valid() { - return this.valid; - } - - - public static enum Rank { - NO_RANK, SUPERKINGDOM, KINGDOM, SUBKINGDOM, SUPERPHYLUM, PHYLUM, SUBPHYLUM, SUPERCLASS, CLASS, SUBCLASS, SUPERORDER, ORDER, SUBORDER, INFRAORDER, SUPERFAMILY, FAMILY, SUBFAMILY, TRIBE, SUBTRIBE, GENUS, SUBGENUS, SPECIES_GROUP, SPECIES_SUBGROUP, SPECIES, SUBSPECIES, STRAIN, VARIETAS, FORMA; - - public static final Rank[] values = Rank.values(); - - private static final Map indices = new EnumMap(Rank.class); - static { - for(int i = 0; i < values.length; i++) { - indices.put(values[i], i); - } - } - - public int index() { - return indices.get(this); - } - - public String toString() { - return this.name().toLowerCase().replace('_', ' '); - } - - public static Rank fromString(String s) { - return valueOf(s.toUpperCase().replace(' ', '_')); - } - } - -} diff --git a/scripts/helper_scripts/parser/src/taxons/TaxonList.java b/scripts/helper_scripts/parser/src/taxons/TaxonList.java deleted file mode 100755 index f0173f2d..00000000 --- a/scripts/helper_scripts/parser/src/taxons/TaxonList.java +++ /dev/null @@ -1,172 +0,0 @@ -package taxons; - -import storage.CSV; - -import java.util.ArrayList; -import java.util.regex.Pattern; -import java.io.FileReader; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.FileNotFoundException; - -public class TaxonList extends ArrayList { - - private static final Pattern PATTERN = Pattern.compile("\\|"); - - public TaxonList() { - super(); - } - - public static TaxonList loadFromFile(String filename) throws IOException { - TaxonList tl = new TaxonList(); - CSV.Reader reader = new CSV.Reader(filename); - String[] row = null; - while((row = reader.read()) != null) { - int id = Integer.parseInt(row[0]); - Taxon t = new Taxon( - row[1], - Taxon.Rank.fromString(row[2]), - Integer.parseInt(row[3]) - ); - if(! 
CSV.toBoolean(row[4])) t.invalidate(); - while(tl.size() <= id) tl.add(null); - tl.set(id, t); - } - return tl; - } - - public static TaxonList parseDumps(String namesFile, String nodesFile) - throws FileNotFoundException, IOException { - TaxonList tl = new TaxonList(); - BufferedReader names = new BufferedReader(new FileReader(namesFile)); - BufferedReader nodes = new BufferedReader(new FileReader(nodesFile)); - - String nodeline = null; - while((nodeline = nodes.readLine()) != null) { - String[] noderow = PATTERN.split(nodeline); - int taxon_id = Integer.parseInt(noderow[0].trim()); - int parent_id = Integer.parseInt(noderow[1].trim()); - Taxon.Rank rank = Taxon.Rank.fromString(noderow[2].trim()); - - String nameline = null; - String name = null, clas = null; - int taxon_id2 = -1; - while(!"scientific name".equals(clas) && (nameline = names.readLine()) != null) { - String[] namerow = PATTERN.split(nameline); - taxon_id2 = Integer.parseInt(namerow[0].trim()); - name = namerow[1].trim(); - clas = namerow[3].trim(); - } - - if("scientific name".equals(clas) && taxon_id == taxon_id2) { - while(tl.size() <= taxon_id) tl.add(null); - tl.set(taxon_id, new Taxon(name, rank, parent_id)); - } else { - throw new RuntimeException("Taxon " + taxon_id + - " did not have a scientific name."); - } - } - - names.close(); - nodes.close(); - - return tl; - - } - - public void invalidate() { - for(int i = 0; i < size(); i++) validate(i); - } - - private boolean validate(int taxon_id) { - Taxon t = get(taxon_id); - - if(t == null) return false; - - if(! t.valid() - || (t.rank == Taxon.Rank.SPECIES - && ( - (t.name.matches(".*\\d.*") && !t.name.contains("virus")) - || t.name.endsWith(" sp.") - || t.name.endsWith(" genomosp.") - || t.name.contains(" bacterium") - ) - ) - || t.name.contains("enrichment culture") - || t.name.contains("mixed culture") - || t.name.contains("uncultured") - || t.name.contains("unidentified") - || t.name.contains("unspecified") - || t.name.contains("undetermined") - || t.name.contains("sample") - || t.name.endsWith("metagenome") - || t.name.endsWith("library") - || taxon_id == 28384 - || taxon_id == 48479 - || taxon_id == 1869227) { - t.invalidate(); - return false; - } - - if(taxon_id == 1) return true; - - if(! validate(t.parent)) t.invalidate(); - return t.valid(); - } - - public void writeToFile(String filename) throws IOException { - CSV.Writer writer = new CSV.Writer(filename); - for(int i = 0; i < size(); i++) { - Taxon t = get(i); - if(t != null) writer.write(Integer.toString(i), t.name, - t.rank.toString(), Integer.toString(t.parent), - CSV.toString(t.valid())); - } - writer.close(); - } - - public void writeLineagesToFile(String filename) throws IOException { - CSV.Writer writer = new CSV.Writer(filename); - int nranks = Taxon.Rank.values.length; - - for(int i = 0; i < size(); i++) { - Taxon t = get(i); - if(t == null) continue; - - // +1 want - no_rank + lineage_id + taxon_id - String[] lineage = new String[nranks]; - lineage[0] = Integer.toString(i); - - int tid = rankedAncestor(i); - t = get(tid); - boolean valid = t.valid(); - for(int j = nranks - 1; j >= 1; j--) { - if(j > t.rank.index()) { - lineage[j] = valid ? null : "-1"; - } else { - valid = t.valid(); - lineage[j] = Integer.toString((valid ? 
1 : -1) * tid); - tid = rankedAncestor(t.parent); - t = get(tid); - } - } - - writer.write(lineage); - } - - writer.close(); - } - - private int rankedAncestor(int tid) { - Taxon t = get(tid); - int pid = -1; - while(t != null && tid != pid && t.rank == Taxon.Rank.NO_RANK) { - pid = tid; - tid = t.parent; - t = get(tid); - } - if(t != null) return tid; - return 1; // only used in case a taxon is no descendant of root - } - -} diff --git a/scripts/helper_scripts/parser/src/tools/LineagesSequencesTaxons2LCAs.java b/scripts/helper_scripts/parser/src/tools/LineagesSequencesTaxons2LCAs.java deleted file mode 100755 index 67cf3630..00000000 --- a/scripts/helper_scripts/parser/src/tools/LineagesSequencesTaxons2LCAs.java +++ /dev/null @@ -1,141 +0,0 @@ -package tools; - -import java.io.*; -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; -import java.util.HashMap; -import java.util.regex.Pattern; - -public class LineagesSequencesTaxons2LCAs { - - public static final int GENUS = 18; - public static final int SPECIES = 22; - public static final int RANKS = 27; - private static final Pattern SEPARATOR = Pattern.compile("\t"); - private static final String NULL = "\\N"; - private int[][] taxonomy; - private final Writer writer; - - public LineagesSequencesTaxons2LCAs(String taxonomyFile) throws IOException { - writer = new BufferedWriter(new OutputStreamWriter(System.out, "utf-8")); - buildTaxonomy(taxonomyFile); - } - - private void buildTaxonomy(String file) throws FileNotFoundException, IOException { - HashMap taxonomyMap = new HashMap<>(); - InputStream is = new FileInputStream(new File(file)); - BufferedReader br = new BufferedReader(new InputStreamReader(is)); - - br.lines() - .forEach(line -> { - String[] elements = SEPARATOR.split(line, 28); - - int key = Integer.parseInt(elements[0]); - int[] lineage = Arrays.stream(elements) - .skip(1)// skip taxonId - .mapToInt(s -> s.toUpperCase().equals("\\N") ? 
0 : Integer.parseInt(s)) - .toArray(); - - taxonomyMap.put(key, lineage); - }); - - int max = taxonomyMap.keySet().stream().max(Integer::compare).get(); - taxonomy = new int[max + 1][]; - taxonomyMap.keySet().stream().forEach(key -> taxonomy[key] = taxonomyMap.get(key)); - } - - public void calculateLCAs() throws IOException { - BufferedReader br = new BufferedReader(new InputStreamReader(System.in), 67108864); - - int count = 0; - String currentSequence = null; - Collection taxa = new ArrayList<>(); - String line; - while ((line = br.readLine()) != null) { - count++; - if (count % 10000000 == 0) { - System.err.println(new Timestamp(System.currentTimeMillis()) + ": " + count); - } - - // outperforms split by at least 20% - int t = line.indexOf('\t'); - String sequence = line.substring(0, t); - int taxonId = Integer.parseInt(line.substring(t + 1)); - - if (currentSequence == null || !currentSequence.equals(sequence)) { - if (currentSequence != null) { - handleLCA(currentSequence, calculateLCA(taxa)); - } - - currentSequence = sequence; - taxa.clear(); - } - - taxa.add(taxonId); - } - handleLCA(currentSequence, calculateLCA(taxa)); - } - - private int calculateLCA(Collection taxa) { - int lca = 1; - int[][] lineages = taxa.stream() - .map(t -> taxonomy[t]) - .filter(l -> l != null) - .toArray(int[][]::new); - for (int rank = 0; rank < RANKS; rank++) { - final int finalRank = rank; - final int[] val = {-1}; - boolean allMatch = Arrays.stream(lineages) - .mapToInt(l -> l[finalRank]) - .filter(i -> finalRank == GENUS || finalRank == SPECIES ? i > 0 : i >= 0) - .peek(i -> val[0] = val[0] == -1 ? i : val[0]) - .allMatch(i -> i == val[0]); - - if (val[0] != -1) { - if (!allMatch) { - break; - } - if (val[0] != 0) { - lca = val[0]; - } - } - } - return lca; - } - - private void handleLCA(String sequence, int lca) { - try { - writer.write(sequence + "\t" + lca + '\n'); - } catch (IOException e) { - e.printStackTrace(); - } - } - - public void close() throws IOException { - writer.close(); - } - - /** - * first argument should be the lineages in tsv format without a header row. Create by running: - * $ echo "select * from lineages;" | mysql -u unipept -p unipept | sed 1d > lineages.tsv - *

- * standard input should be the peptides in tsv format with a header row. Create by running: - * $ echo "select sequence_id, taxon_id from peptides left join uniprot_entries on peptides.uniprot_entry_id = uniprot_entries.id;" | \n - * mysql -u unipept -p unipept -q | sort -S 50% --parallel=12 -k1n > sequences.tsv - * - * @param args - */ - public static void main(String... args) { - try { - System.err.println(new Timestamp(System.currentTimeMillis()) + ": reading taxonomy"); - LineagesSequencesTaxons2LCAs l = new LineagesSequencesTaxons2LCAs(args[0]); - System.err.println(new Timestamp(System.currentTimeMillis()) + ": reading sequences"); - l.calculateLCAs(); - l.close(); - } catch (IOException e) { - e.printStackTrace(); - } - } -} diff --git a/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java b/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java deleted file mode 100755 index 06a72e56..00000000 --- a/scripts/helper_scripts/parser/src/tools/NamesNodes2TaxonsLineages.java +++ /dev/null @@ -1,36 +0,0 @@ -package tools; - -import java.io.IOException; -import java.io.FileNotFoundException; - -import com.beust.jcommander.Parameter; -import com.beust.jcommander.JCommander; -import taxons.TaxonList; - -public class NamesNodes2TaxonsLineages { - - @Parameter(names="--names", description="Taxon names input file") public String namesFile; - @Parameter(names="--nodes", description="Taxon nodes input file") public String nodesFile; - @Parameter(names="--taxons", description="Taxon TSV output file") public String taxonsFile; - @Parameter(names="--lineages", description="Lineages TSV output file") public String lineagesFile; - - /** - * Parse a list of taxons and their lineages from the NCBI dumps. - * - * This program will parse the first two argument files, and create the next - * two. The first two arguments are the nodes.dmp and names.dmp files - * downloaded from the NCBI. TSV-dumps of the parsed taxons and lineages - * will be written to the third and fourth parameter. 
- */ - public static void main(String[] args) throws IOException { - NamesNodes2TaxonsLineages main = new NamesNodes2TaxonsLineages(); - new JCommander(main, args); - - TaxonList tl = TaxonList.parseDumps(main.namesFile, main.nodesFile); - tl.invalidate(); - tl.writeToFile(main.taxonsFile); - tl.writeLineagesToFile(main.lineagesFile); - } - -} - diff --git a/scripts/helper_scripts/parser/src/tools/TaxonsUniprots2Tables.java b/scripts/helper_scripts/parser/src/tools/TaxonsUniprots2Tables.java deleted file mode 100644 index 14763ee7..00000000 --- a/scripts/helper_scripts/parser/src/tools/TaxonsUniprots2Tables.java +++ /dev/null @@ -1,49 +0,0 @@ -package tools; - -import java.io.IOException; - -import com.beust.jcommander.Parameter; -import com.beust.jcommander.JCommander; - -import storage.TableWriter; -import tsv.UniprotTabParser; - -public class TaxonsUniprots2Tables { - - @Parameter(names="--peptide-min", description="Minimum peptide length") public int peptideMin; - @Parameter(names="--peptide-max", description="Maximum peptide length") public int peptideMax; - @Parameter(names="--taxons", description="Taxons TSV input file") public String taxonsFile; - @Parameter(names="--peptides", description="Peptides TSV output file") public String peptidesFile; - @Parameter(names="--uniprot-entries", description="Uniprot entries TSV output file") public String uniprotEntriesFile; - @Parameter(names="--ec", description="EC references TSV output file") public String ecCrossReferencesFile; - @Parameter(names="--go", description="GO references TSV output file") public String goCrossReferencesFile; - @Parameter(names="--interpro", description="InterPro references TSV output file") public String interProCrossReferencesFile; - @Parameter(names="--verbose", description="Enable verbose mode") public boolean verboseMode; - - /** - * Parse the UniProt TSV-file into TSV tables. - * - * The first parameter is a taxon file, as written by NamesNodes2Taxons. The next 5 parameters are the output files, - * all in TSV format. In order, they are: the peptides, the uniprot entries, the EC cross references, the GO cross - * references and the InterPro cross references. - * - * This program reads input from stdin and writes output to the files indicated by the parameters given above. - */ - public static void main(String[] args) throws IOException { - TaxonsUniprots2Tables main = new TaxonsUniprots2Tables(); - new JCommander(main, args); - - if (main.verboseMode) { - System.err.println("INFO: TaxonsUniprots2Tables - Verbose mode enabled."); - } - - TableWriter writer = new TableWriter(main); - - UniprotTabParser parser = new UniprotTabParser(); - parser.parse(main.peptideMin, main.peptideMax, System.in, writer, main.verboseMode); - - writer.close(); - } - -} - diff --git a/scripts/helper_scripts/parser/src/tools/XmlToTabConverter.java b/scripts/helper_scripts/parser/src/tools/XmlToTabConverter.java deleted file mode 100755 index ab045f19..00000000 --- a/scripts/helper_scripts/parser/src/tools/XmlToTabConverter.java +++ /dev/null @@ -1,35 +0,0 @@ -package tools; - -import org.xml.sax.SAXException; -import storage.TabWriter; -import xml.UniprotHandler; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.parsers.SAXParser; -import javax.xml.parsers.SAXParserFactory; -import java.io.*; - -/** - * This tool accepts 3 different arguments: - * peptide_min_length, peptide_max_length, database_type_name - * - * The input is read from stdin and the output of this script is written to stdout. 
- * - * This tool's job is to produce a TSV-file with the same contents as the XML-file that's fed into this script. - */ -public class XmlToTabConverter { - public static void main(String[] args) throws IOException, SAXException, ParserConfigurationException { - SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); - - InputStream uniprotStream = System.in; - UniprotHandler handler = new UniprotHandler(Integer.parseInt(args[0]), Integer.parseInt(args[1]), args[2]); - - TabWriter writer = new TabWriter(System.out, Boolean.parseBoolean(args[3])); - handler.addObserver(writer); - - parser.parse(uniprotStream, handler); - - uniprotStream.close(); - writer.close(); - } -} diff --git a/scripts/helper_scripts/parser/src/tsv/UniprotTabParser.java b/scripts/helper_scripts/parser/src/tsv/UniprotTabParser.java deleted file mode 100755 index 0d057cb8..00000000 --- a/scripts/helper_scripts/parser/src/tsv/UniprotTabParser.java +++ /dev/null @@ -1,77 +0,0 @@ -package tsv; - -import xml.*; - -import java.io.*; -import java.util.HashMap; -import java.util.Map; -import java.util.stream.Stream; - -public class UniprotTabParser { - public void parse( - int peptideMinLength, - int peptideMaxLength, - InputStream input, - UniprotObserver observer, - boolean verbose - ) throws IOException { - BufferedReader reader = new BufferedReader(new InputStreamReader(input)); - - String line = reader.readLine().trim(); - String[] header = Stream.of(line.split("\t")).map(String::trim).toArray(String[]::new); - - Map headerMap = new HashMap(); - for (int i = 0; i < header.length; i++) { - headerMap.put(header[i], i); - } - - line = reader.readLine(); - - while (line != null) { - if (verbose) { - System.err.println("INFO VERBOSE: TSV line parsed: " + line); - } - - String[] fields = line.trim().split("\t"); - - try { - // We need to emit one new UniprotEntry per line in the input - UniprotEntry entry = new UniprotEntry(fields[headerMap.get("Status")].trim(), peptideMinLength, peptideMaxLength); - - // Now convert all fields into the correct Uniprot entry properties - entry.setUniprotAccessionNumber(fields[headerMap.get("Entry")]); - entry.setSequence(fields[headerMap.get("Sequence")].trim()); - - entry.setRecommendedName(fields[headerMap.get("Protein names")].trim()); - // Todo, does not always need to be set? - // entry.setSubmittedName("name"); - - entry.setVersion(Integer.parseInt(fields[headerMap.get("Version (entry)")].trim())); - - for (String ecNumber : fields[headerMap.get("EC number")].split(";")) { - entry.addECRef(new UniprotECRef(ecNumber.trim())); - } - - for (String goTerm : fields[headerMap.get("Gene ontology IDs")].split(";")) { - entry.addGORef(new UniprotGORef(goTerm.trim())); - } - - for (String interpro : fields[headerMap.get("Cross-reference (InterPro)")].split(";")) { - entry.addInterProRef(new UniprotInterProRef(interpro.trim())); - } - - entry.setTaxonId(Integer.parseInt(fields[headerMap.get("Organism ID")])); - - // Emit entry that's finished and handle it... 
- observer.handleEntry(entry); - } catch (Exception e) { - System.err.println("Invalid entry ignored: " + line); - System.err.println("Invalid entry error details: " + e.getMessage()); - } - - line = reader.readLine(); - } - - reader.close(); - } -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotDbRef.java b/scripts/helper_scripts/parser/src/xml/UniprotDbRef.java deleted file mode 100755 index cdbd58cd..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotDbRef.java +++ /dev/null @@ -1,39 +0,0 @@ -package xml; - -public class UniprotDbRef { - - private String type; - private String sequenceId; - private String proteinId; - - public UniprotDbRef(String type, String sequenceId, String proteinId) { - this.type = type; - this.sequenceId = sequenceId; - this.proteinId = proteinId; - } - - public UniprotDbRef(String type) { - this.type = type; - } - - public String getType() { - return type; - } - - public String getSequenceId() { - return sequenceId; - } - - public void setSequenceId(String sequenceId) { - this.sequenceId = sequenceId; - } - - public String getProteinId() { - return proteinId; - } - - public void setProteinId(String proteinId) { - this.proteinId = proteinId; - } - -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotECRef.java b/scripts/helper_scripts/parser/src/xml/UniprotECRef.java deleted file mode 100755 index 4adba41f..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotECRef.java +++ /dev/null @@ -1,14 +0,0 @@ -package xml; - -public class UniprotECRef { - - private String id; - - public UniprotECRef(String id) { - this.id = id; - } - - public String getId() { - return id; - } -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotEntry.java b/scripts/helper_scripts/parser/src/xml/UniprotEntry.java deleted file mode 100755 index 870442da..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotEntry.java +++ /dev/null @@ -1,162 +0,0 @@ -package xml; - -import java.util.Arrays; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Stream; - -/** - * @author Bart Mesuere - * - */ -public class UniprotEntry { - - // peptide settings - private final int peptideMin; - private final int peptideMax; - - private String uniprotAccessionNumber; - private int version; - private int taxonId; - private String type; - private String recommendedName; - private String submittedName; - private String sequence; - private List dbReferences; - private List goReferences; - private List ecReferences; - private List interProReferences; - private List sequences; - - public UniprotEntry(String type, int peptideMin, int peptideMax) { - this.type = type; - this.peptideMin = peptideMin; - this.peptideMax = peptideMax; - dbReferences = new ArrayList(); - goReferences = new ArrayList(); - ecReferences = new ArrayList(); - interProReferences = new ArrayList(); - sequences = new ArrayList(); - } - - public void reset(String type) { - uniprotAccessionNumber = null; - version = 0; - taxonId = 0; - this.type = type; - recommendedName = null; - submittedName = null; - sequence = null; - dbReferences.clear(); - goReferences.clear(); - ecReferences.clear(); - interProReferences.clear(); - sequences.clear(); - } - - public String getUniprotAccessionNumber() { - return uniprotAccessionNumber; - } - - public void setUniprotAccessionNumber(String uniprotAccessionNumber) { - if(this.uniprotAccessionNumber == null) { - this.uniprotAccessionNumber = uniprotAccessionNumber; - } - } - - public int getVersion() { - return version; - } - - public void 
setVersion(int version) { - this.version = version; - } - - public int getTaxonId() { - return taxonId; - } - - public void setTaxonId(int taxonId) { - this.taxonId = taxonId; - } - - public String getType() { - return type; - } - - public String getName() { - if(recommendedName != null) return recommendedName; - return submittedName; - } - - public void setRecommendedName(String name) { - recommendedName = name; - } - - public void setSubmittedName(String name) { - submittedName = name; - } - - public String getSequence() { - return sequence; - } - - public void setSequence(String sequence) { - this.sequence = sequence.replace(" ", ""); - } - - public void addDbRef(UniprotDbRef ref) { - dbReferences.add(ref); - } - - public void addGORef(UniprotGORef ref) { - goReferences.add(ref); - } - - public void addECRef(UniprotECRef ref) { - ecReferences.add(ref); - } - - public void addInterProRef(UniprotInterProRef ref) { interProReferences.add(ref); } - - public List digest() { - sequences.clear(); - int start = 0; - int length = sequence.length(); - for (int i = 0; i < length; i++) { - char x = sequence.charAt(i); - if ((x == 'K' || x == 'R') && (i + 1 < length && sequence.charAt(i + 1) != 'P')) { - if (i + 1 - start >= peptideMin && i + 1 - start <= peptideMax) { - sequences.add(sequence.substring(start, i + 1)); - } - start = i + 1; - } - } - if (length - start >= peptideMin && length - start <= peptideMax) { - sequences.add(sequence.substring(start, length)); - } - return sequences; - } - - public List getDbReferences() { - return dbReferences; - } - - public List getGOReferences() { - return goReferences; - } - - public List getECReferences() { - return ecReferences; - } - - public List getInterProReferences(){ return interProReferences; } - - - @Override - public String toString() { - return uniprotAccessionNumber + ", " + version + ", " + taxonId + ", " + type + ", " - + sequence; - } - -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotGORef.java b/scripts/helper_scripts/parser/src/xml/UniprotGORef.java deleted file mode 100755 index d1c909f1..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotGORef.java +++ /dev/null @@ -1,14 +0,0 @@ -package xml; - -public class UniprotGORef { - - private String id; - - public UniprotGORef(String id) { - this.id = id; - } - - public String getId() { - return id; - } -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotHandler.java b/scripts/helper_scripts/parser/src/xml/UniprotHandler.java deleted file mode 100755 index 8164d162..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotHandler.java +++ /dev/null @@ -1,249 +0,0 @@ -package xml; - -import java.sql.Timestamp; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.helpers.DefaultHandler; - -public class UniprotHandler extends DefaultHandler { - - private final String uniprotType; - - private UniprotEntry currentItem; - private UniprotDbRef dbRef; - private UniprotGORef goRef; - private UniprotECRef ecRef; - private UniprotInterProRef interProRef; - private StringBuilder charData; - private int i; - private boolean inComment = false; - private boolean inOrganism = false; - private boolean inEvidence = false; - private boolean inRecommendedName = false; - private boolean inSubmittedName = false; - private List observers; - - private Map endTagWorkers; - private Map startTagWorkers; - - public UniprotHandler(int peptideMinLength, 
int peptideMaxLength, String uniprotType) { - super(); - this.uniprotType = uniprotType; - currentItem = new UniprotEntry(uniprotType, peptideMinLength, peptideMaxLength); - charData = new StringBuilder(); - observers = new ArrayList(); - - // set up end tag workers - endTagWorkers = new HashMap(); - endTagWorkers.put("entry", new EndTagWorker() { - @Override - public void handleTag(String data) { - emitEntry(currentItem); - } - }); - endTagWorkers.put("accession", new EndTagWorker() { - @Override - public void handleTag(String data) { - currentItem.setUniprotAccessionNumber(data); - } - }); - endTagWorkers.put("organism", new EndTagWorker() { - @Override - public void handleTag(String data) { - inOrganism = false; - } - }); - endTagWorkers.put("evidence", new EndTagWorker() { - @Override - public void handleTag(String data) { - inEvidence = false; - } - }); - endTagWorkers.put("recommendedName", new EndTagWorker() { - @Override - public void handleTag(String data) { - inRecommendedName = false; - } - }); - endTagWorkers.put("submittedName", new EndTagWorker() { - @Override - public void handleTag(String data) { - inSubmittedName = false; - } - }); - endTagWorkers.put("sequence", new EndTagWorker() { - @Override - public void handleTag(String data) { - currentItem.setSequence(data); - } - }); - endTagWorkers.put("dbReference", new EndTagWorker() { - @Override - public void handleTag(String data) { - if (inComment) { - return; - } - - if (!inOrganism) { - if (dbRef != null) { - currentItem.addDbRef(dbRef); - dbRef = null; - } else if (goRef != null) { - currentItem.addGORef(goRef); - goRef = null; - } else if (ecRef != null) { - currentItem.addECRef(ecRef); - ecRef = null; - } else if (interProRef != null) { - currentItem.addInterProRef(interProRef); - interProRef = null; - } - } - } - }); - endTagWorkers.put("fullName", new EndTagWorker() { - @Override - public void handleTag(String data) { - if (inRecommendedName) { - currentItem.setRecommendedName(data); - } else if (inSubmittedName) { - currentItem.setSubmittedName(data); - } - } - }); - endTagWorkers.put("comment", new EndTagWorker() { - @Override - public void handleTag(String data) { - inComment = false; - } - }); - - // set up start tag workers - startTagWorkers = new HashMap(); - startTagWorkers.put("entry", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - newCurrentItem(); - currentItem.setVersion(Integer.valueOf(atts.getValue("version"))); - } - }); - startTagWorkers.put("organism", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - inOrganism = true; - } - }); - startTagWorkers.put("evidence", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - inEvidence = true; - } - }); - startTagWorkers.put("recommendedName", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - inRecommendedName = true; - } - }); - startTagWorkers.put("submittedName", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - inSubmittedName = true; - } - }); - startTagWorkers.put("dbReference", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - // Skip references if they are embedded in comments (otherwise, these could cause duplicate identifiers) - if (inComment) { - return; - } - - if (inOrganism) { - if (atts.getValue("type").equals("NCBI Taxonomy")) - currentItem.setTaxonId(Integer.valueOf(atts.getValue("id"))); - } else if (!inEvidence) { - if (atts.getValue("type").equals("EMBL")) { - 
dbRef = new UniprotDbRef("EMBL"); - dbRef.setSequenceId(atts.getValue("id")); - } else if (atts.getValue("type").equals("RefSeq")) { - dbRef = new UniprotDbRef("RefSeq"); - dbRef.setProteinId(atts.getValue("id")); - } else if (atts.getValue("type").equals("GO")) { - goRef = new UniprotGORef(atts.getValue("id")); - } else if (atts.getValue("type").equals("EC")) { - ecRef = new UniprotECRef(atts.getValue("id")); - } else if (atts.getValue("type").equals("InterPro")) { - interProRef = new UniprotInterProRef(atts.getValue("id")); - } - } - } - }); - startTagWorkers.put("property", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - if (dbRef != null) { - if (atts.getValue("type").equals("protein sequence ID")) - dbRef.setProteinId(atts.getValue("value")); - else if (atts.getValue("type").equals("nucleotide sequence ID")) - dbRef.setSequenceId(atts.getValue("value")); - } - } - }); - startTagWorkers.put("comment", new StartTagWorker() { - @Override - public void handleTag(Attributes atts) { - inComment = true; - } - }); - } - - @Override - public void startElement(String namespaceURI, String localName, String qName, Attributes atts) { - StartTagWorker worker = startTagWorkers.get(qName); - if (worker != null) { - worker.handleTag(atts); - } - } - - @Override - public void endElement(String uri, String localName, String qName) throws SAXException { - EndTagWorker worker = endTagWorkers.get(qName); - if (worker != null) { - worker.handleTag(charData.toString().trim()); - } - charData.delete(0, charData.length()); - } - - @Override - public void characters(char[] ch, int start, int length) throws SAXException { - charData.append(ch, start, length); - } - - private void newCurrentItem() { - currentItem.reset(uniprotType); - } - - private interface StartTagWorker { - void handleTag(Attributes att); - } - - private interface EndTagWorker { - void handleTag(String data); - } - - public void addObserver(UniprotObserver o) { - observers.add(o); - } - - private void emitEntry(UniprotEntry entry) { - for (UniprotObserver o : observers) { - o.handleEntry(entry); - } - } -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotInterProRef.java b/scripts/helper_scripts/parser/src/xml/UniprotInterProRef.java deleted file mode 100755 index 87211e91..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotInterProRef.java +++ /dev/null @@ -1,14 +0,0 @@ -package xml; - -public class UniprotInterProRef { - - private String id; - - public UniprotInterProRef(String id) { - this.id = id; - } - - public String getId() { - return id; - } -} diff --git a/scripts/helper_scripts/parser/src/xml/UniprotObserver.java b/scripts/helper_scripts/parser/src/xml/UniprotObserver.java deleted file mode 100755 index 45bf4bc2..00000000 --- a/scripts/helper_scripts/parser/src/xml/UniprotObserver.java +++ /dev/null @@ -1,8 +0,0 @@ -package xml; - -import xml.UniprotEntry; - -public interface UniprotObserver { - public void handleEntry(UniprotEntry entry); - public void close(); -} diff --git a/scripts/helper_scripts/unipept-database-rs/src/bin/dat-parser.rs b/scripts/helper_scripts/unipept-database-rs/src/bin/dat-parser.rs index 007716d3..7e9e3c6b 100644 --- a/scripts/helper_scripts/unipept-database-rs/src/bin/dat-parser.rs +++ b/scripts/helper_scripts/unipept-database-rs/src/bin/dat-parser.rs @@ -2,7 +2,6 @@ use anyhow::{Context, Result}; use clap::Parser; use unipept_database::dat_parser::uniprot_dat_parser; use unipept_database::dat_parser::utils::write_header; -use 
unipept_database::uniprot::UniprotType;
 
 use unipept_database::utils::files::open_sin;
@@ -24,8 +23,8 @@ fn main() -> Result<()> {
 
 #[derive(Parser, Debug)]
 struct Cli {
-    #[clap(value_enum, short = 't', long, default_value_t = UniprotType::Swissprot)]
-    db_type: UniprotType,
+    #[clap(short = 't', long, default_value = "swissprot")]
+    db_type: String,
     #[clap(long, default_value_t = 0)]
     threads: usize,
 }
diff --git a/scripts/helper_scripts/unipept-database-rs/src/bin/taxa-by-chunk.rs b/scripts/helper_scripts/unipept-database-rs/src/bin/taxa-by-chunk.rs
index 64a20225..a19958ff 100644
--- a/scripts/helper_scripts/unipept-database-rs/src/bin/taxa-by-chunk.rs
+++ b/scripts/helper_scripts/unipept-database-rs/src/bin/taxa-by-chunk.rs
@@ -31,7 +31,7 @@ fn main() -> Result<()> {
         all_taxa.push(taxa_id);
     }
 
-    let chunk_file_regex = Regex::new(r"unipept\..*\.gz").context("Error creating regex")?;
+    let chunk_file_regex = Regex::new(r"unipept\..*\.lz4").context("Error creating regex")?;
 
     for entry in read_dir(&args.chunk_dir).context("Error reading chunk directory")? {
         let entry = entry.context("Error reading entry from chunk directory")?;
@@ -52,7 +52,7 @@ fn main() -> Result<()> {
         }
 
         // Parse the taxa range out of the filename
-        let replaced_name = base_name.replace("unipept.", "").replace(".chunk.gz", "");
+        let replaced_name = base_name.replace("unipept.", "").replace(".chunk.lz4", "");
         let range = replaced_name.split_once('-');
         let range = range.with_context(|| format!("Unable to split {replaced_name} on '-'"))?;
         let start: u64 = range
diff --git a/scripts/helper_scripts/unipept-database-rs/src/bin/xml-parser.rs b/scripts/helper_scripts/unipept-database-rs/src/bin/xml-parser.rs
index 9e102cb6..b10ae097 100644
--- a/scripts/helper_scripts/unipept-database-rs/src/bin/xml-parser.rs
+++ b/scripts/helper_scripts/unipept-database-rs/src/bin/xml-parser.rs
@@ -4,7 +4,6 @@ use std::num::NonZeroUsize;
 use anyhow::{Context, Result};
 use clap::Parser;
 use smartstring::{LazyCompact, SmartString};
-use unipept_database::uniprot::UniprotType;
 use uniprot::uniprot::{SequentialParser, ThreadedParser};
 
 use unipept_database::utils::files::open_sin;
@@ -50,8 +49,8 @@ type SmartStr = SmartString;
 // Parse a Uniprot XML file and convert it into a TSV-file
 #[derive(Parser, Debug)]
 struct Cli {
-    #[clap(value_enum, short = 't', long, default_value_t = UniprotType::Swissprot)]
-    uniprot_type: UniprotType,
+    #[clap(short = 't', long, default_value = "swissprot")]
+    uniprot_type: String,
     #[clap(long, default_value_t = 0)]
     threads: u32,
     #[clap(short, long, default_value_t = false)]
@@ -123,7 +122,7 @@ fn parse_name(entry: &uniprot::uniprot::Entry) -> SmartStr {
 }
 
 /// Write a single UniProt entry to stdout
-fn write_entry(entry: &uniprot::uniprot::Entry, db_type: &UniprotType, verbose: bool) {
+fn write_entry(entry: &uniprot::uniprot::Entry, db_type: &str, verbose: bool) {
     let accession_number: SmartStr = entry.accessions[0].clone();
     let sequence: SmartStr = entry.sequence.value.clone();
 
@@ -165,7 +164,7 @@ fn write_entry(entry: &uniprot::uniprot::Entry, db_type: &UniprotType, verbose:
         SmartStr::from(ec_references.join(";")),
         SmartStr::from(go_references.join(";")),
         SmartStr::from(ip_references.join(";")),
-        SmartStr::from(db_type.to_str()),
+        SmartStr::from(db_type),
         taxon_id,
     ];
 
diff --git a/scripts/helper_scripts/unipept-database-rs/src/dat_parser/entry.rs b/scripts/helper_scripts/unipept-database-rs/src/dat_parser/entry.rs
index 4d22fd78..0c2745a6 100644
--- a/scripts/helper_scripts/unipept-database-rs/src/dat_parser/entry.rs
+++ b/scripts/helper_scripts/unipept-database-rs/src/dat_parser/entry.rs
@@ -1,8 +1,5 @@
-use std::collections::HashSet;
-
 use anyhow::Context;
-
-use crate::uniprot::UniprotType;
+use std::collections::HashSet;
 
 // Constants to aid in parsing
 const COMMON_PREFIX_LEN: usize = "ID ".len();
@@ -49,7 +46,7 @@ impl UniProtDATEntry {
     }
 
     /// Write an entry to stdout
-    pub fn write(&self, db_type: &UniprotType) {
+    pub fn write(&self, db_type: &str) {
         if self.name.is_empty() {
             eprintln!(
                 "Could not find a name for entry AC-{}",
@@ -66,7 +63,7 @@ impl UniProtDATEntry {
             self.ec_references.join(";"),
             self.go_references.join(";"),
             self.ip_references.join(";"),
-            db_type.to_str(),
+            db_type,
             self.taxon_id
         )
     }
diff --git a/scripts/helper_scripts/unipept-database-rs/src/lib.rs b/scripts/helper_scripts/unipept-database-rs/src/lib.rs
index 497dd646..9b309f52 100644
--- a/scripts/helper_scripts/unipept-database-rs/src/lib.rs
+++ b/scripts/helper_scripts/unipept-database-rs/src/lib.rs
@@ -2,5 +2,4 @@ pub mod calculate_lcas;
 pub mod dat_parser;
 pub mod taxons_lineages;
 pub mod taxons_uniprots_tables;
-pub mod uniprot;
 pub mod utils;
diff --git a/scripts/helper_scripts/unipept-database-rs/src/taxons_uniprots_tables/table_writer.rs b/scripts/helper_scripts/unipept-database-rs/src/taxons_uniprots_tables/table_writer.rs
index ec9dd1ab..d92b9a15 100644
--- a/scripts/helper_scripts/unipept-database-rs/src/taxons_uniprots_tables/table_writer.rs
+++ b/scripts/helper_scripts/unipept-database-rs/src/taxons_uniprots_tables/table_writer.rs
@@ -112,6 +112,7 @@ impl TableWriter {
                     id,
                     sequence,
                     &summary,
+                    entry.taxon_id,
                 )
                 .context("Failed to write peptide")?;
         }
@@ -125,17 +126,19 @@ impl TableWriter {
         id: i64,
         original_sequence: &[u8],
         annotations: &String,
+        taxon_id: i32,
     ) -> Result<()> {
         self.peptide_count += 1;
 
         writeln!(
             &mut self.peptides,
-            "{}\t{}\t{}\t{}\t{}",
+            "{}\t{}\t{}\t{}\t{}\t{}",
             self.peptide_count,
             String::from_utf8_lossy(&sequence),
             String::from_utf8_lossy(original_sequence),
             id,
-            annotations
+            annotations,
+            taxon_id
         )
         .context("Error writing to TSV")?;
 
diff --git a/scripts/helper_scripts/unipept-database-rs/src/uniprot/mod.rs b/scripts/helper_scripts/unipept-database-rs/src/uniprot/mod.rs
deleted file mode 100644
index ae293ece..00000000
--- a/scripts/helper_scripts/unipept-database-rs/src/uniprot/mod.rs
+++ /dev/null
@@ -1,15 +0,0 @@
-/// Enum for the different kinds of databases
-#[derive(clap::ValueEnum, Clone, Debug)]
-pub enum UniprotType {
-    Swissprot,
-    Trembl,
-}
-
-impl UniprotType {
-    pub fn to_str(&self) -> &str {
-        match self {
-            UniprotType::Swissprot => "swissprot",
-            UniprotType::Trembl => "trembl",
-        }
-    }
-}
diff --git a/scripts/parallel_load.sh b/scripts/parallel_load.sh
index 88224212..fcbfc571 100755
--- a/scripts/parallel_load.sh
+++ b/scripts/parallel_load.sh
@@ -1,5 +1,4 @@
 shopt -s expand_aliases
-alias zcat="pigz -cd"
 
 export db=unipept
 export user=root
@@ -9,16 +8,16 @@ dir="$1"
 
 function load_table() {
     file=$1
-    tbl=`echo $file | sed "s/.tsv.gz//"`
-    echo "zcatting - LOAD DATA LOCAL INFILE '$file' INTO TABLE $tbl"
-    zcat $file | mariadb --local-infile=1 -u$user -p$pass $db -e "LOAD DATA LOCAL INFILE '/dev/stdin' INTO TABLE $tbl;SHOW WARNINGS" 2>&1
+    tbl=`echo $file | sed "s/.tsv.lz4//"`
+    echo "lz4catting - LOAD DATA LOCAL INFILE '$file' INTO TABLE $tbl"
+    lz4 -dc $file | mysql --local-infile=1 -u$user -p$pass $db -e "LOAD DATA LOCAL INFILE '/dev/stdin' INTO TABLE $tbl;SHOW WARNINGS" 2>&1
 }
 export -f load_table
 
 cd "$dir"
-parallel load_table ::: *.tsv.gz
+parallel load_table ::: *.tsv.lz4
 cd "-"
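
Note for reviewers porting the removed Java helpers: the taxon-validity rules deleted from TaxonList.java reduce to a name/rank/id filter. The sketch below is an illustrative Rust rendering of those rules, not the actual code in the taxons_lineages module; the function name and signature are hypothetical, and in the original the result also propagates down the tree, so a taxon is invalidated whenever any of its ancestors is.

```rust
/// Heuristic filter that rejects dummy or unidentified taxa, mirroring the
/// rules of the removed TaxonList.validate(): species names containing digits
/// (unless they mention a virus), "sp."/"genomosp." placeholders, environmental
/// or mixed samples, metagenomes, and a few hard-coded taxon ids.
fn is_acceptable_taxon(taxon_id: u32, name: &str, rank: &str) -> bool {
    let dummy_species = rank == "species"
        && ((name.chars().any(|c| c.is_ascii_digit()) && !name.contains("virus"))
            || name.ends_with(" sp.")
            || name.ends_with(" genomosp.")
            || name.contains(" bacterium"));

    let unidentified = name.contains("enrichment culture")
        || name.contains("mixed culture")
        || name.contains("uncultured")
        || name.contains("unidentified")
        || name.contains("unspecified")
        || name.contains("undetermined")
        || name.contains("sample")
        || name.ends_with("metagenome")
        || name.ends_with("library");

    // Explicitly blacklisted ids from the original implementation.
    let blacklisted = matches!(taxon_id, 28384 | 48479 | 1869227);

    !(dummy_species || unidentified || blacklisted)
}

fn main() {
    // Hypothetical examples; real names and ids come from names.dmp/nodes.dmp.
    assert!(!is_acceptable_taxon(77133, "uncultured bacterium", "species"));
    assert!(is_acceptable_taxon(562, "Escherichia coli", "species"));
}
```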
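The LCA aggregation removed with LineagesSequencesTaxons2LCAs.java walks the 27-rank lineage arrays and keeps descending as long as all lineages agree, treating 0 as "absent" and negative ids as invalidated taxa; at the genus and species ranks only strictly positive values count. A minimal sketch of that consensus walk, assuming lineages are already loaded into fixed-size arrays; the maintained implementation lives in the calculate_lcas module of unipept-database-rs and may differ in detail.

```rust
// Rank layout of the lineage table; genus and species get special treatment
// because a 0 there means "unknown" rather than "not applicable".
const RANKS: usize = 27;
const GENUS: usize = 18;
const SPECIES: usize = 22;

/// Compute the lowest common ancestor of a set of taxa from their lineages
/// (one array of RANKS taxon ids per taxon, 0 = absent, negative = invalid).
fn calculate_lca(lineages: &[[i32; RANKS]]) -> i32 {
    let mut lca = 1; // root
    for rank in 0..RANKS {
        let mut consensus: Option<i32> = None;
        let mut all_match = true;
        for lineage in lineages {
            let value = lineage[rank];
            // Skip values that carry no information at this rank.
            let informative =
                if rank == GENUS || rank == SPECIES { value > 0 } else { value >= 0 };
            if !informative {
                continue;
            }
            match consensus {
                None => consensus = Some(value),
                Some(v) if v != value => {
                    all_match = false;
                    break;
                }
                _ => {}
            }
        }
        if let Some(value) = consensus {
            if !all_match {
                break; // lineages diverge here: the LCA is the last agreed rank
            }
            if value != 0 {
                lca = value;
            }
        }
    }
    lca
}

fn main() {
    // Hypothetical lineages: both share the same superkingdom but diverge deeper.
    let mut a = [0i32; RANKS];
    let mut b = [0i32; RANKS];
    a[0] = 2; a[5] = 1224;
    b[0] = 2; b[5] = 1239;
    assert_eq!(calculate_lca(&[a, b]), 2);
}
```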
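Likewise, the tryptic digest that feeds the peptides table (deleted here from UniprotEntry.java) cleaves after K or R unless the next residue is P, and keeps only peptides whose length falls within the configured bounds. A rough Rust equivalent of that loop, with a made-up example sequence; the actual port lives elsewhere in unipept-database-rs.

```rust
/// Split a protein sequence into tryptic peptides: cleave after K or R,
/// except when the next residue is P. Only peptides whose length lies in
/// [min_len, max_len] are kept, mirroring digest() from the removed Java code.
fn tryptic_digest(sequence: &str, min_len: usize, max_len: usize) -> Vec<&str> {
    let bytes = sequence.as_bytes();
    let mut peptides = Vec::new();
    let mut start = 0;

    for i in 0..bytes.len() {
        let cleave = (bytes[i] == b'K' || bytes[i] == b'R')
            && i + 1 < bytes.len()
            && bytes[i + 1] != b'P';
        if cleave {
            let len = i + 1 - start;
            if (min_len..=max_len).contains(&len) {
                peptides.push(&sequence[start..=i]);
            }
            start = i + 1;
        }
    }

    // Don't forget the C-terminal peptide.
    let len = bytes.len() - start;
    if (min_len..=max_len).contains(&len) {
        peptides.push(&sequence[start..]);
    }
    peptides
}

fn main() {
    // Hypothetical example sequence; real input comes from the UniProt parsers.
    let peptides = tryptic_digest("MKWVTFISLLLLFSSAYSRGVFRR", 5, 50);
    println!("{peptides:?}");
}
```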