From bb98d06f0e6b7997d2aa158e7dcadf9aaaa66774 Mon Sep 17 00:00:00 2001 From: stijndcl Date: Sun, 17 Mar 2024 13:07:07 +0100 Subject: [PATCH] Fix bugs related to lz4 decompression --- scripts/build_database.sh | 6 +++--- scripts/helper_scripts/.gitignore | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/scripts/build_database.sh b/scripts/build_database.sh index 5e7cffc..ac0d2b0 100755 --- a/scripts/build_database.sh +++ b/scripts/build_database.sh @@ -732,7 +732,7 @@ create_kmer_index() { log "Started the construction of the $KMER_LENGTH-mer index." for PREFIX in A C D E F G H I K L M N P Q R S T V W Y; do pv -N $PREFIX "$OUTPUT_DIR/uniprot_entries.tsv.lz4" \ - | gunzip \ + | $CMD_LZ4CAT \ | cut -f4,7 \ | grep "^[0-9]* [ACDEFGHIKLMNPQRSTVWY]*$" \ | umgap splitkmers -k"$KMER_LENGTH" \ @@ -740,7 +740,7 @@ create_kmer_index() { | LC_ALL=C $CMD_SORT \ | sed "s/^/$PREFIX/" done \ - | umgap joinkmers "$(guz "$OUTPUT_DIR/taxons.tsv.lz4")" \ + | umgap joinkmers "$(luz "$OUTPUT_DIR/taxons.tsv.lz4")" \ | cut -d' ' -f1,2 \ | umgap buildindex \ > "$OUTPUT_DIR/$KMER_LENGTH-mer.index" @@ -755,7 +755,7 @@ create_tryptic_index() { have "$TABDIR/sequences.tsv.lz4" || return log "Started the construction of the tryptic index." pv "$TABDIR/sequences.tsv.lz4" \ - | gunzip \ + | $CMD_LZ4CAT \ | cut -f2,3 \ | grep -v "\\N" \ | umgap buildindex \ diff --git a/scripts/helper_scripts/.gitignore b/scripts/helper_scripts/.gitignore index 38df1f8..de53698 100644 --- a/scripts/helper_scripts/.gitignore +++ b/scripts/helper_scripts/.gitignore @@ -2,6 +2,7 @@ dat-parser functional-analysis lcas +substitute-aas taxa-by-chunk taxons-lineages taxons-uniprots-tables