Skip to content

Commit

Permalink
Fix bugs related to lz4 decompression
Browse files (browse the repository at this point in the history)
  • Loading branch information
stijndcl committed Mar 17, 2024
1 parent a255ada commit bb98d06
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
6 changes: 3 additions & 3 deletions scripts/build_database.sh
Original file line number Diff line number Diff line change
Expand Up @@ -732,15 +732,15 @@ create_kmer_index() {
log "Started the construction of the $KMER_LENGTH-mer index."
for PREFIX in A C D E F G H I K L M N P Q R S T V W Y; do
pv -N $PREFIX "$OUTPUT_DIR/uniprot_entries.tsv.lz4" \
- | gunzip \
+ | $CMD_LZ4CAT \
| cut -f4,7 \
| grep "^[0-9]* [ACDEFGHIKLMNPQRSTVWY]*$" \
| umgap splitkmers -k"$KMER_LENGTH" \
| sed -n "s/^$PREFIX//p" \
| LC_ALL=C $CMD_SORT \
| sed "s/^/$PREFIX/"
done \
- | umgap joinkmers "$(guz "$OUTPUT_DIR/taxons.tsv.lz4")" \
+ | umgap joinkmers "$(luz "$OUTPUT_DIR/taxons.tsv.lz4")" \
| cut -d' ' -f1,2 \
| umgap buildindex \
> "$OUTPUT_DIR/$KMER_LENGTH-mer.index"
Expand All @@ -755,7 +755,7 @@ create_tryptic_index() {
have "$TABDIR/sequences.tsv.lz4" || return
log "Started the construction of the tryptic index."
pv "$TABDIR/sequences.tsv.lz4" \
- | gunzip \
+ | $CMD_LZ4CAT \
| cut -f2,3 \
| grep -v "\\N" \
| umgap buildindex \
Expand Down
1 change: 1 addition & 0 deletions scripts/helper_scripts/.gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
dat-parser
functional-analysis
lcas
substitute-aas
taxa-by-chunk
taxons-lineages
taxons-uniprots-tables
Expand Down

0 comments on commit bb98d06

Please sign in to comment.