Skip to content

Commit

Permalink
skip sorting when building the suffix array tables
Browse files Browse the repository at this point in the history
  • Loading branch information
tibvdm committed Aug 6, 2024
1 parent b64e684 commit 256f7d5
Showing 1 changed file with 13 additions and 6 deletions.
19 changes: 13 additions & 6 deletions scripts/build_database.sh
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,10 @@ filter_sources_by_taxa() {
done
}

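# First argument indicates whether sorting of the peptides table should be
# skipped (the suffix-array build passes a non-empty value to request this).
# NOTE(review): the `-n "$skip_sort"` guard below runs the sort when the
# argument IS set — confirm whether `-z` was intended.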
create_most_tables() {
skip_sort="$1"

have "$OUTPUT_DIR/taxons.tsv.lz4" || return
log "Started calculation of most tables."

Expand All @@ -578,19 +581,23 @@ create_most_tables() {
--go "$(lz "$OUTPUT_DIR/go_cross_references.tsv.lz4")" \
--interpro "$(lz "$OUTPUT_DIR/interpro_cross_references.tsv.lz4")"

log "Started sorting peptides table"
if [[ -n "$skip_sort" ]]; then
log "Started sorting peptides table"

$CMD_LZ4CAT "$INTDIR/peptides-out.tsv.lz4" \
| LC_ALL=C $CMD_SORT -k2 \
| $CMD_LZ4 > "$INTDIR/peptides-equalized.tsv.lz4"
$CMD_LZ4CAT "$INTDIR/peptides-out.tsv.lz4" \
| LC_ALL=C $CMD_SORT -k2 \
| $CMD_LZ4 > "$INTDIR/peptides-equalized.tsv.lz4"
fi

log "Removing peptides-out.tsv.lz4"
rm "$INTDIR/peptides-out.tsv.lz4"
log "Finished calculation of most tables with status $?"
}

create_tables_and_filter() {
filter_sources_by_taxa | create_most_tables
skip_sort="$1"

filter_sources_by_taxa | create_most_tables "$skip_sort"
}


Expand Down Expand Up @@ -883,7 +890,7 @@ tryptic-index)
suffix-array)
create_taxon_tables
download_and_convert_all_sources
create_tables_and_filter
create_tables_and_filter "skip sort"
fetch_ec_numbers
fetch_go_terms
fetch_interpro_entries
Expand Down

0 comments on commit 256f7d5

Please sign in to comment.