#######################################
# Sort the peptides table numerically and publish it as the final
# peptides file.
#
# Temporary step: the TODO notes at its call site say that using the
# intermediate file directly "does not play nicely for MariaDB" and that
# this step should be removed for Postgres.
#
# The sorted result is first written to a sibling temporary file and only
# renamed to its final name once every stage of the pipeline succeeded, so
# a failed run never leaves a truncated peptides.tsv.lz4 behind (the caller
# deletes the unsorted input right after this step).
#
# Globals (read):
#   INTDIR      directory holding peptides_by_original.tsv.lz4
#   OUTPUT_DIR  directory receiving peptides.tsv.lz4 (created if missing)
#   CMD_LZ4CAT, CMD_SORT, CMD_LZ4
#               commands of the pipeline; expanded unquoted, as before, so
#               a command with embedded options still word-splits
#               (presumably intentional - confirm against their definitions)
# Returns:
#   the status of `have` when the input file is not available,
#   non-zero when the output directory cannot be created, a pipeline stage
#   fails or the final rename fails, 0 otherwise.
#######################################
sort_peptides() {
    have "$INTDIR/peptides_by_original.tsv.lz4" || return
    log "Started sorting the peptides table."

    local input="$INTDIR/peptides_by_original.tsv.lz4"
    local output="$OUTPUT_DIR/peptides.tsv.lz4"
    # Same directory as the final file so that the rename below stays on one
    # file system; a plain redirect keeps the usual umask-based permissions.
    local partial="$output.tmp"

    mkdir -p "$OUTPUT_DIR" || return

    # pipefail is enabled inside a subshell only, so the caller's shell
    # options are left untouched while a failure in *any* stage is detected.
    # shellcheck disable=SC2086  # CMD_* may carry options, splitting wanted
    if ! (
        set -o pipefail
        $CMD_LZ4CAT "$input" \
            | LC_ALL=C $CMD_SORT -n \
            | $CMD_LZ4 - > "$partial"
    ); then
        rm -f -- "$partial"
        log "Failed sorting the peptides table."
        return 1
    fi

    if ! mv -f -- "$partial" "$output"; then
        rm -f -- "$partial"
        return 1
    fi

    log "Finished sorting the peptides table."
}