Skip to content

Commit

Permalink
Add scripts for parallel loading and indexing dbs
Browse files Browse the repository at this point in the history
  • Loading branch information
pverscha committed Jul 27, 2023
1 parent f7dd91b commit 60c26af
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 0 deletions.
45 changes: 45 additions & 0 deletions scripts/parallel_index.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#!/bin/bash

# MySQL connection parameters. Each one may be overridden from the
# environment (e.g. DB_HOST=db.example.org ./parallel_index.sh); the
# defaults below apply when a variable is unset or empty.
DB_USER="${DB_USER:-unipept}"
DB_PASSWORD="${DB_PASSWORD:-unipept}"
DB_HOST="${DB_HOST:-localhost}"
DB_NAME="${DB_NAME:-unipept}"

# Launch a background mysql client that adds a single-column index.
# Arguments:
#   $1 - table name (qualified with $DB_NAME inside the statement)
#   $2 - column to index; the index is named idx_<column>
# Globals read: DB_USER, DB_PASSWORD, DB_HOST, DB_NAME
# Returns immediately; the caller is responsible for waiting on the job.
add_index() {
  local tbl="$1" col="$2"
  local stmt="ALTER TABLE $DB_NAME.$tbl ADD INDEX idx_$col ($col);"

  # The trailing & puts the client in the background so that several
  # statements can be in flight at the same time.
  mysql -u "$DB_USER" -p"$DB_PASSWORD" -h "$DB_HOST" -e "$stmt" &
}

# "table:column" pairs; one single-column index is created for each entry.
table_columns=(
  "uniprot_entries:taxon_id"
  "uniprot_entries:uniprot_accession_number"
  "ec_numbers:code"
  "go_terms:code"
  "sequences:sequence"
  "sequences:lca"
  "sequences:lca_il"
  "peptides:sequence_id"
  "peptides:uniprot_entry_id"
  "peptides:original_sequence_id"
  "go_cross_references:uniprot_entry_id"
  "ec_cross_references:uniprot_entry_id"
  "interpro_cross_references:uniprot_entry_id"
)

# Start every index statement in the background and remember which PID
# belongs to which entry, so failures can be reported individually.
pids=()
labels=()
for entry in "${table_columns[@]}"; do
  table=${entry%%:*}  # text before the first ':'
  column=${entry#*:}  # text after the first ':'
  add_index "$table" "$column"
  # add_index backgrounds the mysql client in this shell, so $! is the PID
  # of the job it just started.
  pids+=("$!")
  labels+=("$entry")
done

# A bare `wait` always returns 0 and would hide failed statements; waiting
# on each PID yields that job's own exit status.
failures=0
for i in "${!pids[@]}"; do
  if ! wait "${pids[$i]}"; then
    echo "Adding index for ${labels[$i]} failed." >&2
    failures=$((failures + 1))
  fi
done

if [ "$failures" -gt 0 ]; then
  echo "$failures of ${#pids[@]} index statements failed." >&2
  exit 1
fi

echo "All index statements have been executed."
25 changes: 25 additions & 0 deletions scripts/parallel_load.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Let aliases take effect in this non-interactive script, then route every
# zcat call through pigz (-c: write to stdout, -d: decompress).
shopt -s expand_aliases
alias zcat='pigz -cd'

# Connection settings; exported so that processes started by this script
# inherit them.
export db=unipept user=root pass=unipept

# Directory given as the first command-line argument.
dir=$1

# Stream one gzipped TSV dump into the table named after the file.
# Arguments:
#   $1 - dump file name; the table name is that name minus a trailing
#        ".tsv.gz"
# Globals read: user, pass, db
# Outputs: a progress line, followed by whatever the mysql client prints
#          (its stderr is merged into stdout).
# Returns: the decompressor's exit status if it failed, otherwise the mysql
#          client's exit status.
load_table() {
  local file=$1
  # Parameter expansion removes only a literal trailing ".tsv.gz"; a regex
  # such as s/.tsv.gz// treats the dots as wildcards and matches anywhere
  # in the name.
  local tbl=${file%.tsv.gz}
  local -a stages

  echo "zcatting - LOAD DATA LOCAL INFILE '$file' INTO TABLE $tbl"
  zcat "$file" \
    | mysql --local-infile=1 "-u$user" "-p$pass" "$db" \
      -e "LOAD DATA LOCAL INFILE '/dev/stdin' INTO TABLE $tbl;SHOW WARNINGS" 2>&1
  # Must be captured immediately: the next command overwrites PIPESTATUS.
  stages=("${PIPESTATUS[@]}")

  # Without this check a failed decompression (e.g. a truncated archive) is
  # hidden behind the exit status of the last pipeline stage.
  if [ "${stages[0]}" -ne 0 ]; then
    echo "decompressing '$file' failed with status ${stages[0]}" >&2
    return "${stages[0]}"
  fi
  return "${stages[1]}"
}

# Make load_table available to the bash processes that parallel starts.
export -f load_table

# Stop if the directory cannot be entered; otherwise the glob below would
# pick up whatever *.tsv.gz files sit in the current directory.
cd "$dir" || { echo "cannot change directory to '$dir'" >&2; exit 1; }

# With nullglob an unmatched pattern expands to nothing instead of being
# passed on literally as a file name.
shopt -s nullglob
dumps=(*.tsv.gz)
shopt -u nullglob

if [ "${#dumps[@]}" -eq 0 ]; then
  echo "no .tsv.gz files found in '$dir'" >&2
  exit 1
fi

# One load_table job per dump file.
parallel load_table ::: "${dumps[@]}"
status=$?

cd "-"

# Propagate job failures instead of always reporting success.
if [ "$status" -ne 0 ]; then
  echo "parallel exited with status $status" >&2
  exit "$status"
fi

echo "done"

0 comments on commit 60c26af

Please sign in to comment.