# Static Database #101
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Builds a static SQLite database from the current UniProt release
# (Swiss-Prot + TrEMBL) and publishes it, together with the NCBI taxdmp
# archive, as a dated GitHub release.
name: Static Database

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    # * is a special character in YAML so you have to quote this string.
    # Fires at 00:00 on the first day of every month.
    - cron: '0 0 1 * *'

jobs:
  generate_static_database:
    runs-on: ubuntu-latest
    # The final step creates a tag/release and uploads assets, which requires
    # write access to the repository contents.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        id: cache
        with:
          # NOTE(review): CACHE_KEY is not defined anywhere in the visible part
          # of this workflow, so this expands to an empty string - confirm
          # whether a workflow-level `env.CACHE_KEY` was intended.
          shared-key: ${{ env.CACHE_KEY }}
      # Pre-fetch crate sources only when the cache was not restored.
      # Step outputs are strings and any non-empty string (including 'false')
      # is truthy in an expression, so compare against 'true' explicitly
      # instead of negating the output.
      - if: steps.cache.outputs.cache-hit != 'true'
        working-directory: scripts/helper_scripts/unipept-database-rs
        run: cargo fetch
      - run: ./scripts/build_binaries.sh
      # Export the run date once so every later step (zip name, release tag,
      # release title) uses the same value via `env.date`.
      - name: Get current date
        id: date
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
      - name: Install required utilities
        run: |
          sudo apt-get update
          sudo apt-get -y install git curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget uuid-runtime pigz lz4 parallel libxml2-utils
      # The taxonomy dump is attached to the release next to the database.
      - name: Download Taxdmp file
        shell: bash
        run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
      - name: Generate tsv.gz files
        shell: bash
        run: |
          # Both UniProt sources live under the same release directory; the
          # script receives them as one comma-separated argument.
          uniprot_base='https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete'
          ./scripts/build_database.sh static-database "swissprot,trembl" \
            "${uniprot_base}/uniprot_sprot.xml.gz,${uniprot_base}/uniprot_trembl.xml.gz" \
            "output"
      - name: Build SQLite database from generated files
        shell: bash
        run: |
          # Initialize the database
          sqlite3 output.db < workflows/static_database/structure.sql
          # Read all generated data into this database. Each table is loaded
          # from the identically named output/<table>.tsv.lz4 file. The input
          # is tab-separated, so the separator is passed as an explicit $'\t'
          # rather than as an invisible literal whitespace character.
          for table in ec_numbers go_terms interpro_entries taxons lineages; do
            lz4cat "output/${table}.tsv.lz4" \
              | sqlite3 -csv -separator $'\t' output.db ".import /dev/stdin ${table}"
          done
          # Create virtual tables
          sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
          # Compress the database before uploading it to a Github release
          zip unipept-static-db-${{ env.date }}.zip output.db
      - name: Upload or Update Release
        id: upload_or_update_release
        uses: softprops/action-gh-release@v2
        with:
          files: |
            ./unipept-static-db-${{ env.date }}.zip
            ./taxdmp.zip
          # Fail the run instead of publishing a release with a missing asset.
          fail_on_unmatched_files: true
          tag_name: database-${{ env.date }}
          name: Static database ${{ env.date }}
          # This action's input is `target_commitish`; `commitish` is not a
          # recognised input and would be ignored.
          target_commitish: ${{ github.sha }}
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}