# Static Database #95

name: Static Database
on:
  # Rebuild automatically at 00:00 on the first day of every month.
  # The cron expression must stay quoted: `*` is a YAML indicator character.
  schedule:
    - cron: "0 0 1 * *"
  # Also allow the workflow to be started manually.
  workflow_dispatch:
jobs:
  # Builds an SQLite database from the generated UniProt-derived TSV files and
  # publishes it, together with the NCBI taxdmp archive, as a GitHub release.
  generate_static_database:
    runs-on: ubuntu-latest
    permissions:
      # Creating a release and uploading its assets needs write access to
      # repository contents when the default token is read-only.
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        id: cache
        with:
          # NOTE(review): CACHE_KEY is not defined in the visible part of this
          # workflow; if it is unset this expression expands to an empty
          # string. Confirm whether a fixed key (or no key) was intended.
          shared-key: ${{ env.CACHE_KEY }}
      - run: cd scripts/helper_scripts/unipept-database-rs && cargo fetch
        # Step outputs are strings, so `!'false'` evaluates to false and the
        # step would never run on a cache miss; compare against 'true' instead.
        if: steps.cache.outputs.cache-hit != 'true'
      - run: ./scripts/build_binaries.sh
      - name: Get current date
        id: date
        # Exported through GITHUB_ENV so later steps can read `env.date`.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
      - name: Install required utilities
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            git curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget \
            uuid-runtime pigz lz4 parallel
      - name: Download Taxdmp file
        shell: bash
        run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
      # NOTE(review): the step name says tsv.gz, but the next step reads
      # output/*.tsv.lz4 -- confirm which format the script produces.
      - name: Generate tsv.gz files
        shell: bash
        run: |
          uniprot_base="https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete"
          ./scripts/build_database.sh static-database "swissprot,trembl" \
            "${uniprot_base}/uniprot_sprot.xml.gz,${uniprot_base}/uniprot_trembl.xml.gz" \
            "output"
      - name: Build SQLite database from generated files
        shell: bash
        run: |
          # Initialize the database
          sqlite3 output.db < workflows/static_database/structure.sql
          # Read all generated data into this database. The inputs are
          # tab-separated, so the separator is spelled as $'\t' rather than
          # relying on a literal tab character surviving in this file.
          for table in ec_numbers go_terms interpro_entries taxons lineages; do
            lz4cat "output/${table}.tsv.lz4" \
              | sqlite3 -csv -separator $'\t' output.db ".import /dev/stdin ${table}"
          done
          # Create virtual tables
          sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
          # Compress the database before uploading it to a GitHub release
          zip unipept-static-db-${{ env.date }}.zip output.db
      - name: Upload or Update Release
        id: upload_or_update_release
        uses: softprops/action-gh-release@v2
        with:
          files: |
            ./unipept-static-db-${{ env.date }}.zip
            ./taxdmp.zip
          # Fail the job instead of publishing a release with missing assets.
          fail_on_unmatched_files: true
          tag_name: database-${{ env.date }}
          name: Static database ${{ env.date }}
          # The action's input is `target_commitish`; `commitish` is ignored.
          target_commitish: ${{ github.sha }}
          draft: false
          prerelease: false
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}