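# Static Database
#
# NOTE: the workflow below appears to have been copied from the GitHub web view
# of run "Static Database #89"; the page's "Skip to content" link text is not
# part of the workflow.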

name: Database

# Builds the static-information SQLite database from the UniProt SwissProt and
# TrEMBL dumps, then publishes the zipped database and the downloaded NCBI
# taxdmp archive as assets of a new, date-tagged GitHub release.
on:
  # Allow manual runs.
  workflow_dispatch:
  schedule:
    # * is a special character in YAML so you have to quote this string
    # Fires at 00:00 on the first day of every month.
    - cron: '0 0 1 * *'

jobs:
  generate_static_database:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        id: cache
        with:
          # NOTE(review): this workflow defines no `env.CACHE_KEY`, so the
          # expression expands to an empty string unless the variable is
          # provided from somewhere not visible here - confirm.
          shared-key: ${{ env.CACHE_KEY }}
      - run: cd scripts/helper_scripts/unipept-database-rs && cargo fetch
        # Step outputs are strings, and every non-empty string (including
        # "false") is truthy, so negating the output with `!` would skip this
        # step on a cache miss as well. Compare against 'true' explicitly.
        if: steps.cache.outputs.cache-hit != 'true'
      - run: ./scripts/build_binaries.sh

      - name: Get current date
        id: date
        # Exposes the build date (YYYY-MM-DD) as `steps.date.outputs.date`.
        # Written to $GITHUB_OUTPUT because the `::set-output` workflow command
        # is deprecated.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"

      - name: Install required utilities
        run: |
          sudo apt-get update
          sudo apt-get -y install git curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget uuid-runtime pigz lz4 parallel

      - name: Download Taxdmp file
        shell: bash
        run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip

      - name: Generate tsv.gz files
        shell: bash
        run: ./scripts/build_database.sh static-database "swissprot,trembl" "https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.xml.gz,https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.xml.gz" "output"

      - name: Build SQLite database from generated files
        shell: bash
        run: |
          # Initialize the database
          sqlite3 output.db < workflows/static_database/structure.sql
          # Read all generated data into this database. The inputs are
          # tab-separated, so pass a real tab ($'\t') as the column separator;
          # -separator must come after -csv, which would otherwise reset it.
          for table in ec_numbers go_terms interpro_entries taxons lineages; do
            lz4cat "output/${table}.tsv.lz4" \
              | sqlite3 -csv -separator $'\t' output.db ".import /dev/stdin ${table}"
          done
          # Create virtual tables
          sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
          # Compress the database before uploading it to a Github release
          zip output.zip output.db

      - name: Create new tag
        uses: rickstaa/action-create-tag@v1
        id: tag_create
        with:
          tag: database-${{ steps.date.outputs.date }}
          message: "Static information database built on ${{ steps.date.outputs.date }}"

      # NOTE(review): actions/create-release and actions/upload-release-asset
      # are archived upstream; consider migrating to a maintained alternative.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: database-${{ steps.date.outputs.date }}
          release_name: Static database ${{ steps.date.outputs.date }}
          draft: false
          prerelease: false

      - name: Upload Static Database Release Asset
        id: upload-database-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./output.zip
          asset_name: unipept-static-db-${{ steps.date.outputs.date }}.zip
          asset_content_type: application/zip

      - name: Upload NCBI Taxdmp Release Asset
        id: upload-taxdmp-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./taxdmp.zip
          asset_name: ncbi-taxdmp.zip
          asset_content_type: application/zip