# Static Database #93
# This file contains bidirectional Unicode text that may be interpreted or
# compiled differently than what appears below. To review, open the file in an
# editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Database

on:
  # Allow the workflow to be started manually.
  workflow_dispatch:
  schedule:
    # Run at 00:00 on the first day of every month (GitHub evaluates cron
    # schedules in UTC).
    # * is a special character in YAML so you have to quote this string
    - cron: '0 0 1 * *'
jobs:
  # Runs the database build script on the UniProt Swiss-Prot and TrEMBL dumps,
  # loads the generated tables into an SQLite file and publishes it, together
  # with the NCBI taxonomy dump, as assets of a date-stamped GitHub release.
  generate_static_database:
    runs-on: ubuntu-latest
    # Creating the tag and the release writes to the repository contents, so
    # request that scope explicitly instead of relying on the repository's
    # default GITHUB_TOKEN permissions.
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        id: cache
        with:
          # NOTE(review): no `env:` block visible in this workflow defines
          # CACHE_KEY; if it is unset this expands to an empty string.
          # Confirm where it is supposed to come from.
          shared-key: ${{ env.CACHE_KEY }}
      # Pre-fetch the crates only when the cache was not restored. Step
      # outputs are strings, and any non-empty string (including 'false') is
      # truthy in an expression, so compare against 'true' explicitly rather
      # than negating the output.
      - run: cd scripts/helper_scripts/unipept-database-rs && cargo fetch
        if: steps.cache.outputs.cache-hit != 'true'
      - run: ./scripts/build_binaries.sh
      - name: Get current date
        id: date
        # The `::set-output` workflow command is deprecated; step outputs are
        # appended to the file named by $GITHUB_OUTPUT instead.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
      - name: Install required utilities
        run: |
          sudo apt-get update
          sudo apt-get -y install git curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget uuid-runtime pigz lz4 parallel
      - name: Download Taxdmp file
        shell: bash
        run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
      # NOTE(review): the next step reads `output/*.tsv.lz4`, so the "tsv.gz"
      # in this step name may be outdated. Name left unchanged.
      - name: Generate tsv.gz files
        shell: bash
        run: |
          # Both UniProt dumps live under the same release directory.
          uniprot_base="https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete"
          ./scripts/build_database.sh static-database "swissprot,trembl" \
            "${uniprot_base}/uniprot_sprot.xml.gz,${uniprot_base}/uniprot_trembl.xml.gz" \
            "output"
      - name: Build SQLite database from generated files
        shell: bash
        run: |
          # Initialize the database
          sqlite3 output.db < workflows/static_database/structure.sql
          # Read all generated data into this database. The inputs are .tsv
          # files, so the field separator is a tab; $'\t' is bash ANSI-C
          # quoting for a tab, which avoids depending on a literal tab
          # character surviving inside this YAML file.
          for table in ec_numbers go_terms interpro_entries taxons lineages; do
            lz4cat "output/${table}.tsv.lz4" \
              | sqlite3 -csv -separator $'\t' output.db ".import /dev/stdin ${table}"
          done
          # Create virtual tables
          sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
          # Compress the database before uploading it to a Github release
          zip output.zip output.db
      - name: Create new tag
        uses: rickstaa/action-create-tag@v1
        id: tag_create
        with:
          tag: database-${{ steps.date.outputs.date }}
          message: "Static information database built on ${{ steps.date.outputs.date }}"
      # NOTE(review): actions/create-release and actions/upload-release-asset
      # are archived upstream and no longer maintained. Consider migrating
      # the three steps below to a maintained alternative (e.g. the `gh`
      # CLI). They are kept as-is here to preserve the existing step ids and
      # outputs.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: database-${{ steps.date.outputs.date }}
          release_name: Static database ${{ steps.date.outputs.date }}
          draft: false
          prerelease: false
      - name: Upload Static Database Release Asset
        id: upload-database-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./output.zip
          asset_name: unipept-static-db-${{ steps.date.outputs.date }}.zip
          asset_content_type: application/zip
      - name: Upload NCBI Taxdmp Release Asset
        id: upload-taxdmp-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./taxdmp.zip
          asset_name: ncbi-taxdmp.zip
          asset_content_type: application/zip