-
Notifications
You must be signed in to change notification settings - Fork 2
88 lines (83 loc) · 3.79 KB
/
static_database.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Builds the static information database (EC numbers, GO terms, InterPro
# entries, taxons and lineages) as an SQLite file and publishes it, together
# with a freshly downloaded NCBI taxdmp archive, as a GitHub release.
name: Database

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  schedule:
    # * is a special character in YAML so you have to quote this string
    # Runs at 00:00 UTC on the first day of every month.
    - cron: '0 0 1 * *'

# Creating the tag, the release and its assets through GITHUB_TOKEN requires
# write access to the repository contents.
permissions:
  contents: write

jobs:
  generate_static_database:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: dtolnay/rust-toolchain@stable
      - uses: Swatinem/rust-cache@v2
        id: cache
        with:
          # NOTE(review): CACHE_KEY is not defined in any `env:` block of this
          # workflow, so this expression expands to an empty string here.
          # Confirm whether a fixed key was intended.
          shared-key: ${{ env.CACHE_KEY }}
      - run: cd scripts/helper_scripts/unipept-database-rs && cargo fetch
        # Step outputs are strings, and every non-empty string (including
        # 'false') is truthy in an expression, so `!cache-hit` would skip this
        # step on a reported miss. Compare against 'true' explicitly instead.
        if: steps.cache.outputs.cache-hit != 'true'
      - run: ./scripts/build_binaries.sh
      - name: Get current date
        id: date
        # The `::set-output` workflow command is deprecated; step outputs are
        # written to the file referenced by $GITHUB_OUTPUT.
        run: echo "date=$(date +'%Y-%m-%d')" >> "$GITHUB_OUTPUT"
      - name: Install required utilities
        run: |
          sudo apt-get update
          sudo apt-get -y install git curl unzip gawk sqlite3 libsqlite3-dev pv nodejs wget uuid-runtime pigz lz4 parallel
      - name: Download Taxdmp file
        shell: bash
        run: wget https://ftp.ncbi.nlm.nih.gov/pub/taxonomy/taxdmp.zip
      - name: Generate tsv.gz files
        shell: bash
        run: |
          # Both UniProt sources live under the same release directory.
          base_url="https://ftp.expasy.org/databases/uniprot/current_release/knowledgebase/complete"
          ./scripts/build_database.sh static-database "swissprot,trembl" \
            "${base_url}/uniprot_sprot.xml.gz,${base_url}/uniprot_trembl.xml.gz" "output"
      - name: Build SQLite database from generated files
        shell: bash
        run: |
          # Initialize the database
          sqlite3 output.db < workflows/static_database/structure.sql
          # Read all generated data into this database. The inputs are
          # tab-separated, so the separator is spelled as $'\t' to keep the
          # tab visible and safe from editors that rewrite whitespace.
          for table in ec_numbers go_terms interpro_entries taxons lineages; do
            lz4cat "output/${table}.tsv.lz4" \
              | sqlite3 -csv -separator $'\t' output.db ".import /dev/stdin ${table}"
          done
          # Create virtual tables
          sqlite3 output.db < workflows/static_database/init_virtual_tables.sql
          # Compress the database before uploading it to a Github release
          zip output.zip output.db
      - name: Create new tag
        uses: rickstaa/action-create-tag@v1
        id: tag_create
        with:
          tag: database-${{ steps.date.outputs.date }}
          message: "Static information database built on ${{ steps.date.outputs.date }}"
      # NOTE(review): actions/create-release and actions/upload-release-asset
      # are archived upstream and no longer maintained; consider migrating to
      # a maintained alternative (for example the preinstalled `gh release`
      # CLI) in a follow-up change.
      - name: Create Release
        id: create_release
        uses: actions/create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: database-${{ steps.date.outputs.date }}
          release_name: Static database ${{ steps.date.outputs.date }}
          draft: false
          prerelease: false
      - name: Upload Static Database Release Asset
        id: upload-database-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./output.zip
          asset_name: unipept-static-db-${{ steps.date.outputs.date }}.zip
          asset_content_type: application/zip
      - name: Upload NCBI Taxdmp Release Asset
        id: upload-taxdmp-release-asset
        uses: actions/upload-release-asset@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          upload_url: ${{ steps.create_release.outputs.upload_url }}
          asset_path: ./taxdmp.zip
          asset_name: ncbi-taxdmp.zip
          asset_content_type: application/zip