From f42263c2875a78500f141027803a38606ecb0f14 Mon Sep 17 00:00:00 2001 From: paulzierep Date: Thu, 10 Oct 2024 12:49:24 +0200 Subject: [PATCH] A data manager for motus (#1450) * init motus DB downloader, worked locally * fix citation * hide test, text with options * fix test and loc * use select type to limit choice of versions * fix RST * fix .sample file --- .../.shed.yml | 12 ++ .../data_manager_fetch_motus_db.py | 125 ++++++++++++++++++ .../data_manager/macros.xml | 30 +++++ .../data_manager/motus_db_fetcher.xml | 64 +++++++++ .../motus_test_DB_non_functional/readme.md | 4 + .../data_manager_conf.xml | 23 ++++ .../readme.md | 32 +++++ .../tool-data/motus_db_versioned.loc.sample | 9 ++ .../tool_data_table_conf.xml.sample | 7 + 9 files changed, 306 insertions(+) create mode 100644 data_managers/data_manager_motus_db_downloader/.shed.yml create mode 100644 data_managers/data_manager_motus_db_downloader/data_manager/data_manager_fetch_motus_db.py create mode 100644 data_managers/data_manager_motus_db_downloader/data_manager/macros.xml create mode 100644 data_managers/data_manager_motus_db_downloader/data_manager/motus_db_fetcher.xml create mode 100644 data_managers/data_manager_motus_db_downloader/data_manager/motus_test_DB_non_functional/readme.md create mode 100644 data_managers/data_manager_motus_db_downloader/data_manager_conf.xml create mode 100644 data_managers/data_manager_motus_db_downloader/readme.md create mode 100644 data_managers/data_manager_motus_db_downloader/tool-data/motus_db_versioned.loc.sample create mode 100644 data_managers/data_manager_motus_db_downloader/tool_data_table_conf.xml.sample diff --git a/data_managers/data_manager_motus_db_downloader/.shed.yml b/data_managers/data_manager_motus_db_downloader/.shed.yml new file mode 100644 index 0000000000..8f44e713d0 --- /dev/null +++ b/data_managers/data_manager_motus_db_downloader/.shed.yml @@ -0,0 +1,12 @@ +categories: +- Data Managers +- Metagenomics +description: Contains a data manager that 
#!/usr/bin/env python
"""Galaxy data manager script that installs a versioned mOTUs database.

The script reads the JSON parameter file handed over via ``--out``, stores the
database below the ``extra_files_path`` found in it and finally overwrites the
same file with the ``motus_db_versioned`` data table entry.
"""

import argparse
import json
import os
import shutil
import subprocess
import sys
import tarfile
import urllib.request
from datetime import datetime, timezone
from urllib.parse import urlparse

try:
    import wget
except ImportError:  # python-wget is optional, urllib is used as a fallback
    wget = None

# Download location of every supported database release.
version_mapping = {
    "3.1.0": "https://zenodo.org/records/7778108/files/db_mOTU_v3.1.0.tar.gz",
    "3.0.1": "https://zenodo.org/records/5140350/files/db_mOTU_v3.0.1.tar.gz",
    "3.0.0": "https://zenodo.org/records/5012106/files/db_mOTU_v3.0.0.tar.gz",
}


def _download(url, out_dir):
    """Download *url* into the existing directory *out_dir*; return the file path."""
    if wget is not None:
        downloaded = wget.download(url, out=out_dir)
        # NOTE(review): python-wget appears to return a path that already
        # contains out_dir; only join when a bare file name came back.
        if os.path.exists(downloaded):
            return downloaded
        return os.path.join(out_dir, downloaded)

    filename = os.path.basename(urlparse(url).path) or "download.tar.gz"
    target = os.path.join(out_dir, filename)
    urllib.request.urlretrieve(url, target)
    return target


def _check_url_reachable(url):
    """Exit with wget's return code when ``wget --spider`` cannot reach *url*."""
    completed = subprocess.run(["wget", "--spider", url], check=False)
    if completed.returncode:
        print("Error downloading motus database.", file=sys.stderr)
        sys.exit(completed.returncode)


def _install_test_db(db_path):
    """Copy the bundled placeholder database next to this script to *db_path*."""
    # TODO: replace once a real test DB is available:
    # https://github.com/motu-tool/mOTUs/issues/121
    test_db_path = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "motus_test_DB_non_functional"
    )
    try:
        shutil.copytree(test_db_path, db_path)
    except OSError as err:
        print("Error copying motus database.", file=sys.stderr)
        print(err, file=sys.stderr)
        sys.exit(1)


def download_untar_store(url, tmp_path, dest_path):
    """Download a tar archive holding one folder and copy that folder to dest_path.

    Args:
        url: Location of the (optionally compressed) tar archive.
        tmp_path: Scratch directory for the download and the extraction. It is
            created if needed and always removed again, even on failure.
        dest_path: Directory that receives the content of the archive's single
            top-level folder; it must not exist yet (``shutil.copytree``).

    Raises:
        ValueError: If the archive does not contain exactly one top-level entry.
    """
    extract_path = os.path.join(tmp_path, "extract")
    # Creating the extraction dir up front also creates tmp_path and keeps
    # os.listdir() below working for an archive without members.
    os.makedirs(extract_path, exist_ok=True)

    try:
        tarfile_path = _download(url, tmp_path)

        with tarfile.open(tarfile_path) as tar:
            if hasattr(tarfile, "data_filter"):
                # Extraction filters refuse members escaping extract_path.
                tar.extractall(extract_path, filter="data")
            else:
                tar.extractall(extract_path)

        entries = os.listdir(extract_path)
        if len(entries) != 1:
            # Fail loudly: returning normally would make the caller register
            # a database path that was never created.
            raise ValueError(
                "Expected exactly one top-level folder in "
                f"{os.path.basename(tarfile_path)}, found {len(entries)}"
            )

        print(f"Copy data to {dest_path}")
        shutil.copytree(os.path.join(extract_path, entries[0]), dest_path)
        print("Done !")
    finally:
        shutil.rmtree(tmp_path, ignore_errors=True)


def main():
    """Parse the command line, install the database and write the DM JSON."""
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument(
        "--out", dest="output", action="store", required=True, help="JSON filename"
    )
    parser.add_argument(
        "--version",
        dest="version",
        action="store",
        required=True,
        choices=sorted(version_mapping),
        help="Version of the DB",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with an lighted database",
    )
    args = parser.parse_args()

    # The output file of a DM initially holds the job parameters as JSON; the
    # extra_files_path found there is where the DB data is stored.
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    os.makedirs(workdir, exist_ok=True)

    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = f"db_from_{timestamp}"
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")
    url = version_mapping[args.version]

    if args.test:
        # The test only checks that the mOTUs download link is reachable and
        # then installs a small placeholder instead of the real database.
        _check_url_reachable(url)
        _install_test_db(db_path)
    else:
        download_untar_store(url, tmp_path, db_path)

    # Replace the parameter JSON with the new data table entry.
    data_manager_entry = {
        "data_tables": {
            "motus_db_versioned": {
                "value": db_value,
                "version": args.version,
                "name": f"mOTUs DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


if __name__ == "__main__":
    main()
\ No newline at end of file diff --git a/data_managers/data_manager_motus_db_downloader/data_manager_conf.xml b/data_managers/data_manager_motus_db_downloader/data_manager_conf.xml new file mode 100644 index 0000000000..bfa5caa62c --- /dev/null +++ b/data_managers/data_manager_motus_db_downloader/data_manager_conf.xml @@ -0,0 +1,23 @@ + + + + + + + + + + + + ${path} + + motus_database/${value}/db_mOTU + + ${GALAXY_DATA_MANAGER_DATA_PATH}/motus_database/${value}/db_mOTU + abspath + + + + + + diff --git a/data_managers/data_manager_motus_db_downloader/readme.md b/data_managers/data_manager_motus_db_downloader/readme.md new file mode 100644 index 0000000000..b0bff9fc85 --- /dev/null +++ b/data_managers/data_manager_motus_db_downloader/readme.md @@ -0,0 +1,32 @@ +# Serve locally + +* Install conda/mamba + +``` +mamba create -n planemo-env python=3.7 +mamba activate planemo-env +pip install -U planemo +``` + +## Run DM together with tool + +``` +cd /motus-DM +planemo serve data_manager/motus_db_fetcher.xml /motus_profiler.xml --biocontainers --galaxy_root ~/git/galaxy +``` + +## Check if tool and DM work together + +In Galaxy go to: + +* Admin +* Local Data +* Check if DM is in **Installed Data Managers** +* Click it +* Run the tool with Database type 3.1.0 +* Admin +* Data Tables +* motus_db_versioned +* Check if new table was made +* Go to the tool +* Check if new table can be found by: `A pre-installed mOTUs database` \ No newline at end of file diff --git a/data_managers/data_manager_motus_db_downloader/tool-data/motus_db_versioned.loc.sample b/data_managers/data_manager_motus_db_downloader/tool-data/motus_db_versioned.loc.sample new file mode 100644 index 0000000000..c8490bb582 --- /dev/null +++ b/data_managers/data_manager_motus_db_downloader/tool-data/motus_db_versioned.loc.sample @@ -0,0 +1,9 @@ +#This is a sample file distributed with Galaxy that enables tools +#to use a mOTUs DB folder. 
The motus_db_versioned.loc +#file needs this format (longer white space is the TAB character): + +# + +# for example: + +db_from_2024-07-11T081301Z 3.1.0 mOTUs DB version 3.1.0 downloaded at 2024-07-11 08:13:01.698939 /galaxy/tool-data/motus_database/db_from_2024-07-11T081301Z/db_mOTU \ No newline at end of file diff --git a/data_managers/data_manager_motus_db_downloader/tool_data_table_conf.xml.sample b/data_managers/data_manager_motus_db_downloader/tool_data_table_conf.xml.sample new file mode 100644 index 0000000000..65a881ad7a --- /dev/null +++ b/data_managers/data_manager_motus_db_downloader/tool_data_table_conf.xml.sample @@ -0,0 +1,7 @@ + + + + value, version, name, path + +
+
\ No newline at end of file