Skip to content

Commit

Permalink
A data manager for motus (bgruening#1450)
Browse files Browse the repository at this point in the history
* init motus DB downloader, worked locally

* fix citation

* hide test, text with options

* fix test and loc

* use select type to limit choice of versions

* fix RST

* fix .sample file
  • Loading branch information
paulzierep authored Oct 10, 2024
1 parent 53b8f23 commit f42263c
Show file tree
Hide file tree
Showing 9 changed files with 306 additions and 0 deletions.
12 changes: 12 additions & 0 deletions data_managers/data_manager_motus_db_downloader/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
categories:
- Data Managers
- Metagenomics
description: Contains a data manager that populates the motus DB
tool data table.
homepage_url: https://github.com/bgruening/galaxytools/
long_description: |
This data manager fetches the database for mOTUs
name: data_manager_motus
owner: iuc
remote_repository_url: https://github.com/bgruening/galaxytools/tree/master/data_managers/
type: unrestricted
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
#!/usr/bin/env python

import argparse
import json
import os
import shutil
import subprocess
import sys
import tarfile
from datetime import datetime

import wget

# Zenodo record hosting the database tarball of each supported mOTUs DB version
# (newest first; this order is kept in version_mapping)
_ZENODO_RECORD_IDS = (
    ("3.1.0", 7778108),
    ("3.0.1", 5140350),
    ("3.0.0", 5012106),
)

# maps a DB version to the download URL of its tar.gz archive
version_mapping = {
    db_version: f"https://zenodo.org/records/{record_id}/files/db_mOTU_v{db_version}.tar.gz"
    for db_version, record_id in _ZENODO_RECORD_IDS
}


def download_untar_store(url, tmp_path, dest_path):
    """
    Download a tar.gz file containing one folder,
    extract that folder and copy its content to dest_path.

    :param url: URL of the tar archive to download
    :param tmp_path: scratch directory used for the download and the
        extraction; it is removed again before the function returns
    :param dest_path: directory the extracted folder is copied to; it is
        created by shutil.copytree and therefore must not exist yet
    :raises SystemExit: if the archive does not contain exactly one
        top-level entry
    """

    extract_path = os.path.join(tmp_path, "extract")

    os.makedirs(tmp_path, exist_ok=True)

    try:
        # download data
        downloaded = wget.download(url, out=tmp_path)
        # Only the basename is joined: the value returned by wget.download()
        # may already contain the output directory, and joining it as a whole
        # would duplicate tmp_path whenever tmp_path is a relative path.
        tarfile_path = os.path.join(tmp_path, os.path.basename(downloaded))

        # the context manager closes the archive even if the extraction fails
        with tarfile.open(tarfile_path) as tar:
            tar.extractall(extract_path)

        entries = os.listdir(extract_path)
        if len(entries) != 1:
            # Abort with a non-zero exit code; returning normally would let the
            # caller register a database folder that was never created.
            sys.exit(
                "Expected exactly one folder in zipped file, "
                f"found {len(entries)}, aborting !"
            )

        folder_path = os.path.join(extract_path, entries[0])

        print(f"Copy data to {dest_path}")
        shutil.copytree(folder_path, dest_path)
        print("Done !")
    finally:
        # remove the downloaded archive and the extracted copy in every case
        shutil.rmtree(tmp_path)


def main():
    """
    Entry point of the data manager script.

    Reads the data manager JSON given via --out to find the
    extra_files_path, stores the requested mOTUs DB version (or, with
    --test, a bundled mock DB) in a time-stamped sub folder of it and
    overwrites the JSON file with the new "motus_db_versioned" data table
    entry.

    Exits with a non-zero code if the command line is invalid, the
    database URL is not reachable (--test) or the mock DB cannot be copied.
    """
    # local import, the module level only imports the datetime class
    from datetime import timezone

    # Parse Command Line
    parser = argparse.ArgumentParser(description="Create data manager JSON.")
    parser.add_argument(
        "--out", dest="output", action="store", required=True, help="JSON filename"
    )
    parser.add_argument(
        "--version",
        dest="version",
        action="store",
        required=True,
        # reject unknown versions with a usage error instead of a KeyError
        choices=list(version_mapping),
        help="Version of the DB",
    )
    parser.add_argument(
        "--test",
        action="store_true",
        help="option to test the script with a lightweight database",
    )

    args = parser.parse_args()

    # the output file of a DM is a json containing args that can be used by the DM
    # most tools mainly use these args to find the extra_files_path for the DM, which can be used
    # to store the DB data
    with open(args.output) as fh:
        params = json.load(fh)

    workdir = params["output_data"][0]["extra_files_path"]
    os.makedirs(workdir, exist_ok=True)

    # UTC time stamp, makes the table value and the DB folder name unique
    timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H%M%SZ")
    db_value = "db_from_{0}".format(timestamp)
    db_path = os.path.join(workdir, db_value)
    tmp_path = os.path.join(workdir, "tmp")
    url = version_mapping[args.version]

    # create DB
    if args.test:
        # the test only checks that the download link of the DB is reachable
        # and registers a bundled mock DB instead of downloading the real one

        # check if link is there
        return_code = subprocess.run(
            ["wget", "--spider", url], shell=False, check=False
        ).returncode
        if return_code:
            print("Error downloading motus database.", file=sys.stderr)
            sys.exit(return_code)

        # copy the test DB
        # TODO once available: https://github.com/motu-tool/mOTUs/issues/121
        test_db_path = os.path.join(
            os.path.dirname(os.path.realpath(__file__)), "motus_test_DB_non_functional"
        )
        return_code = subprocess.run(
            ["cp", "-r", test_db_path, db_path], shell=False, check=False
        ).returncode
        if return_code:
            print("Error copying motus database.", file=sys.stderr)
            sys.exit(return_code)

    else:

        # download data
        download_untar_store(url, tmp_path, db_path)

    # Update Data Manager JSON and write to file
    data_manager_entry = {
        "data_tables": {
            "motus_db_versioned": {
                "value": db_value,
                "version": args.version,
                # human readable label, uses the local time of the machine
                "name": f"mOTUs DB version {args.version} downloaded at {datetime.now()}",
                "path": db_path,
            }
        }
    }

    # the input JSON is replaced by the data table entry
    with open(args.output, "w") as fh:
        json.dump(data_manager_entry, fh, sort_keys=True)


# run the data manager only when the file is executed as a script,
# not when it is imported as a module
if __name__ == "__main__":
    main()
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?xml version="1.0"?>
<macros>
<token name="@TOOL_VERSION@">3.1.0</token>
<token name="@VERSION_SUFFIX@">0</token>
<token name="@PROFILE@">22.05</token>
<xml name="biotools">
<xrefs>
<xref type="bio.tools">
motus
</xref>
</xrefs>
</xml>
<xml name="requirements">
<requirements>
<!-- <requirement type="package" version="@TOOL_VERSION@">motus</requirement> -->
<requirement type="package" version="1.34">tar</requirement>
<requirement type="package" version="3.2">python-wget</requirement>
</requirements>
</xml>
<xml name="citations">
<citations>
<citation type="doi">10.1186/s40168-022-01410-z</citation>
</citations>
</xml>
<xml name="creator">
<creator>
<person givenName="Paul" familyName="Zierep" email="[email protected]" />
</creator>
</xml>
</macros>
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
<?xml version="1.0"?>
<tool id="motus_db_fetcher" name="mOTUs DB fetcher" tool_type="manage_data" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<description>Fetches the DB required for mOTUs</description>
<macros>
<import>macros.xml</import>
</macros>
<expand macro="biotools" />
<expand macro="requirements" />
<!-- <expand macro="version" /> -->

<command detect_errors="exit_code">
<![CDATA[
python '$__tool_directory__/data_manager_fetch_motus_db.py' --out '${out_file}' --version '${version}'
$test_data_manager
]]>
</command>
<inputs>
<param name="test_data_manager" type="hidden" truevalue="--test" falsevalue="" checked="False" label="Copy minimal test DB and create mock data table entry." />

<!-- <param name="test_data_manager" type="text" value=""/> -->
<param argument="--version" type="select" multiple="false" label="Database Version" help="Check https://zenodo.org/records/7778108 for current version. The default value is recommended.">
<option value="3.1.0">3.1.0</option>
<option value="3.0.1">3.0.1</option>
<option value="3.0.0">3.0.0</option>
</param>

</inputs>
<outputs>
<data format="data_manager_json" name="out_file" />
</outputs>
<tests>
<test expect_num_outputs="1">
<param name="test_data_manager" value="--test"/>
<param name="version" value="3.1.0"/>
<output name="out_file">
<assert_contents>
<has_text text="mOTUs DB version 3.1.0 downloaded at"/>
</assert_contents>
</output>
</test>
</tests>
<help><![CDATA[
mOTUs
=====
The mOTUs profiler is a tool designed for taxonomic profiling of metagenomic and metatranscriptomic data.
It allows for the identification and quantification of microbial community composition directly from sequencing reads.
mOTUs utilizes a database of marker genes identified across a wide range of microbial taxa, enabling the detection of known and unknown microorganisms in your sample data.

Additional Resources
====================

For a more comprehensive understanding of mOTUs and detailed usage instructions, please visit the mOTUs GitHub repository:
https://github.com/motu-tool/mOTUs

Data Manager
============

This DM downloads the available DBs from: https://zenodo.org/records/5012106
]]></help>
<expand macro="citations" />
<expand macro="creator" />
</tool>
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Test DB

This test DB does not work with mOTUs; it is only intended to test the DM.
Replace it with a real test DB once one is available!
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?xml version="1.0"?>
<data_managers>

<data_manager tool_file="data_manager/motus_db_fetcher.xml" id="motus_db_fetcher">
<data_table name="motus_db_versioned">
<output>
<column name="value" />
<column name="version" />
<column name="name" />
<column name="path" output_ref="out_file" >
<move type="directory" relativize_symlinks="True">
<source>${path}</source>
<!-- the /db_mOTU path is important since motus looks for a folder named like this -->
<target base="${GALAXY_DATA_MANAGER_DATA_PATH}">motus_database/${value}/db_mOTU</target>
</move>
<value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/motus_database/${value}/db_mOTU</value_translation>
<value_translation type="function">abspath</value_translation>
</column>
</output>
</data_table>
</data_manager>

</data_managers>
32 changes: 32 additions & 0 deletions data_managers/data_manager_motus_db_downloader/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Serve locally

* Install conda/mamba

```
mamba create -n planemo-env python=3.7
mamba activate planemo-env
pip install -U planemo
```

## Run DM together with tool

```
cd <DM-path>/motus-DM
planemo serve data_manager/motus_db_fetcher.xml <motus_profiler PATH>/motus_profiler.xml --biocontainers --galaxy_root ~/git/galaxy
```

## Check if tool and DM work together

In Galaxy go to:

* Admin
* Local Data
* Check if DM is in **Installed Data Managers**
* Click it
* Run the tool with Database type 3.1.0
* Admin
* Data Tables
* motus_db_versioned
* Check if new table was made
* Go to the tool
* Check if new table can be found by: `A pre-installed mOTUs database`
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#This is a sample file distributed with Galaxy that enables tools
#to use a mOTUs DB folder. The motus_db_versioned.loc
#file needs this format (longer white space is the TAB character):

#<unique_build_id> <version> <display_name> <DB_folder_path>

# for example:

db_from_2024-07-11T081301Z 3.1.0 mOTUs DB version 3.1.0 downloaded at 2024-07-11 08:13:01.698939 /galaxy/tool-data/motus_database/db_from_2024-07-11T081301Z/db_mOTU
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
<tables>
<!-- Location of motus DB file -->
<table name="motus_db_versioned" comment_char="#">
<columns>value, version, name, path</columns>
<file path="tool-data/motus_db_versioned.loc" />
</table>
</tables>

0 comments on commit f42263c

Please sign in to comment.