Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add deeparg #6646

Open
wants to merge 22 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
b4963fb
initiate deeparg wrapping
hugolefeuvre Oct 7, 2024
97fac0c
initiate deeparg predict
hugolefeuvre Oct 7, 2024
642476f
V1 of deeparg predict wrapping
hugolefeuvre Oct 8, 2024
046c1cc
Start deeparg short reads pipeline wrapping
hugolefeuvre Oct 8, 2024
7a6cbca
yield requirements for short read pipeline
hugolefeuvre Oct 9, 2024
3744c47
delete conda list in command
hugolefeuvre Oct 9, 2024
9253f50
Fix requirements
bebatut Oct 9, 2024
42bb122
initiate deeparg data_manager
hugolefeuvre Oct 9, 2024
d3c4e9c
DM modification and addition
hugolefeuvre Oct 10, 2024
00fc4b3
Temporary gunzip test files because failed with .gz
hugolefeuvre Oct 10, 2024
1dc7046
Short reads pipeline output, not sure to keep them
hugolefeuvre Oct 10, 2024
399c1aa
one more file
hugolefeuvre Oct 10, 2024
1181c77
delete results files don't used
hugolefeuvre Oct 11, 2024
b7724b7
modification about input extension
hugolefeuvre Oct 11, 2024
15d9c99
Merge branch 'galaxyproject:main' into deeparg
hugolefeuvre Oct 15, 2024
17342e1
Merge branch 'galaxyproject:main' into deeparg
hugolefeuvre Oct 30, 2024
5a6cb2a
modify test model option : from LS to SS
hugolefeuvre Oct 31, 2024
1578ae5
download deeparg db in xml because its too big and cant reduce it, on…
hugolefeuvre Dec 19, 2024
0f1efe3
modify DM, delete dbkey column
hugolefeuvre Dec 19, 2024
20a8a2a
Merge branch 'galaxyproject:main' into deeparg
hugolefeuvre Dec 19, 2024
c72a087
size files less then 1Mo
hugolefeuvre Dec 19, 2024
a33ee96
modify requirements
hugolefeuvre Dec 20, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions data_managers/data_manager_deeparg/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Tool Shed metadata for the DeepARG data manager repository.
name: data_manager_deeparg
owner: iuc
description: "DeepARG for Antibiotic Resistance Genes (ARGs) prediction"
homepage_url: "https://github.com/gaarangoa/deeparg"
long_description: |
  A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes with short or long sequences
# Points at the "main" branch, which is the branch this repository is merged into.
remote_repository_url: "https://github.com/galaxyproject/tools-iuc/tree/main/data_managers/data_manager_deeparg"
type: unrestricted
categories:
  - Data Managers
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
<tool id="data_manager_deeparg" name="Download data for DeepARG" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" tool_type="manage_data" profile="22.05">
    <description></description>
    <macros>
        <token name="@TOOL_VERSION@">1.0.4</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">deeparg</requirement>
    </requirements>
    <!--
        Steps: create the extra-files directory of the JSON output, download the
        DeepARG data into a version-named directory, move that directory into the
        extra-files directory, then publish the data manager JSON as the output.
        Any non-zero exit code fails the job.
    -->
    <command detect_errors="exit_code"><![CDATA[
        mkdir -p '$out_file.extra_files_path' &&
        deeparg download_data -o 'deeparg_$version' &&
        mv 'deeparg_$version' '$out_file.extra_files_path' &&
        cp '$dmjson' '$out_file'
    ]]></command>
    <configfiles>
        <!--
            Data manager JSON: one new row for the deeparg_database_versioned table.
            "value" and "name" carry the download date (DDMMYYYY); "path" is the
            directory name created by the command above.
        -->
        <configfile name="dmjson"><![CDATA[
#from datetime import date
{
  "data_tables":{
    "deeparg_database_versioned":[
      {
        "value": "deeparg_$version-#echo date.today().strftime('%d%m%Y')#",
        "name": "Files needed for running deepARG v-$version-#echo date.today().strftime('%d%m%Y')#",
        "path": "deeparg_$version",
        "db_version": "$version"
      }
    ]
  }
}]]>
        </configfile>
    </configfiles>
    <inputs>
        <param name="version" type="select" label="DB version">
            <option value="1.0.4" selected="true">Data needed for running DeepARG v1.0.4</option>
        </param>
    </inputs>
    <outputs>
        <data name="out_file" format="data_manager_json" label="${tool.name}"/>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <param name="version" value="1.0.4"/>
            <output name="out_file">
                <assert_contents>
                    <has_text text='"deeparg_database_versioned":'/>
                    <has_text text='"db_version": "1.0.4"'/>
                    <!-- Dots are escaped so they only match a literal "." in the version -->
                    <has_text_matching expression='"value": "deeparg_1\.0\.4-[0-9]{8}"'/>
                    <has_text_matching expression='"name": "Files needed for running deepARG v-1\.0\.4-[0-9]{8}"'/>
                    <has_text text='"path": "deeparg_1.0.4"'/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
DeepARG is a tool to predict antibiotic resistance genes (ARGs) in metagenomic samples.

This data manager runs ``deeparg download_data`` and registers the downloaded
directory as a new entry of the ``deeparg_database_versioned`` data table.
The entry identifier and name include the selected version and the download
date (DDMMYYYY).
    ]]></help>
    <citations>
        <citation type="doi">10.1186/s40168-018-0401-z</citation>
    </citations>
</tool>
21 changes: 21 additions & 0 deletions data_managers/data_manager_deeparg/data_manager_conf.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
<?xml version="1.0"?>
<data_managers>
    <!-- Binds the DeepARG download tool to the data table it populates. -->
    <data_manager tool_file="data_manager/data_manager_deeparg.xml" id="data_manager_deeparg">
        <!-- Data table that receives the rows emitted by the tool. -->
        <data_table name="deeparg_database_versioned">
            <!-- Mapping of the tool's JSON output onto the table columns. -->
            <output>
                <!-- Taken as-is from the tool output. -->
                <column name="value"/>
                <!-- Taken as-is from the tool output. -->
                <column name="name"/>
                <!--
                    The directory named by "path" is moved out of the extra files of
                    "out_file" into deeparg_db/ under the data manager data path, and
                    the stored value is rewritten to that location as an absolute path.
                -->
                <column name="path" output_ref="out_file">
                    <move type="directory">
                        <source>${path}</source>
                        <target base="${GALAXY_DATA_MANAGER_DATA_PATH}">deeparg_db/${path}</target>
                    </move>
                    <value_translation>${GALAXY_DATA_MANAGER_DATA_PATH}/deeparg_db/${path}</value_translation>
                    <value_translation type="function">abspath</value_translation>
                </column>
                <!-- Taken as-is from the tool output. -->
                <column name="db_version"/>
            </output>
        </data_table>
    </data_manager>
</data_managers>

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#Test location file for the deeparg_database_versioned data table.
#It enables tools to use a directory of DeepARG data files.
#file has this format (white space characters are TAB characters)
#value	name	path	db_version
#deeparg_1.0.4-10102024	Files needed for running deepARG v-1.0.4-10102024	/path/to/data	1.0.4
#The path below is relative to the job working directory: the deeparg_predict
#test downloads the data into ./deeparg_1.0.4 before running the prediction.
deeparg_1.0.4-19122024	Files needed for running deepARG v-1.0.4-19122024	deeparg_1.0.4	1.0.4
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#This is a sample file distributed with Galaxy that enables tools
#to use a directory of DeepARG data files.
#file has this format (white space characters are TAB characters)
#value	name	path	db_version
#deeparg_1.0.4-10102024	Files needed for running deepARG v-1.0.4-10102024	/path/to/data	1.0.4
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<tables>
    <!-- Locations of the downloaded DeepARG data directories, one row per download -->
    <table name="deeparg_database_versioned" comment_char="#">
        <columns>value, name, path, db_version</columns>
        <!-- Refers to the .loc file; the .loc.sample shipped with the repository is only its template -->
        <file path="tool-data/deeparg_database_versioned.loc"/>
    </table>
</tables>
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
<tables>
    <!-- Test-only table definition: reads the location file stored next to this file under test-data/ -->
    <table name="deeparg_database_versioned" comment_char="#">
        <columns>value, name, path, db_version</columns>
        <file path="${__HERE__}/test-data/deeparg_database_versioned.loc.test"/>
    </table>
</tables>
15 changes: 15 additions & 0 deletions tools/deeparg/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Tool Shed metadata for the DeepARG tool suite.
name: deeparg
owner: iuc
# Short description, mirroring the one used by the DeepARG data manager repository.
description: "DeepARG for Antibiotic Resistance Genes (ARGs) prediction"
long_description: |
  A deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes
categories:
  - Sequence Analysis
# Points at the "main" branch, which is the branch this repository is merged into.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/deeparg
homepage_url: https://github.com/gaarangoa/deeparg
type: unrestricted
# One repository per tool, named after the tool id, plus a suite grouping them.
auto_tool_repositories:
  name_template: "{{ tool_id }}"
  description_template: "Wrapper for the DeepARG tool suite: {{ tool_name }}"
suite:
  name: "suite_deeparg"
  description: "DeepARG is a deep learning based approach to predict Antibiotic Resistance Genes (ARGs) from metagenomes with short or long sequences"
107 changes: 107 additions & 0 deletions tools/deeparg/deeparg_predict.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
<tool id="deeparg_predict" name="DeepARG predict" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Antibiotic Resistance Genes (ARGs) from metagenomes</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="xrefs"/>
    <expand macro="requirements"/>
    <!--
        Runs "deeparg predict" on the input FASTA and writes all result files with
        the prefix deeparg_predict_output/deeparg_predict.
        When the hidden parameter hide_db_build is "true" (set only by the tool
        test), the DeepARG data is first downloaded into ./deeparg_1.0.4 and is
        removed again after the prediction.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ##Used only for test
        #if str($hide_db_build) == 'true':
            deeparg download_data -o deeparg_1.0.4 &&
        #end if
        ##
        mkdir -p deeparg_predict_output &&
        deeparg predict
            --model '$model'
            -i '$input'
            -o 'deeparg_predict_output/deeparg_predict'
            -d '$deeparg_db.fields.path'
            --type '$type'
            --min-prob $min_prob
            --arg-alignment-identity $arg_alignment_identity
            --arg-alignment-evalue $arg_alignment_evalue
            --arg-alignment-overlap $arg_alignment_overlap
            --arg-num-alignments-per-entry $arg_num_alignments_per_entry
        ##Used only for test
        #if str($hide_db_build) == 'true':
            && rm -r deeparg_1.0.4
        #end if
        ##
    ]]></command>
    <inputs>
        <!-- Test-only switch, see the command section -->
        <param name="hide_db_build" type="hidden" value="" />
        <param name="input" type="data" format="fasta" label="Input file"/>
        <param name="deeparg_db" type="select" label="DeepARG database">
            <options from_data_table="deeparg_database_versioned">
                <validator message="No deeparg database is available" type="no_options"/>
            </options>
        </param>
        <param argument="--model" type="select" label="Select model to use" >
            <option value="SS" selected="true">SS (short sequences for reads)</option>
            <option value="LS">LS (long sequences for genes)</option>
        </param>
        <param argument="--type" type="select" label="Molecular data type" >
            <option value="nucl" selected="true">Nucleotide (default)</option>
            <option value="prot">Protein</option>
        </param>
        <param argument="--min-prob" type="float" min="0" max="1" value="0.8" label="Minimum probability cutoff [Default: 0.8]" />
        <!-- NOTE(review): upper bound assumes the identity cutoff is a percentage; confirm against the DeepARG CLI -->
        <param argument="--arg-alignment-identity" type="integer" min="0" max="100" value="50" label="Identity cutoff for sequence alignment [Default: 50]" />
        <param argument="--arg-alignment-evalue" type="float" min="0" value="1e-10" label="Evalue cutoff [Default: 1e-10]" />
        <param argument="--arg-alignment-overlap" type="float" min="0" max="1" value="0.8" label="Alignment read overlap [Default: 0.8]" />
        <param argument="--arg-num-alignments-per-entry" type="integer" min="0" value="1000" label="Diamond, minimum number of alignments per entry [Default: 1000]" />
        <section name="output_files" title="Selection of the output files">
            <param name="output_selection" type="select" display="checkboxes" multiple="true" label="Output files selection">
                <option value="file_ARG_tsv" selected="true">ARGs detected with probability higher than or equal to --min-prob (TSV)</option>
                <option value="file_potential_ARG_tsv" selected="true">ARGs detected with probability below --min-prob (TSV)</option>
                <option value="file_all_hits_tsv" selected="false">All hits detected (TSV)</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Each output is created only when its entry is ticked in "Output files selection" -->
        <data name="output_mapping_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.ARG" label="${tool.name} on ${on_string} : ARG detected (prob higher or equal to --min-prob)" >
            <filter>output_files['output_selection'] and "file_ARG_tsv" in output_files['output_selection']</filter>
        </data>
        <data name="output_mapping_potential_ARG" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.mapping.potential.ARG" label="${tool.name} on ${on_string} : Potential ARG (prob below --min-prob)" >
            <filter>output_files['output_selection'] and "file_potential_ARG_tsv" in output_files['output_selection']</filter>
        </data>
        <data name="output_all_hits" format="tabular" from_work_dir="deeparg_predict_output/deeparg_predict.align.daa.tsv" label="${tool.name} on ${on_string} : all hits detected">
            <filter>output_files['output_selection'] and "file_all_hits_tsv" in output_files['output_selection']</filter>
        </data>
    </outputs>
    <tests>
        <!-- Test 1 -->
        <test expect_num_outputs="3">
            <param name="hide_db_build" value="true" />
            <param name="input" value="ORFs.fa" ftype="fasta"/>
            <param name="deeparg_db" value="deeparg_1.0.4-19122024" />
            <param name="model" value="SS"/>
            <param name="type" value="nucl"/>
            <section name="output_files">
                <param name="output_selection" value="file_ARG_tsv,file_potential_ARG_tsv,file_all_hits_tsv"/>
            </section>
            <output name="output_mapping_ARG" ftype="tabular">
                <assert_contents>
                    <has_text text="YP_003283625.1|FEATURES|tet(K)|tetracycline|tet(K)" />
                    <has_text text="RPOB2" />
                </assert_contents>
            </output>
            <output name="output_mapping_potential_ARG" ftype="tabular">
                <assert_contents>
                    <has_text text="gi:545254650:ref:WP_021551023.1:|FEATURES|mdtB|multidrug|mdtB" />
                    <has_text text="MUXB" />
                </assert_contents>
            </output>
            <output name="output_all_hits" ftype="tabular">
                <assert_contents>
                    <has_size value="226000" delta="10000" />
                    <has_text text="ADV91011.1|FEATURES|RbpA|rifamycin|RbpA" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[
Predict Antibiotic Resistance Genes (ARGs) from metagenomes

**Input**

- A FASTA file of nucleotide or protein sequences.
- A DeepARG database installed with the DeepARG data manager.

**Model**

- SS: short sequences (reads)
- LS: long sequences (genes)

**Outputs**

- ARGs detected with a probability higher than or equal to the minimum probability cutoff
- Potential ARGs, detected with a probability below the minimum probability cutoff
- All hits detected
    ]]></help>
    <expand macro="citations"/>
</tool>
Loading
Loading