galaxyproteomics · reid-wagner · May 11, 2021 · Jun 3, 2021 · Jun 5, 2021 · Jun 5, 2021
diff --git a/tools/proteore_reactome/.shed.yml b/tools/proteore_reactome/.shed.yml
@@ -0,0 +1,6 @@
+categories: [Proteomics]
+description: ProteoRE reactome
+long_description: Query Reactome with your IDs list
+name: proteore_reactome
+owner: galaxyp
+
diff --git a/tools/proteore_reactome/README.rst b/tools/proteore_reactome/README.rst
@@ -0,0 +1,21 @@
+Wrapper for Reactome Tool
+=========================
+
+Reactome web service (https://reactome.org)
+
+**Galaxy integration**
+
+T.P. Lien Nguyen, Florence Combes, Yves Vandenbrouck CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform
+
+Contact [email protected] for any questions or concerns about the Galaxy implementation of this tool.
+
+-------------------------
+
+Reactome provides a service that creates diagrams representing the relations between biological processes.
+This tool links to the Reactome web service with pre-loaded data from a list of IDs, a file containing IDs or a column of a multi-column file.
+
+**For the rows that have more than 1 ID, only the first one is taken into account.**
+
+**Supported IDs: UniProt accession number (e.g. P01023), Entrez gene ID (e.g. 7157), gene name (e.g. AQP7). Any ID containing invalid characters is removed from the queue and placed in the "Invalid identifiers" file.**
diff --git a/tools/proteore_reactome/reactome_analysis.py b/tools/proteore_reactome/reactome_analysis.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+import argparse
+import csv
+import json
+import os
+import re
+
+CURRENT_DIR = os.path.dirname(os.path.abspath(__file__))  # script folder
+
+
+def id_valid(identifiers):
+    """
+    Sort identifiers into valid and invalid ones.
+    Only the part before the first ";" of each entry is kept. It is valid
+    when made solely of ASCII letters, digits, "_" and "-" (an empty
+    string also passes). Returns the lists (valid, invalid) in input order.
+    """
+    # fullmatch, unlike "$", rejects an entry ending with a newline
+    allowed = re.compile(r"[A-Za-z0-9_-]*")
+    firsts = [entry.split(";")[0] for entry in identifiers]
+    valid = [x for x in firsts if allowed.fullmatch(x)]
+    invalid = [x for x in firsts if not allowed.fullmatch(x)]
+    return valid, invalid
+
+
+def isnumber(format, n):
+    """
+    Check whether the string n is a number of the given format.
+
+    format: "int" (non-zero integer without leading zeros, optional "-")
+        or "float" (integer or decimal such as "12", "-3.5" or "0.25").
+    n: string to test.
+    Returns True on a match, False otherwise or for an unknown format.
+    """
+    patterns = {
+        "int": r"-?[1-9][0-9]*",
+        "float": r"-?(0|[1-9][0-9]*)(\.[0-9]+)?",  # "." must be literal
+    }
+    pattern = patterns.get(format)
+    return pattern is not None and re.fullmatch(pattern, n) is not None
+
+
+def data_json(identifiers):
+    """
+    Submit IDs to Reactome with curl; return (response text, rejected IDs).
+    identifiers: [ids_text, "list"] or [path, "file", header, ncol], with
+    header "true"/"false" and ncol like "c1" (1-based); cells may hold
+    ";"-separated IDs. The text is not JSON if the service is unavailable.
+    """
+    import subprocess  # local import: curl is run without a shell
+    if identifiers[1] == "list":
+        cells = identifiers[0].split()
+    elif identifiers[1] == "file":
+        ncol = identifiers[3].replace("c", "")
+        if not isnumber("int", ncol) or int(ncol) < 1:
+            raise ValueError("Invalid column number: " + identifiers[3])
+        with open(identifiers[0], "r") as mq:
+            rows = list(csv.reader(mq, delimiter="\t"))
+        rows = rows[1:] if identifiers[2] == "true" else rows  # skip header
+        # rows too short to have the column (e.g. blank lines) are skipped
+        cells = [r[int(ncol) - 1] for r in rows if len(r) >= int(ncol)]
+    else:
+        raise ValueError("Unknown input type: " + str(identifiers[1]))
+    idens = [i.strip() for c in cells for i in c.split(";") if i.strip()]
+    ids, trash = id_valid(idens)  # only valid IDs are submitted
+    cmd = ["curl", "-H", "Content-Type: text/plain", "-X", "POST",
+           "--data-binary", "@-",  # request body is read from stdin
+           "www.reactome.org/AnalysisService/identifiers/?pageSize=1&page=1"]
+    json_string = subprocess.run(cmd, input="\n".join(ids), text=True,
+                                 capture_output=True).stdout
+    try:
+        j = json.loads(json_string)
+        print("Identifiers not found: " + str(j["identifiersNotFound"]))
+        print("Pathways found: " + str(j["pathwaysFound"]))
+    except (ValueError, KeyError, TypeError):
+        print("Unexpected response from Reactome")
+    return json_string, trash
+
+
+def write_output(filename, json_string, species, trash_file, trash):
+    """
+    Write the HTML page linking to the Reactome pathway browser.
+    The page is template.html with "{species}" and "{token}" replaced by
+    species and by the analysis token found in json_string. An error
+    message is written instead when json_string holds no token. Invalid
+    IDs (trash), if any, are written one per line to trash_file.
+    """
+    try:
+        token = json.loads(json_string)["summary"]["token"]
+    except (ValueError, KeyError, TypeError):
+        token = None  # service down or unexpected answer
+    with open(filename, "w") as output:
+        if token is None:
+            output.write("An error occurred due to unavailability of Reactome web service. Please return later.")  # noqa 501
+        else:
+            with open(os.path.join(CURRENT_DIR, "template.html")) as template:
+                for line in template:
+                    line = line.replace("{species}", species)
+                    output.write(line.replace("{token}", token))
+    if trash:
+        with open(trash_file, "w") as trash_out:
+            trash_out.write("\n".join(trash))
+
+def options():
+    """Parse the command line, query Reactome and write the results."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--json", nargs="+", required=True)
+    parser.add_argument("--output", default="output.html")
+    parser.add_argument("--trash", default="trash.txt")
+    parser.add_argument("--species", default="48887")
+    args = parser.parse_args()
+    ids_json, invalid_ids = data_json(args.json)
+    write_output(args.output, ids_json, args.species, args.trash, invalid_ids)
+
+
+if __name__ == "__main__":  # entry point when run as a script
+    options()
diff --git a/tools/proteore_reactome/reactome_analysis.xml b/tools/proteore_reactome/reactome_analysis.xml
@@ -0,0 +1,138 @@
+<tool id="reactome_analysis" name="pathway enrichment analysis" version="2021.06.03" profile="20.01">
+    <description>with Reactome</description>
+    <requirements>
+        <requirement type="package" version="3.9">python</requirement>
+    </requirements>
+    <command detect_errors="aggressive"><![CDATA[
+
+        python '$__tool_directory__/reactome_analysis.py'
+        #if $opt.input == 'text'
+            --json '$opt.list'
+            'list' 
+        #else if $opt.input == 'file'
+            --json '$opt.file'
+            'file'
+            '$opt.header'
+            '$opt.ncol'
+        #end if
+        --output '$output'
+        --trash '$trash'
+        --species '$species'
+
+    ]]></command>
+    <inputs>
+        <conditional name="opt">
+            <param name="input" type="select" label="Input IDs (UniProt Accession number, Entrez Gene ID, Gene Name)" multiple="False" >
+                <option value="text">Copy/paste your list of IDs</option>
+                <option value="file" selected="true">Input file containing your IDs</option>
+            </param>
+            <when value="text" >
+                <param name="list" type="text" label="Enter list of IDs" help="IDs must be separated by a space into the form field, for example: P31946 P62258">
+                    <sanitizer>
+                        <valid initial="string.printable">
+                            <remove value="&apos;"/>
+                        </valid>
+                        <mapping initial="none">
+                            <add source="&apos;" target="__sq__"/>
+                        </mapping>
+                    </sanitizer>
+                </param>
+            </when>
+            <when value="file" >
+                <param name="file" type="data" format="txt, tabular" label="Input file containing your IDs" help="Input file is a tab-delimited file with at least one column containing identifiers" />
+                <param name="header" type="boolean" checked="true" truevalue="true" falsevalue="false" label="Does file contain header?" />
+		        <param type="text" name="ncol" value="c1" label="Column number of IDs (e.g : Enter c1 for column n°1)">
+                    <validator type="regex" message="Please enter a column number, for example: 'c1' for the first column">[c]{0,1}[0-9]+</validator>
+                </param>
+            </when>
+        </conditional>
+        <param name="species" type="select" label="Species (human, mouse, rat)">
+            <option value="48887">Human (Homo sapiens)</option>
+            <option value="48892">Mouse (Mus musculus)</option>
+            <option value="48895">Rat (Rattus norvegicus)</option>
+        </param>
+    </inputs>
+    <outputs>
+        <data name="output" format="html" label="" />
+        <data name="trash" format="tabular" hidden="true" label="Invalid id not send to reactome">
+            <discover_datasets pattern="(Reactome_Analysis_Tools_invalid_ID).txt" ext="tabular" visible="false" assign_primary_output="false"/>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="opt" >
+                <param name="input" value="file" />
+                <param name="file" value="ID_Converter_FKW_Lacombe_et_al_2017_OK.txt" />
+                <param name="header" value="true" />
+                <param name="ncol" value="c1" />
+            </conditional>
+            <output name="output" file="Reactome_Analysis_Tools.html" ftype="html" />
+            <output name="trash" file="Reactome_Analysis_Tools_invalid_ID.txt" ftype="tabular"  />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+**Description**
+
+This tool maps your gene/protein list to the Reactome pathway browser (visualize and interact with biological pathways)
+via a web service with pre-loaded data from your list of IDs.
+
+-----
+
+**Input** 
+
+A list of gene/protein IDs. List of IDs can be entered either in a copy/paste manner or by selecting a file with your IDs. 
+
+ID types allowed are: UniProt accNumber (e.g. O75391), Entrez gene ID (e.g. 8086) and gene name (e.g. TP53, NUP58...). If you have another type of ID, please use the ID_Converter tool of ProteoRE.
+
+.. class:: warningmark
+
+In copy/paste mode, the number of IDs considered in input is limited to 5000.
+
+-----
+
+**Parameters**
+
+"Select species": can be Human (Homo sapiens), Mouse (Mus musculus) or Rat (Rattus norvegicus)
+
+-----
+
+**Output**
+
+To access the results, click on the "View data" icon (from the item created in your history panel), then click on the "Analyse" button in the central panel: this directly displays the results in the Reactome pathway browser.
+
+-----
+
+.. class:: infomark
+
+**Authors** 
+
+Fabregat et al., The Reactome Pathway Knowledgebase. Nucleic Acids Res. 2018. 46(D1):D649-D655. doi: 10.1093/nar/gkx1132. PubMed PMID: 29145629
+
+-----
+
+.. class:: infomark
+
+**Galaxy integration**
+
+Lien Nguyen, Florence Combes, Yves Vandenbrouck - CEA, INSERM, CNRS, Grenoble-Alpes University, BIG Institute, FR
+
+Sandra Dérozier, Olivier Rué, Christophe Caron, Valentin Loux - INRA, Paris-Saclay University, MAIAGE Unit, Migale Bioinformatics platform, FR
+
+This work has been partially funded through the French National Agency for Research (ANR) IFB project.
+
+Help: [email protected] for any questions or concerns about this tool.
+    ]]></help>
+    <citations>
+         <citation type="doi">doi:10.1186/1471-2105-15-293</citation>
+         <citation type="bibtex">@book{toolsProteoRE,
+            journal = {GitHub repository},
+            publisher = {GitHub},
+            title = {ProteoRE tools},
+            url = {https://github.com/galaxyproteomics/tools-galaxyp},
+            author = {Lien Nguyen, David Chistiany, Florence Combes,Christophe Caron, Valentin Loux Yves Vandenbrouck},
+            date = {2021},
+            year = {2021},
+        }</citation>
+    </citations>
+</tool>
diff --git a/tools/proteore_reactome/releases_notes.rst b/tools/proteore_reactome/releases_notes.rst
@@ -0,0 +1,18 @@
+=======================================
+Query pathway database - Releases Notes
+=======================================
+
+==================
+2019-05-14 Release
+==================
+
+New
+---
+
+Improvements
+------------
+
+- handle multiple IDs per line in input file (";" separated)
+
+Fixes
+-----
diff --git a/tools/proteore_reactome/template.html b/tools/proteore_reactome/template.html
@@ -0,0 +1,48 @@
+<html>
+
+  <head>
+
+    <title>Connection to the Reactome Analysis Service</title>
+
+    <style>
+        body {
+            margin: 40px;
+            background-color: #ffffff;
+            text-align: center;
+            }
+        h1 {
+            color: #19334d;
+            }
+        p {
+            margin-top: 40px;
+            }
+        button {
+            background-color: #ccddff;
+            border: 2px solid #19334d;
+            border-radius: 4px;
+            color: black;
+            padding: 16px 32px;
+            margin: 4px 2px;
+            cursor: pointer;
+            }
+    </style>
+
+  </head>
+
+  <body>
+
+    <h1>Connection to the Reactome Analysis Service</h1>
+
+    <p>Please click the button to execute the analysis:</p>
+
+    <form action="http://www.reactome.org/PathwayBrowser/#/SPECIES={species}&DTAB=AN&ANALYSIS={token}" target="_blank">
+
+    <button>Analyse</button>
+
+    </form>
+
+    <br> <br>
+
+  </body>
+
+</html>