From 991881b5df5f5228ecf4445ee2cc1431b9602ea8 Mon Sep 17 00:00:00 2001
From: AndreaFurlani <93392092+AndreaFurlani@users.noreply.github.com>
Date: Wed, 11 Oct 2023 16:29:58 +0200
Subject: [PATCH] Added the new tool query_impc (#5503)

* Fixed issues with the first commit

* Fixed issues with the first commit

* Fixed errors and issues after commit

* Fixed error E131 in .py file

* Added macros to xml file

* Fix Planemo test issues

* Fix Planemo test issues

* Fix Planemo test issues

* Fix Planemo test issues

* Fix Planemo test issues

* Fix Planemo test issues

* Fix output test issues

* Fix inputs test issues

* Performed small changes to improve code readability

* Added conditionals to tests

* Added conditionals to tests

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Fixing merging issues

* Remove conditional from tests 3/4/5

* Fixed conditionals for all tests

* Removed unused files and adjusted the text for input selection
---
 tools/query_impc/.shed.yml                    |  16 +
 tools/query_impc/impc_tool.py                 | 759 ++++++++++++++++++
 tools/query_impc/impc_tool.xml                | 351 ++++++++
 .../test-data/test_output_1_1.tabular         |  10 +
 .../test-data/test_output_1_2.tabular         |   5 +
 .../test-data/test_output_2.tabular           |  21 +
 .../test-data/test_output_3.tabular           |  79 ++
 .../test-data/test_output_9.tabular           |   4 +
 tools/query_impc/test-data/test_query_1.txt   |   1 +
 tools/query_impc/test-data/test_query_2.txt   |   1 +
 tools/query_impc/test-data/test_query_3.txt   |   1 +
 11 files changed, 1248 insertions(+)
 create mode 100644 tools/query_impc/.shed.yml
 create mode 100644 tools/query_impc/impc_tool.py
 create mode 100644 tools/query_impc/impc_tool.xml
 create mode 100644 tools/query_impc/test-data/test_output_1_1.tabular
 create mode 100644 tools/query_impc/test-data/test_output_1_2.tabular
 create mode 100644 tools/query_impc/test-data/test_output_2.tabular
 create mode 100644 tools/query_impc/test-data/test_output_3.tabular
 create mode 100644 tools/query_impc/test-data/test_output_9.tabular
 create mode 100644 tools/query_impc/test-data/test_query_1.txt
 create mode 100644 tools/query_impc/test-data/test_query_2.txt
 create mode 100644 tools/query_impc/test-data/test_query_3.txt

diff --git a/tools/query_impc/.shed.yml b/tools/query_impc/.shed.yml
new file mode 100644
index 00000000000..80aa97dd800
--- /dev/null
+++ b/tools/query_impc/.shed.yml
@@ -0,0 +1,16 @@
+name: query_impc
+owner: iuc
+description: Contains a tool to query the IMPC database.
+homepage_url: https://github.com/INFRAFRONTIERDIB/tools-iuc/tree/query_impc/tools/query_impc
+remote_repository_url: https://github.com/INFRAFRONTIERDIB/tools-iuc/tree/query_impc/tools/query_impc
+long_description: |
+  With this tool, it is possible to submit various types of queries to the IMPC database. Select the desired query from the drop down menu. 
+  Both MGI ids and gene symbols are allowed as input (even mixed). If you want to input more than one id, separate them with a comma without spaces (e.g. MGI:104636,MGI:104637).
+  If a mixed input is provided, the order after the mapping will not be maintained. Note that if the mapping between the two types of ids doesn't retrieve a result,
+  that id will not be included in the query input, resulting in an error if none of the ids can be mapped. The output will be a table containing the data.
+  For the phenotypes, it is possible to give as input both MP term ids and HP term ids, since the latter will be mapped to MP terms (here too, the order of the input will not be maintained).
+  For both gene and phenotype mapping, check the "View details" section of the job to see whether some of the ids were not mapped (typos or ids not present in the database).
+type: unrestricted
+categories:
+- Convert Formats
+- Web Services
\ No newline at end of file
diff --git a/tools/query_impc/impc_tool.py b/tools/query_impc/impc_tool.py
new file mode 100644
index 00000000000..ae67ed6c419
--- /dev/null
+++ b/tools/query_impc/impc_tool.py
@@ -0,0 +1,759 @@
+import sys
+
+import mygene
+import pandas as pd
+import requests
+
+
+impc_api_url = "https://www.ebi.ac.uk/mi/impc/bulkdata-api"  # API root
+impc_api_search_url = f"{impc_api_url}/genes"  # base of all gene queries
+impc_api_gene_bundle_url = f"{impc_api_url}/geneBundles"  # not used below
+
+
+def stop_err(msg):  # abort the run, reporting msg to the user
+    sys.exit(msg)  # a string argument is printed to stderr, exit status 1
+
+
+def main():
+    """Parse the command line and run the selected IMPC query.
+
+    Positional arguments, as they are read below:
+
+    1. input: the ids, or the path of the file holding them (for query
+       7 this is the flag that full_gene_table reads itself)
+    2. output path (read directly by the query functions)
+    3. query number
+    4. header flag (read directly by the query functions)
+    5. gene id output type for query 7; for every other query the
+       input type, where "txt" means that argument 1 is a file
+    6. separator code for file input, otherwise gene id output type
+    7. gene id output type for file input
+
+    The process exits with status 0 after a successful query; any
+    exception is reported through stop_err.  The table itself is
+    written by the query function that was called.
+    """
+    inp = str(sys.argv[1])
+    query = str(sys.argv[3])
+
+    try:
+        # Query 7 takes no id list, so the input parsing is skipped.
+        if query == "7":
+            full_gene_table(str(sys.argv[5]))
+            sys.exit(0)
+
+        from_file = str(sys.argv[5]) == "txt"
+        # Position of the gene id output type; it is only read by the
+        # queries that use it.
+        g_out_pos = 7 if from_file else 6
+
+        if from_file:
+            code = str(sys.argv[6])
+            # Exact match: a substring test also accepts "" and ",;".
+            if code not in ("t", "s", ",", ";", "."):
+                sys.exit("Separator not valid, please change it.")
+            sep = {"t": "\t", "s": " "}.get(code, code)
+            # Read every cell as text and flatten the table row by row.
+            # Round-tripping through to_csv() left newlines (and, with
+            # a non-comma separator, whole rows) inside the id list.
+            table = pd.read_csv(inp, header=None, delimiter=sep,
+                                dtype=str)
+            # NaN marks a missing cell (e.g. a short row) and is skipped.
+            cells = table.to_numpy().ravel()
+            inp = ",".join(c.strip() for c in cells if pd.notna(c))
+
+        # Queries 8-11 use the parsed input without any id mapping.
+        if query == "8":
+            genes_in_pipeline(inp, str(sys.argv[g_out_pos]))
+        elif query == "9":
+            sign_mp(inp, str(sys.argv[g_out_pos]))
+        elif query == "10":
+            par_pip_ma(inp)
+        elif query == "11":
+            # par_gen requires the gene id output type as well; calling
+            # it with the input alone raised a TypeError.
+            par_gen(inp, str(sys.argv[g_out_pos]))
+        elif query in ("2", "4"):
+            # Phenotype queries: HP terms are mapped to MP terms first.
+            inp = ",".join(pheno_mapping(inp))
+            if query == "2":
+                get_genes(inp, str(sys.argv[g_out_pos]))
+            else:
+                extr_img(inp)
+        else:
+            # Gene queries: gene symbols are mapped to MGI ids first.
+            inp = ",".join(gene_mapping(inp))
+            if query == "1":
+                get_pheno(inp)
+            elif query == "3":
+                gene_set(inp)
+            elif query == "5":
+                parameters(inp)
+            elif query == "6":
+                sign_par(inp)
+            else:
+                stop_err("Error, non-implemented query selected: " + query)
+        sys.exit(0)
+    except Exception as ex:
+        stop_err("Error running impc_tool.py:\n" + str(ex))
+
+
+# 1-Given a gene id, retrieve all the phenotypes related to it (id and name)
+def get_pheno(inp):
+    """Write the significant MP terms of one gene as a two-column table.
+
+    inp is the MGI accession id appended to the gene endpoint URL.  The
+    output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".  Exits through stop_err when the gene has no
+    significant MP terms.
+    """
+    head = sys.argv[4]
+    gene_url = f"{impc_api_search_url}/{inp}"
+    gene_data = requests.get(gene_url).json()
+
+    terms = gene_data["significantMpTerms"]
+    if terms is None:
+        stop_err("No significant MP terms found for this gene")
+
+    # The labels now match the content: the ids used to be written under
+    # "MP term name" and the names under "MP term id".  The column order
+    # (ids first, names second) is unchanged, so only the header row of
+    # the output differs; expected test outputs must follow.
+    df = pd.DataFrame({
+        "MP term id": [term["mpTermId"] for term in terms],
+        "MP term name": [term["mpTermName"] for term in terms],
+    })
+
+    df.to_csv(sys.argv[2], header=(head == "True"), index=False,
+              sep="\t", index_label=False)
+
+
+# Query 2 - Extract all genes having a particular phenotype or a set of
+# phenotypes (e.g. relevant to a disease)
+def get_genes(inp, g_out):
+    """Write the genes whose significant MP terms contain the given ids.
+
+    inp is the comma-separated MP term list sent as mpTermIds.  When
+    g_out is "sym" the MGI accession ids are converted with mgi_sym_map
+    and the first column is labelled accordingly.  The output path and
+    the header flag come from sys.argv[2] and sys.argv[4].
+
+    Only page 0 with size 20 is requested, so no more than the hits of
+    that single page are written; the total is printed to stdout.
+    """
+    head = sys.argv[4]
+
+    endpoint = f"{impc_api_search_url}" \
+               "/search/findAllBySignificantMpTermIdsContains"
+    reply = requests.get(
+        f"{endpoint}?mpTermIds={inp}&page=0&size=20"
+    ).json()
+    print(f"Genes with {inp}: {reply['page']['totalElements']}")
+
+    # One tuple per gene: accession id, marker name, gene bundle link.
+    rows = [
+        (hit["mgiAccessionId"], hit["markerName"],
+         hit["_links"]["geneBundle"]["href"])
+        for hit in reply["_embedded"]["genes"]
+    ]
+    accessions = [row[0] for row in rows]
+
+    use_symbols = g_out == "sym"
+    id_label = "Gene symbol id" if use_symbols else "Gene accession id"
+    table = pd.DataFrame(columns=[id_label, "Gene name",
+                                  "Gene bundle url"])
+    table[id_label] = mgi_sym_map(accessions) if use_symbols else accessions
+    table["Gene name"] = [row[1] for row in rows]
+    table["Gene bundle url"] = [row[2] for row in rows]
+
+    write_header = head == "True"
+    table.to_csv(sys.argv[2], header=write_header, index=False,
+                 sep="\t", index_label=False)
+
+
+# Query 3 - Extract all phenotypes which are present in a particular gene
+# set (e.g. genes together in a pathway)
+def gene_set(inp):
+    """Write, for every significant MP term, the genes of the set having it.
+
+    inp is the comma-separated MGI accession id list sent as
+    mgiAccessionIds.  Each output row holds an MP term id, its name and
+    the comma-joined accession ids of the returned genes annotated with
+    it, in the order in which the terms are first met in the response.
+    Genes whose significantMpTerms is null contribute nothing.
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".
+    """
+    head = sys.argv[4]
+
+    # All accession ids are sent in a single request.
+    query_url = f"{impc_api_search_url}/search/" \
+                f"findAllByMgiAccessionIdIn?" \
+                f"mgiAccessionIds={inp}"
+    reply = requests.get(query_url).json()
+
+    # MP term id -> (MP term name, accession ids); dicts keep insertion
+    # order, which fixes the row order of the output.
+    by_term = {}
+    for gene in reply["_embedded"]["genes"]:
+        gene_terms = gene["significantMpTerms"]
+        accession = gene["mgiAccessionId"]
+        if gene_terms is None:
+            continue
+        for term in gene_terms:
+            term_id = term["mpTermId"]
+            if term_id not in by_term:
+                # First time this term is met: remember its name.
+                by_term[term_id] = (term["mpTermName"], [])
+            by_term[term_id][1].append(accession)
+
+    df = pd.DataFrame()
+    df["mp_term"] = list(by_term)
+    df["mp_name"] = [name for name, _ in by_term.values()]
+    df["genes"] = [",".join(ids) for _, ids in by_term.values()]
+
+    # The header row is written only for the string "True".
+    write_header = head == "True"
+    df.to_csv(sys.argv[2], header=write_header, index=False,
+              sep="\t", index_label=False)
+
+
+# Query 4 - Extract images with a particular phenotype or a set of
+# phenotypes
+def extr_img(inp):
+    """Write a table describing images of genes with the given phenotypes.
+
+    inp is the comma-separated MP term list sent as mpTermIds.  The
+    genes returned by the search (page 0, size 20) are followed to
+    their gene bundles, the image records of those bundles are
+    collected and the first 20 of them are written, one row per image.
+    The number of collected images is printed to stdout.
+
+    Output columns, all copied from the image records:
+    externalSampleId, geneSymbol, biologicalSampleGroup, sex, colonyId,
+    zygosity, parameterName, downloadUrl, thumbnailUrl.
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".  A search response without the
+    "_embedded"/"genes" entries raises a KeyError.
+    """
+    head = sys.argv[4]
+
+    # Output columns, in output order.  Every name is also the key of
+    # the image record the value is taken from.
+    columns = [
+        "externalSampleId",
+        "geneSymbol",
+        "biologicalSampleGroup",
+        "sex",
+        "colonyId",
+        "zygosity",
+        "parameterName",
+        "downloadUrl",
+        "thumbnailUrl",
+    ]
+
+    # All the data is paginated using the page and size parameters;
+    # only the first page of 20 hits is requested here.
+    search_url = f"{impc_api_search_url}/search/" \
+                 f"findAllBySignificantMpTermIdsContains?" \
+                 f"mpTermIds={inp}&page=0&size=20"
+    genes = requests.get(search_url).json()["_embedded"]["genes"]
+    bundle_urls = [gene["_links"]["geneBundle"]["href"] for gene in genes]
+
+    # Only the first 20 bundles were ever used, so only those are
+    # downloaded.
+    images = _bundle_images(bundle_urls[:20])
+    print(f"Images related to phenotype {inp}: {len(images)}")
+
+    # Values are looked up by key.  They used to be picked by position
+    # from list(image.values()), which labels them correctly only while
+    # the API sends the fields in exactly the column order above and
+    # omits none of them.  A missing field now gives an empty cell.
+    rows = [
+        [image.get(column) for column in columns]
+        for image in images[:20]  # just the first 20 images
+    ]
+    # An empty result still produces a frame with all the columns.
+    df = pd.DataFrame(rows, columns=columns)
+
+    # The header row is written only for the string "True".
+    write_header = head == "True"
+    df.to_csv(sys.argv[2], header=write_header, index=False,
+              sep="\t", index_label=False)
+
+
+def _bundle_images(bundle_urls):
+    """Return the image records of the gene bundles behind bundle_urls.
+
+    bundle_urls is an iterable of gene bundle URLs.  Every URL is
+    downloaded and decoded as JSON, and the entries of its "geneImages"
+    list are appended to the result, so the records come out in
+    download order.  Bundles that are skipped or have no images add
+    nothing.
+    """
+    images = []
+    for bundle_url in bundle_urls:
+        bundle = requests.get(bundle_url).json()
+        # NOTE(review): a payload with exactly four entries is skipped,
+        # as before; presumably that is an error document - confirm.
+        if len(bundle) == 4:
+            continue
+        # "geneImages" may be null; that counts as no images.
+        images.extend(bundle["geneImages"] or [])
+    return images
+
+
+# Query 5 - Which parameters have been measured for a particular knockout
+def parameters(inp):
+    """Write the distinct parameter names found in a gene's images.
+
+    inp is the MGI accession id of the knockout gene.  The names are
+    the parameterName values of the geneImages entries of its gene
+    bundle, in order of first appearance.  Exits through stop_err when
+    the gene has no phenotyping data or its bundle has no images.
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".
+    """
+    head = sys.argv[4]
+    gene_info = requests.get(impc_api_search_url + "/" + inp).json()
+    if not gene_info["phenotypingDataAvailable"]:
+        stop_err("No parameters available for this knockout gene")
+
+    bundle_url = gene_info["_links"]["geneBundle"]["href"]
+    images = requests.get(bundle_url).json()["geneImages"]
+    # geneImages may be null (extr_img guards against that as well);
+    # iterating it then raised a TypeError instead of a clear message.
+    if images is None:
+        stop_err("No parameters available for this knockout gene")
+
+    # dict.fromkeys drops duplicates and keeps first-seen order without
+    # the repeated list scans of the previous implementation.
+    names = list(dict.fromkeys(image["parameterName"] for image in images))
+    df = pd.DataFrame({"Parameter": names})
+
+    write_header = head == "True"
+    df.to_csv(sys.argv[2], header=write_header, index=False,
+              sep="\t", index_label=False)
+
+
+# Query 6 - Which parameters identified a significant finding for a
+# particular knockout line (colony)
+def sign_par(inp):
+    """Write the significant statistical results of a knockout gene.
+
+    inp is the MGI accession id sent as mgiAccessionId.  One row is
+    written per statistical result, holding its parameterName and
+    pvalue.  Exits through stop_err when the result list is empty.
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".
+    """
+    head = sys.argv[4]
+
+    # The "/" after the API root was missing, which glued the resource
+    # name onto "bulkdata-api" and produced an invalid URL.
+    stats_url = f"{impc_api_url}/statisticalResults/search/" \
+                f"findAllByMarkerAccessionIdIsAndSignificantTrue?" \
+                f"mgiAccessionId={inp}"
+    reply = requests.get(stats_url).json()
+    gene_stats = reply["_embedded"]["statisticalResults"]
+    if len(gene_stats) == 0:
+        stop_err("No statistically relevant parameters found "
+                 "for this knockout gene")
+
+    df = pd.DataFrame()
+    df["Parameter name"] = [res["parameterName"] for res in gene_stats]
+    df["p-value"] = [res["pvalue"] for res in gene_stats]
+    df.to_csv(sys.argv[2], header=(head == "True"), index=False,
+              sep="\t", index_label=False)
+
+
+# Query 8 - List of genes names and ID measured in a pipeline
+def genes_in_pipeline(inp, g_out):
+    """Write the genes tested in a pipeline, one row per gene.
+
+    inp is the pipeline id sent as pipelineId.  Pages of 1000 genes
+    are requested; the pages after the first one are only downloaded
+    when the reported total exceeds 1000.  The total is printed to
+    stdout.  When g_out is "sym" the MGI accession ids are converted
+    with mgi_sym_map and the first column is labelled "Gene symbol",
+    otherwise it is "Gene accession id".
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".
+    """
+    head = sys.argv[4]
+
+    def page_url(number):
+        # Search URL for one page of at most 1000 genes.
+        return f"{impc_api_search_url}/search/" \
+               f"findAllByTestedPipelineId?pipelineId={inp}&" \
+               f"page={number}&size=1000"
+
+    first_page = requests.get(page_url(0)).json()
+    page_count = first_page["page"]["totalPages"]
+    total = first_page["page"]["totalElements"]
+    print(f"Genes with {inp}: {first_page['page']['totalElements']}")
+
+    genes = first_page["_embedded"]["genes"]
+    # Remaining pages, fetched only for more than 1000 genes.
+    if total > 1000:
+        for number in range(1, page_count):
+            reply = requests.get(page_url(number)).json()
+            genes += reply["_embedded"]["genes"]
+
+    # Accession id and marker name of every gene, in response order.
+    pairs = [(gene["mgiAccessionId"], gene["markerName"])
+             for gene in genes]
+    accessions = [pair[0] for pair in pairs]
+
+    use_symbols = g_out == "sym"
+    id_label = "Gene symbol" if use_symbols else "Gene accession id"
+    table = pd.DataFrame(columns=[id_label, "Gene name"])
+    if use_symbols:
+        table[id_label] = mgi_sym_map(accessions)
+    else:
+        table[id_label] = accessions
+    table["Gene name"] = [pair[1] for pair in pairs]
+
+    write_header = head == "True"
+    table.to_csv(sys.argv[2], header=write_header, index=False,
+                 sep="\t", index_label=False)
+
+
+# Query 9 - Extract all genes and corresponding phenotypes related to a
+# particular organ system (eg: significatMPTerm)
+def sign_mp(inp, g_out):
+    """Write every gene having the given MP term among its significant terms.
+
+    inp is the MP term list sent as mpTermIds.  All result pages (1000
+    genes each) are downloaded.  Each row holds the gene, the list of
+    the ids of all its significant MP terms and the list of their
+    names; both lists are written in their Python list representation.
+    When g_out is "sym" the genes are converted with mgi_sym_map and
+    the first column is "Gene symbol", otherwise it is "Gene Id".
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".
+    """
+    head = sys.argv[4]
+
+    search = f"{impc_api_search_url}/search/" \
+             f"findAllBySignificantMpTermIdsContains?" \
+             f"mpTermIds={inp}"
+    # The first request carries no page parameter.
+    first_page = requests.get(f"{search}&size=1000").json()
+    page_count = first_page["page"]["totalPages"]
+    genes_info = first_page["_embedded"]["genes"]
+    # Remaining pages, if any.
+    for number in range(1, page_count):
+        reply = requests.get(f"{search}&page={number}&size=1000").json()
+        genes_info += reply["_embedded"]["genes"]
+
+    # One entry per gene in each of the three column lists.
+    gene_ids = []
+    term_ids = []
+    term_names = []
+    for gene in genes_info:
+        pairs = [(term["mpTermName"], term["mpTermId"])
+                 for term in gene["significantMpTerms"]]
+        gene_ids.append(gene["mgiAccessionId"])
+        term_ids.append([pair[1] for pair in pairs])
+        term_names.append([pair[0] for pair in pairs])
+
+    # Columns are added one at a time to an empty frame; the two term
+    # columns hold one list per row.
+    df = pd.DataFrame()
+    if g_out == "sym":
+        df["Gene symbol"] = mgi_sym_map(gene_ids)
+    else:
+        df["Gene Id"] = gene_ids
+    df["Significant MP terms Ids"] = term_ids
+    df["Significant MP terms Names"] = term_names
+
+    # The header row is written only for the string "True".
+    write_header = head == "True"
+    df.to_csv(sys.argv[2], header=write_header, index=False,
+              sep="\t", index_label=False)
+
+
+# Query 7 - Full table of genes and all identified phenotypes
+def full_gene_table(g_out):
+    """Write every gene of the API with the ids of its significant MP terms.
+
+    All pages of the gene endpoint (1000 genes each) are downloaded.
+    The second column, "MP term list", holds the MP term ids joined
+    with ", ", or the text "None" for genes whose significantMpTerms
+    is null.  Those genes are only written when sys.argv[1] is "True".
+    When g_out is "sym" the MGI ids are converted with mgi_sym_map and
+    the first column is "Gene symbol", otherwise it is "MGI id".
+
+    The output path is sys.argv[2]; the header row is written when
+    sys.argv[4] is "True".
+    """
+    head = sys.argv[4]
+    # sys.argv[1] is the flag that keeps genes without phenotypes.
+    include_unannotated = str(sys.argv[1]) == "True"
+
+    first_page = requests.get(impc_api_search_url
+                              + "?page=0&size=1000").json()
+    page_count = first_page["page"]["totalPages"]
+    genes_info = first_page["_embedded"]["genes"]
+    # Remaining pages, if any.
+    for number in range(1, page_count):
+        reply = requests.get(impc_api_search_url
+                             + f"?page={number}&size=1000").json()
+        genes_info += reply["_embedded"]["genes"]
+
+    # The cells are built as plain strings before the frame exists.
+    # They used to be rewritten afterwards with df[column][i] = value,
+    # a chained assignment that pandas warns about and that no longer
+    # reaches the frame once copy-on-write is enabled.
+    gene_ids = []
+    term_cells = []
+    for gene in genes_info:
+        terms = gene["significantMpTerms"]
+        if terms is None:
+            # No significant phenotype: kept only on request.  Such
+            # genes are now dropped before the symbol lookup instead
+            # of after it, which does not change the written rows.
+            if not include_unannotated:
+                continue
+            cell = "None"
+        else:
+            # Same text as the former str(list)[1:-1] with the quotes
+            # removed, e.g. "MP:0000001, MP:0000002".
+            cell = ", ".join(str(term["mpTermId"]) for term in terms)
+        gene_ids.append(gene["mgiAccessionId"])
+        term_cells.append(cell)
+
+    df = pd.DataFrame()
+    if g_out == "sym":
+        df["Gene symbol"] = mgi_sym_map(gene_ids)
+    else:
+        df["MGI id"] = gene_ids
+    df["MP term list"] = term_cells
+
+    # The header row is written only for the string "True".
+    write_header = head == "True"
+    df.to_csv(sys.argv[2], header=write_header, index=False,
+              sep="\t", index_label=False)
+
+
+# Query 10 - Extract measurements and analysis for a parameter or pipeline
+def par_pip_ma(inp):  # NOTE(review): unfinished - see the "" keys below
+    head = sys.argv[4]  # header flag: "True" writes the column names
+    id = inp  # parameter id if it starts with "IMPC", else a pipeline id
+
+    if id[0:4] == "IMPC":
+        par = True
+        ma_query = f"{impc_api_search_url}/search/" \
+                   f"findAllByTestedParameterId?" \
+                   f"parameterId={id}&page=0&size=1000"
+    else:
+        ma_query = f"{impc_api_search_url}/search/" \
+                   f"findAllByTestedPipelineId?" \
+                   f"pipelineId={id}&page=0&size=1000"
+        par = False
+
+    ma_in_pip = requests.get(ma_query).json()
+    pages = ma_in_pip["page"]["totalPages"]
+    max_elem = ma_in_pip["page"]["totalElements"]
+
+    print(f"Genes with {id}: {ma_in_pip['page']['totalElements']}")
+    list_d = []  # one dict per gene record of the response
+    list_of_genes = pd.DataFrame(columns=["Measurements", "Analysis"])
+    mes = []
+    an = []
+
+    if max_elem > 1000:  # further pages are fetched only in this case
+
+        ma_in_pip = ma_in_pip["_embedded"]["genes"]
+        for pn in range(1, pages):
+            if par:
+                pip = requests.get(f"{impc_api_search_url}/search/"
+                                   f"findAllByTestedParameterId?"
+                                   f"parameterId={id}&page={pn}&"
+                                   f"size=1000").json()["_embedded"]["genes"]
+            else:
+                pip = requests.get(f"{impc_api_search_url}/search/"
+                                   f"findAllByTestedPipelineId?"
+                                   f"pipelineId={id}&page={pn}&"
+                                   f"size=1000").json()["_embedded"]["genes"]
+            ma_in_pip += pip
+
+    else:
+        ma_in_pip = ma_in_pip["_embedded"]["genes"]
+
+    for g in ma_in_pip:
+        d = {"Measurements": g[""], "Analysis": g[""]}  # TODO: real keys
+        list_d.append(d)
+
+    for i in list_d:
+        mes.append(i[""])  # NOTE(review): d has no "" key -> KeyError
+        an.append(i[""])  # NOTE(review): same placeholder key as above
+
+    list_of_genes["Analysis"] = an
+    list_of_genes["Measurements"] = mes
+
+    if head == "True":
+        list_of_genes.to_csv(sys.argv[2], header=True, index=False,
+                             sep="\t", index_label=False)
+    else:
+        list_of_genes.to_csv(sys.argv[2], header=False, index=False,
+                             sep="\t", index_label=False)
+
+
+# Query 11 - Get all genes and measured values for a particular parameter
+def par_gen(inp, g_out):  # NOTE(review): unfinished - see g[""] below
+    head = sys.argv[4]  # header flag: "True" writes the column names
+    id = inp  # sent to the API as parameterId
+
+    pa_query = f"{impc_api_search_url}/search/" \
+               f"findAllByTestedParameterId?parameterId={id}&page=0&size=1000"
+
+    gm_par = requests.get(pa_query).json()
+    pages = gm_par["page"]["totalPages"]
+    max_elem = gm_par["page"]["totalElements"]
+
+    print(f"Genes with {id}: {gm_par['page']['totalElements']}")
+    list_d = []  # one dict per gene record of the response
+    gen = []
+    mes = []
+
+    if max_elem > 1000:  # further pages are fetched only in this case
+
+        gm_par = gm_par["_embedded"]["genes"]
+
+        for pn in range(1, pages):
+            pip = requests.get(f"{impc_api_search_url}/search/"
+                               f"findAllByTestedParameterId?"
+                               f"parameterId={id}&page={pn}&"
+                               f"size=1000").json()["_embedded"]["genes"]
+            gm_par += pip
+
+    else:
+        gm_par = gm_par["_embedded"]["genes"]
+
+    # NOTE(review): "" below is a placeholder key - the real field is needed.
+    for g in gm_par:
+        d = {"Genes": g["mgiAccessionId"], "Measured Values": g[""]}
+        list_d.append(d)
+
+    for i in list_d:
+        gen.append(i["Genes"])
+        mes.append(i["Measured Values"])
+
+    if g_out == "sym":  # "sym": MGI ids are converted with mgi_sym_map
+        list_of_genes = pd.DataFrame(columns=["Gene symbol",
+                                              "Measured Values"])
+        list_of_genes["Gene symbol"] = mgi_sym_map(gen)
+    else:
+        list_of_genes = pd.DataFrame(columns=["Gene accession id",
+                                              "Measured Values"])
+        list_of_genes["Gene accession id"] = gen
+    list_of_genes["Measured Values"] = mes
+
+    if head == "True":
+        list_of_genes.to_csv(sys.argv[2], header=True, index=False,
+                             sep="\t", index_label=False)
+    else:
+        list_of_genes.to_csv(sys.argv[2], header=False, index=False,
+                             sep="\t", index_label=False)
+
+
+# Function to map gene symbols to MGI ids
+def gene_mapping(inp):
+    """Return the MGI ids for a comma-separated list of ids and symbols.
+
+    Entries containing "MGI:" are kept as they are, in input order.
+    All other entries are treated as mouse gene symbols and looked up
+    with mygene; the MGI ids that are found are appended after the
+    kept ones.  Symbols without an "MGI" field in their result are
+    reported on stdout, and stop_err is called when the final list
+    would be empty.
+    """
+    entries = inp.split(",")
+    final_list = [entry for entry in entries if "MGI:" in entry]
+    symbols = [entry for entry in entries if "MGI:" not in entry]
+    if not symbols:
+        return final_list
+
+    # symbol for symbols, mgi for MGI :
+    # https://docs.mygene.info/en/latest/doc/query_service.html#available-fields
+    hits = mygene.MyGeneInfo().querymany(symbols, scopes="symbol",
+                                         fields="symbol,MGI",
+                                         species="mouse")
+    mapped = [hit["MGI"] for hit in hits if "MGI" in hit]
+    discarded = [hit["query"] for hit in hits if "MGI" not in hit]
+    final_list.extend(mapped)
+
+    if not final_list:
+        stop_err("Error: it was not possible to map the input.")
+    elif not mapped:
+        print("Warning: it was not possible to map any of the symbol ids. "
+              "Only MGI ids will be used.")
+    elif discarded:
+        print("Warning: it was not possible to map these elements: "
+              "" + ",".join(discarded) + "\n")
+
+    return final_list
+
+
+# Function to map HP term ids to MP term ids
+def pheno_mapping(inp):
+    """Return the MP term ids for a comma-separated list of phenotype ids.
+
+    Entries containing "MP:" are kept; every other entry is replaced by
+    the "mpId" value of its row in the mapping table downloaded below.
+    Failed lookups are printed; stop_err is called if nothing is left.
+    """
+    entries = inp.split(",")
+    final_list = [entry for entry in entries if "MP:" in entry]
+    others = [entry for entry in entries if "MP:" not in entry]
+    if not others:
+        return final_list
+
+    url = "https://raw.githubusercontent.com/AndreaFurlani/" \
+          "hp_mp_mapping_test/main/hp_mp_mapping.csv"
+    mapper = pd.read_csv(url, header=0, index_col=2)
+    discarded = []
+    for entry in others:
+        try:
+            final_list.append(mapper.loc[entry]["mpId"])
+        except KeyError:
+            discarded.append(entry)
+
+    if not final_list:
+        stop_err("Error: it was not possible to map the input.")
+    elif len(discarded) == len(others):
+        print("Warning: it was not possible to map any of the "
+              "HP term entries. Only MP entries will be used.")
+    elif discarded:
+        print("Warning: it was not possible to "
+              "map these elements: " + ",".join(discarded) + "\n")
+    return final_list
+
+
+# Function to map MGI ids to Gene Symbols
+def mgi_sym_map(mgi_list):
+    """Return one symbol per mygene hit, or the queried id if it has none."""
+    hits = mygene.MyGeneInfo().querymany(mgi_list, scopes="MGI",
+                                         fields="symbol,MGI",
+                                         species="mouse")
+    sym_list = []
+    unmapped = []
+    # Ids without a symbol are passed through and reported on stdout.
+    for hit in hits:
+        if "symbol" not in hit:
+            unmapped.append(hit["query"])
+        sym_list.append(hit["symbol"] if "symbol" in hit else hit["query"])
+    if unmapped:
+        print("It was not possible to map these genes: " + ",".join(unmapped))
+    return sym_list
+
+
+if __name__ == "__main__":  # run the tool only when executed as a script
+    main()
diff --git a/tools/query_impc/impc_tool.xml b/tools/query_impc/impc_tool.xml
new file mode 100644
index 00000000000..9d825334e0a
--- /dev/null
+++ b/tools/query_impc/impc_tool.xml
@@ -0,0 +1,351 @@
+<tool id="query_impc" name="IMPC" version="0.9.0" profile="22.05">
+  <description>query tool</description>
+  <macros>
+   <xml name="selectSeparator">
+    <param name="sep" type="select" label="Select the separator used in the file">
+     <option value="t">tab</option>
+     <option value="s">single space</option>
+     <option value=",">Comma</option>
+     <option value=";">Semicolon</option>
+    </param>
+   </xml>
+   <xml name="inputType">
+    <param name="inp_sel" type="select" label="Select the type of input">
+     <option value="str">Direct input</option>
+     <option value="txt">Txt file</option>
+    </param>
+   </xml>
+   <xml name="outputType">
+    <param name="g_out" type="select" label="Select the type of gene ID in the output" help="Select if the genes in the output will use MGI IDs (default option) or Symbol IDs">
+     <option value="mgi">MGI IDs</option>
+     <option value="sym">Symbol IDs</option>
+    </param>
+   </xml>
+   <xml name="header">
+    <param name="head" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Choose whether to include the header in the output" help="The default value is True"/>
+   </xml>
+  </macros>
+  <creator>
+   <organization name="INFRAFRONTIER GmbH" url="https://www.infrafrontier.eu/" email="info@infrafrontier.eu" />
+   <person name="Andrea Furlani" email="andrea.furlani@infrafrontier.eu" />
+   <person name="Philipp Gormanns" email="philipp.gormanns@infrafrontier.eu" />
+  </creator>
+  <requirements>
+   <requirement type="package" version="2.25.1">requests</requirement>
+   <requirement type="package" version="1.3.5">pandas</requirement>
+   <requirement type="package" version="4.9.2">lxml</requirement>
+   <requirement type="package" version="3.2.2">mygene</requirement>
+  </requirements>
+  <command detect_errors="exit_code">
+   <![CDATA[
+   python3 '$__tool_directory__/impc_tool.py' 
+   #if $query_type.selector == "7"
+    '$query_type.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.g_out'
+   #else
+    #if $query_type.inp_q.inp_sel == "str"
+     '$query_type.inp_q.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.inp_q.inp_sel'
+    #else
+     '$query_type.inp_q.input' '$output' '$query_type.selector' '$query_type.head' '$query_type.inp_q.inp_sel' '$query_type.inp_q.sep'
+    #end if
+   #end if
+   #if $query_type.selector in ["2", "8", "9"]
+    '$query_type.g_out'
+   #end if]]>
+  </command>
+  <inputs>
+   <conditional name="query_type">
+    <param name="selector" type="select" label="Select a query">
+     <option value="1">1 - Extract all measured phenotypes related to a gene</option>
+     <option value="2">2 - Extract all genes having a particular phenotype or a set of phenotypes (e.g. relevant to a disease)</option>
+     <option value="3">3 - Extract all phenotypes which are present in a particular gene set (e.g. genes together in a pathway)</option>
+     <option value="4">4 - Extract images with a particular phenotype or a set of phenotypes</option>
+     <option value="5">5 - Which IMPReSS parameters have been measured for a particular knockout</option>
+     <option value="6">6 - Which IMPReSS parameters identified a significant finding for a particular knockout</option>
+     <option value="7">7 - Full table of genes and all identified phenotypes, no input needed</option>
+     <option value="8">8 - Extract all genes names and ID measured in a specific IMPReSS pipeline</option>
+     <option value="9">9 - Extract all genes and corresponding phenotypes related to a particular top level phenotype category</option>
+    </param>
+    <when value="1">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene" help="Enter a single MGI gene ID or gene symbol"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="tabular,txt" label="Input file" help="Enter a txt file with the Gene MGI ID or gene symbol"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="2">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input phenotype or set of phenotypes" help="Enter a single MP/HP term ID or a list dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the MP/HP terms"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+    <when value="3">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene or set of genes" help="Enter a single MGI gene ID (or gene symbol) or a list dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the genes MGI IDs or symbols"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="4">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input phenotype or set of phenotypes" help="Enter a single MP/HP term ID or a list dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with the MP/HP terms"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="5">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene" help="Enter an IMPReSS parameter ID or a list of IDs dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS parameter ID or a list of IDs"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="6">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input gene" help="Enter an IMPReSS parameter ID or a list of IDs dividing each ID with a comma (without spaces)"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS parameter ID or a list of IDs"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+    </when>
+    <when value="7">
+     <param name="input" type="boolean" checked="true" truevalue="True" falsevalue="False" label="Include genes without identified phenotypes?" help="Choose whether the output table should also include those genes that have no registered phenotypes. By default they are excluded."/>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+    <when value="8">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input pipeline" help="Enter an IMPReSS pipeline ID"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with an IMPReSS pipeline ID"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+    <when value="9">
+     <conditional name="inp_q">
+      <expand macro="inputType" />
+      <when value="str">
+       <param name="input" type="text" label="Input ID" help="Enter a top level phenotype category ID"/>
+      </when>
+      <when value="txt">
+       <param name="input" type="data" format="data,tabular,txt" label="Input file" help="Enter a txt file with a top level phenotype category ID"/>
+       <expand macro="selectSeparator" />
+      </when>
+     </conditional>
+     <expand macro="header" />
+     <expand macro="outputType" />
+    </when>
+   </conditional>
+  </inputs>
+  <outputs>
+   <data format="tabular" name="output" label="${tool.name} query n° $query_type.selector"/>  
+  </outputs>
+  <tests>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="1"/>
+     <conditional name="inp_q">
+      <param name="inp_sel" value="txt"/>
+      <param name="input" value="test_query_1.txt"/>
+      <param name="sep" value="t"/>
+     </conditional>
+     <param name="head" value="True"/>
+    </conditional>
+    <output name="output" file="test_output_1_1.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="1"/>
+     <conditional name="inp_q">
+      <param name="input" value="Car4"/>
+      <param name="inp_sel" value="str"/>
+     </conditional>
+     <param name="head" value="True"/>
+   </conditional>
+   <output name="output" file="test_output_1_2.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="2"/>
+     <conditional name="inp_q">
+      <param name="input" value="test_query_2.txt"/>
+      <param name="inp_sel" value="txt"/>
+      <param name="sep" value="t"/>
+      <param name="g_out" value="mgi"/>
+     </conditional>
+     <param name="head" value="True"/>
+    </conditional>
+    <output name="output" file="test_output_2.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="3"/>
+     <conditional name="inp_q">
+      <param name="input" value="test_query_3.txt"/>
+      <param name="inp_sel" value="txt"/>
+      <param name="sep" value="t"/>
+     </conditional>
+     <param name="head" value="False"/>
+    </conditional>
+    <output name="output" value="test_output_3.tabular"/>
+   </test>
+   <test expect_num_outputs="1">
+    <conditional name="query_type">
+     <param name="selector" value="9"/>
+     <conditional name="inp_q">
+      <param name="input" value="MP:0005388"/>
+      <param name="inp_sel" value="str"/>
+     </conditional>
+     <param name="head" value="True"/>
+     <param name="g_out" value="sym"/>
+    </conditional>
+    <output name="output" file="test_output_9.tabular"/>
+   </test>
+  </tests>
+  <help><![CDATA[
+   **What it does**
+ 
+   With this tool, it is possible to submit various types of queries to the IMPC database.
+   Select the desired query from the drop-down menu. Both MGI IDs and gene symbols are allowed as input (even mixed). To input more than one ID, separate them with commas without spaces (e.g. MGI:104636,MGI:104637). If a mixed input is given, the order of the IDs will not be maintained after the mapping.
+   Note that if the mapping between the two types of IDs doesn't retrieve a result, that ID will not be included in the query input, resulting in an error if none of the IDs can be mapped. The output will be a table containing the data.
+   For phenotypes, both MP term IDs and HP term IDs can be given as input, since HP terms will be mapped to MP terms (here too, the order of the input will not be maintained).
+   For both genes and phenotypes mapping, check the "View details" section of the job to check if some of them were not mapped (typo errors/ID not present in the database).
+   For queries requiring an IMPReSS pipeline ID, a complete list with details about each pipeline can be found here_.
+   For query 7 no input is required, and you can choose whether or not to include genes without identified phenotypes.
+   In query number 9, a top level phenotype category is required as input. On IMPC, phenotypes are divided into 20 categories to summarize which systems are mainly influenced by the phenotype. In the database there are 24, since some of them are split into different groups:
+ 
+ 
+   +-----------------------------------------+---------------------------------------+
+   |    Top level phenotype category name    |    top level phenotype category ID    |
+   +=========================================+=======================================+
+   |    Immune system phenotype              |    MP:0005387                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Integument phenotype                 |    MP:0010771                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Adipose tissue phenotype             |    MP:0005375                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Hearing/vestibular/ear phenotype     |    MP:0005377                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Hematopoietic system phenotype       |    MP:0005397                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Craniofacial phenotype               |    MP:0005382                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Cardiovascular system phenotype      |    MP:0005385                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Renal/urinary system phenotype       |    MP:0005367                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Homeostasis/metabolism phenotype     |    MP:0005376                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Pigmentation phenotype               |    MP:0001186                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Limbs/digits/tail phenotype          |    MP:0005371                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Nervous system phenotype             |    MP:0003631                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Vision/eye phenotype                 |    MP:0005391                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Liver/biliary system phenotype       |    MP:0005370                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Respiratory system phenotype         |    MP:0005388                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Behavior/neurological phenotype      |    MP:0005386                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Skeleton phenotype                   |    MP:0005390                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Mortality/aging                      |    MP:0010768                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Reproductive system phenotype        |    MP:0005389                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Endocrine/exocrine gland phenotype   |    MP:0005379                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Growth/size/body region phenotype    |    MP:0005378                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Embryo phenotype                     |    MP:0005380                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Muscle phenotype                     |    MP:0005369                         |
+   +-----------------------------------------+---------------------------------------+
+   |    Digestive/alimentary phenotype       |    MP:0005381                         |
+   +-----------------------------------------+---------------------------------------+
+ 
+   |
+   |
+ 
+   Moreover, when the output of a query is a list of genes, the user can choose whether the output will use MGI IDs or gene symbols. Please note that if it is not possible to map a gene, it will keep the same ID it had at the beginning.
+   For each query it is possible to choose whether or not to include a header row. Note that not all tools have an option to remove it automatically. In this case the user will have to remove it using the tool "Remove beginning of a file".
+ 
+ 
+   The headers for each query are the following:
+ 
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |         Query                                                                                     |    Output header columns                                                       |
+   +===================================================================================================+================================================================================+
+   |Extract all measured phenotypes related to a gene                                                  |MP term name, MP term ID                                                        |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all genes having a particular phenotype or a set of phenotypes                             |Gene accession ID/Gene symbol, Gene name, Gene bundle url                       |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all phenotypes which are present in a particular gene set                                  |MP term ID, MP term name, genes                                                 |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract images with a particular phenotype or a set of phenotypes                                  |External sample ID, Gene symbol, Biological sample group, Sex, Colony ID,       |
+   |                                                                                                   |Zygosity, Parameter name, Download url, Thumbnail url                           |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Which IMPReSS parameters have been measured for a particular knockout                              |IMPReSS Parameter name                                                          |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Which IMPReSS parameters identified a significant finding for a particular knockout                |IMPReSS Parameter name, p-value                                                 |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Full table of genes and all identified phenotypes                                                  |Gene accession ID/Gene symbol, Identified phenotypes                            |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all genes names and ID measured in a specific IMPReSS pipeline                             |Gene accession ID/Gene symbol, Gene name                                        |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+   |Extract all genes and corresponding phenotypes related to a particular top level phenotype category|Gene accession ID/Gene symbol, Significant mp term ID, Significant mp term name |
+   +---------------------------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
+ 
+   .. _here: https://www.mousephenotype.org/impress/pipelines
+  ]]></help>
+  <citations>
+   <citation type="doi">10.1093/nar/gku1193</citation>
+   <citation type="doi">10.12688/f1000research.25369.1</citation>
+   <citation type="doi">10.1038/nature19356</citation>
+  </citations>
+ </tool>
\ No newline at end of file
diff --git a/tools/query_impc/test-data/test_output_1_1.tabular b/tools/query_impc/test-data/test_output_1_1.tabular
new file mode 100644
index 00000000000..b82250a17e8
--- /dev/null
+++ b/tools/query_impc/test-data/test_output_1_1.tabular
@@ -0,0 +1,10 @@
+MP term name	MP term id
+MP:0002135	abnormal kidney morphology
+MP:0000194	increased circulating calcium level
+MP:0002574	increased vertical activity
+MP:0005633	increased circulating sodium level
+MP:0001303	abnormal lens morphology
+MP:0002965	increased circulating serum albumin level
+MP:0001304	cataract
+MP:0010052	increased grip strength
+MP:0001402	decreased locomotor activity
diff --git a/tools/query_impc/test-data/test_output_1_2.tabular b/tools/query_impc/test-data/test_output_1_2.tabular
new file mode 100644
index 00000000000..4a3aadca5b4
--- /dev/null
+++ b/tools/query_impc/test-data/test_output_1_2.tabular
@@ -0,0 +1,5 @@
+MP term name	MP term id
+MP:0000194	increased circulating calcium level
+MP:0011110	preweaning lethality, incomplete penetrance
+MP:0001303	abnormal lens morphology
+MP:0010053	decreased grip strength
diff --git a/tools/query_impc/test-data/test_output_2.tabular b/tools/query_impc/test-data/test_output_2.tabular
new file mode 100644
index 00000000000..861646a48bf
--- /dev/null
+++ b/tools/query_impc/test-data/test_output_2.tabular
@@ -0,0 +1,21 @@
+Gene accession id	Gene name	Gene bundle url
+MGI:1345144	sprouty RTK signaling antagonist 4	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1345144
+MGI:2670964	terminal nucleotidyltransferase 5A	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:2670964
+MGI:95490	fibrillin 2	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95490
+MGI:95689	growth differentiation factor 6	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95689
+MGI:1341886	ajuba LIM protein	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1341886
+MGI:1347352	hormonally upregulated Neu-associated kinase	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1347352
+MGI:109331	nucleoredoxin	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:109331
+MGI:1914061	dual oxidase maturation factor 2	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1914061
+MGI:1915958	RAB, member RAS oncogene family-like 2	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1915958
+MGI:1917363	ciliary microtubule associated protein 1B	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1917363
+MGI:1920858	MARVEL (membrane-associating) domain containing 3	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1920858
+MGI:106576	chondroitin polymerizing factor	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:106576
+MGI:107185	chaperonin containing Tcp1, subunit 5 (epsilon)	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107185
+MGI:1931881	DnaJ heat shock protein family (Hsp40) member B12	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1931881
+MGI:109327	BCL2/adenovirus E1B interacting protein 2	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:109327
+MGI:1913955	deoxyribonuclease 1-like 2	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1913955
+MGI:107374	paired-like homeodomain transcription factor 1	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107374
+MGI:1335088	proline-serine-threonine phosphatase-interacting protein 2	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:1335088
+MGI:95688	growth differentiation factor 5	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:95688
+MGI:107474	CD38 antigen	https://www.ebi.ac.uk/mi/impc/bulkdata-api/geneBundles/MGI:107474
diff --git a/tools/query_impc/test-data/test_output_3.tabular b/tools/query_impc/test-data/test_output_3.tabular
new file mode 100644
index 00000000000..0db14cd3fdc
--- /dev/null
+++ b/tools/query_impc/test-data/test_output_3.tabular
@@ -0,0 +1,79 @@
+MP:0002764	short tibia	MGI:99960,MGI:108071
+MP:0001785	edema	MGI:99960
+MP:0002968	increased circulating alkaline phosphatase level	MGI:99960
+MPATH:590	fibro-osseous lesion	MGI:99960
+MP:0001399	hyperactivity	MGI:99960,MGI:1354170
+MP:0011100	preweaning lethality, complete penetrance	MGI:99960,MGI:1344380,MGI:1917473
+MP:0010052	increased grip strength	MGI:99960,MGI:96709
+MPATH:134	hyperplasia	MGI:99960
+MP:0000218	increased leukocyte cell number	MGI:99960,MGI:96709
+MP:0005013	increased lymphocyte cell number	MGI:99960
+MP:0001363	increased anxiety-related response	MGI:1354170
+MP:0001258	decreased body length	MGI:1354170,MGI:108071,MGI:1915775,MGI:2443026
+MP:0003795	abnormal bone structure	MGI:1354170
+MP:0001417	decreased exploration in new environment	MGI:1354170,MGI:96709
+MP:0002797	increased thigmotaxis	MGI:1354170
+MP:0002757	decreased vertical activity	MGI:1354170
+MP:0011960	abnormal eye anterior chamber depth	MGI:1354170
+MP:0010124	decreased bone mineral content	MGI:1354170
+MP:0001402	decreased locomotor activity	MGI:1354170
+MP:0004924	abnormal behavior	MGI:1354170,MGI:96709
+MP:0013279	increased fasting circulating glucose level	MGI:99502,MGI:1860418,MGI:103225
+MP:0005333	decreased heart rate	MGI:3616082
+MP:0001406	abnormal gait	MGI:96709
+MP:0010053	decreased grip strength	MGI:96709,MGI:1924093,MGI:1915775
+MP:0001523	impaired righting response	MGI:96709
+MP:0005559	increased circulating glucose level	MGI:96709
+MP:0000745	tremors	MGI:96709
+MPATH:52	lipid depletion	MGI:1913564
+MPATH:42	lipid deposition	MGI:1913564
+MP:0005419	decreased circulating serum albumin level	MGI:1860418
+MP:0000219	increased neutrophil cell number	MGI:1860418
+MP:0005567	decreased circulating total protein level	MGI:1860418,MGI:1915775
+MP:0008810	increased circulating iron level	MGI:1914361
+MP:0002875	decreased erythrocyte cell number	MGI:1914361
+MP:0000208	decreased hematocrit	MGI:1914361
+MP:0002874	decreased hemoglobin content	MGI:1914361
+MP:0005566	decreased blood urea nitrogen level	MGI:103225,MGI:1915775
+MP:0005343	increased circulating aspartate transaminase level	MGI:103225
+MP:0011954	shortened PQ interval	MGI:103225
+MP:0005344	increased circulating bilirubin level	MGI:103225,MGI:95479
+MP:0002644	decreased circulating triglyceride level	MGI:103225
+MP:0001415	increased exploration in new environment	MGI:103225
+MP:0010511	shortened PR interval	MGI:103225
+MP:0002574	increased vertical activity	MGI:1915291
+MP:0003917	increased kidney weight	MGI:1915291
+MP:0013292	embryonic lethality prior to organogenesis	MGI:1344380
+MP:0000221	decreased leukocyte cell number	MGI:95479
+MP:0005016	decreased lymphocyte cell number	MGI:95479
+MP:0012361	decreased large unstained cell number	MGI:95479
+MP:0001146	abnormal testis morphology	MGI:2443598
+MP:0002152	abnormal brain morphology	MGI:2443598
+MPATH:127	atrophy	MGI:2443598
+MPATH:639	hydrocephalus	MGI:2443598
+MP:0001925	male infertility	MGI:2443598
+MP:0002092	abnormal eye morphology	MGI:2443598
+MP:0005238	increased brain size	MGI:2443598
+MP:0001147	small testis	MGI:2443598
+MP:0000598	abnormal liver morphology	MGI:2441730
+MP:0002833	increased heart weight	MGI:2441730
+MP:0011110	preweaning lethality, incomplete penetrance	MGI:2441730,MGI:1915775,MGI:2443026
+MP:0004738	abnormal auditory brainstem response	MGI:2441730
+MP:0000599	enlarged liver	MGI:2441730
+MP:0009476	enlarged cecum	MGI:2441730
+MP:0005565	increased blood urea nitrogen level	MGI:2441730
+MP:0001284	absent vibrissae	MGI:2441730
+MP:0004832	enlarged ovary	MGI:2441730
+MP:0005084	abnormal gallbladder morphology	MGI:1915775
+MP:0000274	enlarged heart	MGI:1915775
+MP:0009142	decreased prepulse inhibition	MGI:1915775
+MP:0000692	small spleen	MGI:1915775
+MP:0030610	absent teeth	MGI:1915775
+MP:0001325	abnormal retina morphology	MGI:1915775
+MP:0000266	abnormal heart morphology	MGI:1915775
+MPATH:64	developmental dysplasia	MGI:1915775
+MP:0000494	abnormal cecum morphology	MGI:1915775
+MP:0001120	abnormal uterus morphology	MGI:1915775
+MP:0000689	abnormal spleen morphology	MGI:1915775
+MP:0009709	hydrometra	MGI:1915775
+MP:0002060	abnormal skin morphology	MGI:1915775
diff --git a/tools/query_impc/test-data/test_output_9.tabular b/tools/query_impc/test-data/test_output_9.tabular
new file mode 100644
index 00000000000..a5024bb6f9b
--- /dev/null
+++ b/tools/query_impc/test-data/test_output_9.tabular
@@ -0,0 +1,4 @@
+Gene symbol	Significant MP terms Ids	Significant MP terms Names
+Cacna1s	['MP:0001697', 'MP:0001785', 'MP:0003231', 'MP:0005388', 'MP:0001491', 'MP:0001575', 'MP:0003743', 'MP:0001914', 'MP:0011100', 'MP:0005560']	['abnormal embryo size', 'edema', 'abnormal placenta vasculature', 'respiratory system phenotype', 'unresponsive to tactile stimuli', 'cyanosis', 'abnormal facial morphology', 'hemorrhage', 'preweaning lethality, complete penetrance', 'decreased circulating glucose level']
+Ndel1	['MP:0001697', 'MP:0003984', 'MP:0002111', 'MP:0005388', 'MP:0011100']	['abnormal embryo size', 'embryonic growth retardation', 'abnormal tail morphology', 'respiratory system phenotype', 'preweaning lethality, complete penetrance']
+Zfp536	['MP:0003019', 'MP:0005564', 'MP:0005388', 'MP:0001575', 'MP:0001399', 'MP:0011100', 'MP:0005641']	['increased circulating chloride level', 'increased hemoglobin content', 'respiratory system phenotype', 'cyanosis', 'hyperactivity', 'preweaning lethality, complete penetrance', 'increased mean corpuscular hemoglobin concentration']
diff --git a/tools/query_impc/test-data/test_query_1.txt b/tools/query_impc/test-data/test_query_1.txt
new file mode 100644
index 00000000000..30dbc4efa3d
--- /dev/null
+++ b/tools/query_impc/test-data/test_query_1.txt
@@ -0,0 +1 @@
+MGI:1923523
\ No newline at end of file
diff --git a/tools/query_impc/test-data/test_query_2.txt b/tools/query_impc/test-data/test_query_2.txt
new file mode 100644
index 00000000000..fd3d667ae3b
--- /dev/null
+++ b/tools/query_impc/test-data/test_query_2.txt
@@ -0,0 +1 @@
+MP:0002110	MP:0000559
\ No newline at end of file
diff --git a/tools/query_impc/test-data/test_query_3.txt b/tools/query_impc/test-data/test_query_3.txt
new file mode 100644
index 00000000000..04e762f8bb6
--- /dev/null
+++ b/tools/query_impc/test-data/test_query_3.txt
@@ -0,0 +1 @@
+MGI:1913564	MGI:1915291	MGI:1914361	MGI:1915775	MGI:1354170	MGI:103225	MGI:2441730	MGI:108071	MGI:2443598	MGI:106643	MGI:1917473	MGI:1338073	MGI:1924093	MGI:99960	MGI:99502	MGI:95479	MGI:1344380	MGI:1860418	MGI:1354721	MGI:3616082	MGI:96709	MGI:2443026
\ No newline at end of file