galaxyproject · bgruening · Sep 14, 2024 · Sep 6, 2024 · Sep 9, 2024 · Sep 9, 2024
diff --git a/tools/anndata/inspect.xml b/tools/anndata/inspect.xml
@@ -283,7 +283,7 @@ pd.DataFrame(adata.varm['PCs']).to_csv("$varm_PCs", sep="\t", index = False)
             </conditional>
             <output name="uns_neighbors_connectivities" ftype="mtx">
                 <assert_contents>
-                    <has_text_matching expression="100	100	1248" />
+                    <has_text_matching expression="100	100	2496" />
                     <has_text_matching expression="4.880" />
                 </assert_contents>
             </output>
@@ -305,14 +305,14 @@ pd.DataFrame(adata.varm['PCs']).to_csv("$varm_PCs", sep="\t", index = False)
             <output name="uns_paga_connectivities" ftype="mtx">
                 <assert_contents>
                     <has_text_matching expression="16	16	97" />
-                    <has_text_matching expression="1.000" />
+                    <has_text_matching expression="2 1 1" />
                     <has_text_matching expression="8.839" />
                 </assert_contents>
             </output>
             <output name="uns_paga_connectivities_tree" ftype="mtx">
                 <assert_contents>
                     <has_text_matching expression="16	16	15" />
-                    <has_text_matching expression="1.000" />
+                    <has_text_matching expression="1 2 1" />
                 </assert_contents>
             </output>
         </test>

diff --git a/tools/anndata/macros.xml b/tools/anndata/macros.xml
@@ -1,10 +1,11 @@
 <macros>
-    <token name="@TOOL_VERSION@">0.10.3</token>
+    <token name="@TOOL_VERSION@">0.10.9</token>
     <token name="@VERSION_SUFFIX@">0</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="@TOOL_VERSION@">anndata</requirement>
             <requirement type="package" version="3.0.6">loompy</requirement>
+            <requirement type="package" version="1.26.4">numpy</requirement>
             <yield />
         </requirements>
     </xml>
@@ -76,4 +77,12 @@ and cells, such as Name, Chromosome, Position (for genes), and Strain, Sex, Age
             </when>
         </conditional>
     </xml>
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <!-- Text sanitizer: start from @VALIDINITIAL@ and take the single quote out of the accepted characters. -->
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;"/>
+            </valid>
+        </sanitizer>
+    </xml>
 </macros>
diff --git a/tools/anndata/manipulate.xml b/tools/anndata/manipulate.xml
@@ -50,6 +50,41 @@ adata.rename_categories(
     key='$manipulate.key',
     categories=$categories)
 
+#else if $manipulate.function == 'organize_keys'
+## Remove and/or alphabetically sort annotation keys of obs, var, uns and obsm.
+## Generated Python lines start at column 0 on purpose: Cheetah copies leading
+## whitespace verbatim, so indenting them would yield an IndentationError.
+    #if $manipulate.obs_keys
+        #set $keys = [x.strip() for x in str($manipulate.obs_keys).split(',') if x.strip()]
+adata.obs = adata.obs.drop(columns=$keys)
+    #end if
+    #if $manipulate.sort_obs
+adata.obs = adata.obs.reindex(sorted(adata.obs.columns), axis=1)
+    #end if
+
+    #if $manipulate.var_keys
+        #set $keys = [x.strip() for x in str($manipulate.var_keys).split(',') if x.strip()]
+adata.var = adata.var.drop(columns=$keys)
+    #end if
+    #if $manipulate.sort_var
+adata.var = adata.var.reindex(sorted(adata.var.columns), axis=1)
+    #end if
+
+## uns is a mapping rather than a DataFrame: keys are popped, and sorting rebuilds the mapping in key order.
+    #if $manipulate.uns_keys
+        #set $keys = [x.strip() for x in str($manipulate.uns_keys).split(',') if x.strip()]
+for key in $keys:
+    adata.uns.pop(key, None)
+    #end if
+    #if $manipulate.sort_uns
+adata.uns = dict(sorted(adata.uns.items()))
+    #end if
+
+    #if $manipulate.obsm_keys
+        #set $keys = [x.strip() for x in str($manipulate.obsm_keys).split(',') if x.strip()]
+for key in $keys:
+    if key in adata.obsm:
+        del adata.obsm[key]
+    #end if
+
+#else if $manipulate.function == 'flag_genes'
+## adapted from anndata operations
+## For every repeat entry, store a boolean column in var marking the genes whose
+## name starts with the given prefix; the column is not created when nothing matches.
+    #for $flag in $manipulate.gene_flags
+k_cat = adata.var_names.str.startswith('${flag.startswith}')
+if k_cat.sum() > 0:
+    adata.var['${flag.col_name}'] = k_cat
+else:
+    print('No genes starting with ${flag.startswith} found.')
+    #end for
+
 #else if $manipulate.function == 'strings_to_categoricals'
 adata.strings_to_categoricals()
 
@@ -71,6 +106,14 @@ obs.index = obs_index
 adata.obs = obs
     #end if
 
+#else if $manipulate.function == 'split_on_obs'
+## Write one AnnData file per distinct value of the selected obs column into
+## output_split/, named <key>_<value>.h5.
+import os
+res_dir = "output_split"
+os.makedirs(res_dir, exist_ok=True)
+for field_value in adata.obs["${manipulate.key}"].unique():
+    ad_s = adata[adata.obs["${manipulate.key}"] == field_value]
+    ad_s.write(f"{res_dir}/${manipulate.key}_{field_value}.h5", compression='gzip')
+
 #else if $manipulate.function == 'filter'
     #if $manipulate.filter.filter == 'key'
         #if $manipulate.var_obs == 'var'
@@ -126,7 +169,8 @@ adata.raw = adata
 
 #end if
 
-adata.write('anndata.h5ad')
+## split_on_obs writes its own per-value files, so the single output is skipped for it.
+#if $manipulate.function != 'split_on_obs'
+adata.write('anndata.h5ad', compression='gzip')
+#end if
 ]]></configfile>
     </configfiles>
     <inputs>
@@ -137,9 +181,12 @@ adata.write('anndata.h5ad')
                 <option value="obs_names_make_unique">Makes the obs index unique by appending '1', '2', etc</option>
                 <option value="var_names_make_unique">Makes the var index unique by appending '1', '2', etc</option>
                 <option value="rename_categories">Rename categories of annotation</option>
+                <option value="organize_keys">Remove/Sort keys of any annotations</option>
+                <option value="flag_genes">Flag genes that start with a pattern</option><!--adapted from EBI anndata operations tool -->
                 <option value="strings_to_categoricals">Transform string annotations to categoricals</option>
                 <option value="transpose">Transpose the data matrix, leaving observations and variables interchanged</option>
                 <option value="add_annotation">Add new annotation(s) for observations or variables</option>
+                <option value="split_on_obs">Split the AnnData object into multiple AnnData objects based on the values of a given obs key</option><!--adapted from EBI anndata operations tool-->
                 <option value="filter">Filter observations or variables</option>
                 <option value="save_raw">Freeze the current state into the 'raw' attribute</option>
             </param>
@@ -167,6 +214,29 @@ adata.write('anndata.h5ad')
                 <param name="key" type="text" value="" label="Key for observations or variables annotation" help="Annotation key in obs or var"/>
                 <param name="categories" type="text" value="" label="Comma-separated list of new categories" help="It should be the same number as the old categories"/>
             </when>
+            <when value="organize_keys">
+                <param name="obs_keys" type="text" value="" optional="true" label="Keys/fields to remove from observations (obs)">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param name="sort_obs" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sort keys of 'obs' annotation"/>
+                <param name="var_keys" type="text" value="" optional="true" label="Keys/fields to remove from variables (var)">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param name="sort_var" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sort keys of 'var' annotation"/>
+                <param name="uns_keys" type="text" value="" optional="true" label="Keys/fields to remove from unstructured (uns)">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param name="sort_uns" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Sort keys of 'uns' annotation"/>
+                <param name="obsm_keys" type="text" value="" optional="true" label="Keys/fields to remove from 'obsm' annotation">
+                    <expand macro="sanitize_query"/>
+                </param>
+            </when>
+            <when value="flag_genes">
+                <!-- One repeat entry per gene-name prefix; each entry names the var column that receives the boolean flag. -->
+                <repeat name="gene_flags" title="Flag genes that start with these names">
+                    <param name="startswith" type="text" label="Text that you expect the genes to be flagged to start with" help="For example, 'MT-' for mito genes"/>
+                    <param name="col_name" type="text" label="Name of the column in var.names where this boolean flag is stored" help="For example, name this column as 'mito' for mitochondrial genes."/>
+                </repeat>
+            </when>
             <when value="strings_to_categoricals" ></when>
             <when value="transpose" ></when>
             <when value="add_annotation">
@@ -177,6 +247,9 @@ adata.write('anndata.h5ad')
                 <param name="new_annot" type="data" format="tabular" label="Table with new annotations"
                     help="The new table should have the same number of rows and same order than obs or var. The key names should be in the header (1st line)"/>
             </when>
+            <when value="split_on_obs">
+                <param name="key" type="text" label="The obs key to split on" help="For example, if you want to split on cluster annotation, you can use the key 'louvain'. The output will be a collection of anndata objects"/>
+            </when>
             <when value="filter">
                 <param name="var_obs" type="select" label="What to filter?">
                     <option value="var">Variables (var)</option>
@@ -237,7 +310,13 @@ adata.write('anndata.h5ad')
         </conditional>
     </inputs>
     <outputs>
-        <data name="anndata" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${manipulate.function}) on ${on_string}"/>
+        <data name="anndata" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${manipulate.function}) on ${on_string}">
+            <filter>manipulate['function'] != 'split_on_obs'</filter>
+        </data>
+        <collection name="output_h5ad_split" type="list" label="${tool.name} (${manipulate.function}) on ${on_string}">
+            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.h5" directory="output_split" format="h5ad" visible="true"/>
+            <filter>manipulate['function'] == 'split_on_obs'</filter>
+        </collection>
     </outputs>
     <tests>
         <test>
@@ -386,9 +465,9 @@ adata.write('anndata.h5ad')
     <help><![CDATA[
 **What it does**
 
-This tool takes a AnnData dataset, manipulates it and returns it. 
+This tool takes an AnnData dataset, manipulates it and returns it.
 
-The possible manipulations are: 
+The possible manipulations are:
 
 - Concatenate along the observations axis (`concatenate method <https://anndata.readthedocs.io/en/latest/generated/anndata.AnnData.concatenate.html>`__)
 
@@ -416,7 +495,7 @@ The possible manipulations are:
 
     Data matrix is transposed, observations and variables are interchanged.
 
-- Add annotation for variables or observations 
+- Add annotation for variables or observations
 
 - Filter data variables or observations, by index or key
 

diff --git a/tools/anndata/test-data/addloomout1.loom b/tools/anndata/test-data/addloomout1.loom
diff --git a/tools/anndata/test-data/addloomout3.loom b/tools/anndata/test-data/addloomout3.loom
diff --git a/tools/anndata/test-data/converted.loom.test b/tools/anndata/test-data/converted.loom.test
diff --git a/tools/anndata/test-data/export.krumsiek11.loom b/tools/anndata/test-data/export.krumsiek11.loom
diff --git a/tools/anndata/test-data/import.csv.h5ad b/tools/anndata/test-data/import.csv.h5ad
diff --git a/tools/anndata/test-data/import.loom.krumsiek11.h5ad b/tools/anndata/test-data/import.loom.krumsiek11.h5ad
diff --git a/tools/anndata/test-data/import.mtx.legacy_10x.h5ad b/tools/anndata/test-data/import.mtx.legacy_10x.h5ad
diff --git a/tools/anndata/test-data/import.mtx.no_10x.h5ad b/tools/anndata/test-data/import.mtx.no_10x.h5ad
diff --git a/tools/anndata/test-data/import.mtx.v3_10x.h5ad b/tools/anndata/test-data/import.mtx.v3_10x.h5ad
diff --git a/tools/anndata/test-data/import.tsv.h5ad b/tools/anndata/test-data/import.tsv.h5ad
diff --git a/tools/anndata/test-data/import.umi_tools.h5ad b/tools/anndata/test-data/import.umi_tools.h5ad