galaxyproject · bgruening · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024 · Mar 14, 2024
diff --git a/tools/scanpy/cluster_reduce_dimension.xml b/tools/scanpy/cluster_reduce_dimension.xml
@@ -294,7 +294,7 @@ sc.tl.embedding_density(
                 <param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations ('adata.obs').">
                     <expand macro="sanitize_query" />
                 </param>
-                <param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/>
+                <param argument="use_rna_velocity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/>
                 <param argument="model" type="select" label="PAGA connectivity model" help="">
                     <option value="v1.2">v1.2</option>
                     <option value="v1.0">v1.0</option>

diff --git a/tools/scanpy/macros.xml b/tools/scanpy/macros.xml
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.9.6</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <token name="@profile@">22.05</token>
     <xml name="requirements">
         <requirements>
@@ -11,6 +11,7 @@
             <requirement type="package" version="1.5.3">pandas</requirement>
             <requirement type="package" version="3.7">matplotlib</requirement>
             <requirement type="package" version="0.12.2">seaborn</requirement>
+            <requirement type="package" version="3.0.0">magic-impute</requirement>
             <yield />
         </requirements>
     </xml>

diff --git a/tools/scanpy/normalize.xml b/tools/scanpy/normalize.xml
@@ -1,5 +1,5 @@
 <tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
-    <description>with scanpy</description>
+    <description>and impute with scanpy</description>
     <macros>
         <import>macros.xml</import>
     </macros>
@@ -65,6 +65,29 @@ sc.pp.recipe_seurat(
     plot=False,
     copy=False)
 
+#else if $method.method == "external.pp.magic"
+## MAGIC imputation. An empty 'decay' or 'n_pca' field must reach scanpy as an explicit None:
+## omitting the keyword would silently fall back to scanpy's defaults (decay=1, n_pca=100).
+#set $magic_decay = str($method.decay) or 'None'
+#set $magic_n_pca = str($method.n_pca) or 'None'
+sc.external.pp.magic(
+    adata=adata,
+    name_list='$method.name_list',
+    knn=$method.knn,
+    decay=$magic_decay,
+    #if str($method.knn_max) != ''
+    knn_max=$method.knn_max,
+    #end if
+    #if $method.t == -1
+    t='auto',
+    #else
+    t=$method.t,
+    #end if
+    n_pca=$magic_n_pca,
+    solver='$method.solver',
+    knn_dist='$method.knn_dist',
+    random_state=$method.random_state,
+    copy=False)
 #end if
 
 @CMD_anndata_write_outputs@
@@ -79,6 +102,7 @@ sc.pp.recipe_seurat(
                 <option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
                 <option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
                 <option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
+                <option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option>
             </param>
             <when value="pp.normalize_total">
                 <param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
@@ -119,6 +143,29 @@ sc.pp.recipe_seurat(
             <when value="pp.recipe_seurat">
                 <expand macro="param_log"/>
             </when>
+            <when value="external.pp.magic">
+                <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
+                    <option value="all_genes">All genes</option>
+                    <option value="pca_only">PCA only</option>
+                </param>
+                <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>
+                <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails" 
+                    help="If not set, alpha decaying kernel is not used" />
+                <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"
+                    help="If not set, will be set to 3 * knn" />
+                <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
+                    help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
+                <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
+                    help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />
+                <param name="solver" type="select" label="Which solver to use" help="Note that the 'approximate' solver may return negative values."> <!-- value is passed as solver= to sc.external.pp.magic -->
+                    <option value="exact">"exact", the implementation described in van Dijk et al. (2018)</option>
+                    <option value="approximate">"approximate", a faster implementation that performs imputation in the PCA space and then projects back to the gene space</option>
+                </param>
+                <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
+                    <expand macro="distance_metric_options"/>
+                </param>
+                <expand macro="param_random_state"/>
+            </when>
         </conditional>
         <expand macro="inputs_common_advanced"/>
     </inputs>
@@ -217,6 +264,53 @@ sc.pp.recipe_seurat(
             </output>
             <output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
         </test>
+        <test expect_num_outputs="2">
+            <!-- test 5: external.pp.magic on all genes; t=-1 in the form must be rendered as t='auto' in the generated script -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="external.pp.magic"/>
+                <param name="name_list" value="all_genes"/>
+                <param name="t" value="-1"/>
+                <param name="n_pca" value="5"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="external.pp.magic"/>
+                    <has_text_matching expression="name_list='all_genes'"/>
+                    <has_text_matching expression="t='auto'"/>
+                    <has_text_matching expression="n_pca=5"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- test 6: external.pp.magic with name_list=pca_only and a fixed t=3; stdout is expected to mention X_magic -->
+            <param name="adata" value="krumsiek11.h5ad" />
+            <conditional name="method">
+                <param name="method" value="external.pp.magic"/>
+                <param name="name_list" value="pca_only"/>
+                <param name="t" value="3"/>
+                <param name="n_pca" value="5"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true" />
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="external.pp.magic"/>
+                    <has_text_matching expression="name_list='pca_only'"/>
+                    <has_text_matching expression="t=3"/>
+                    <has_text_matching expression="n_pca=5"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
+            <assert_stdout>
+                <has_text text="X_magic"/>
+            </assert_stdout>
+        </test>
     </tests>
     <help><![CDATA[
 Normalize total counts per cell (`pp.normalize_per_cell`)
@@ -269,6 +363,20 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
 More details on the `scanpy documentation
 <https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__
 
+
+Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
+============================================================================================================
+
+MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.
+
+The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018). 
+
+- Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
+- Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.
+
+More details on the `scanpy documentation
+<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.external.pp.magic.html>`__
+
     ]]></help>
     <expand macro="citations"/>
 </tool>
diff --git a/tools/scanpy/test-data/external.pp.magic.all_genes.krumsiek11.h5ad b/tools/scanpy/test-data/external.pp.magic.all_genes.krumsiek11.h5ad
diff --git a/tools/scanpy/test-data/external.pp.magic.pca_only.krumsiek11.h5ad b/tools/scanpy/test-data/external.pp.magic.pca_only.krumsiek11.h5ad