Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update scanpy with magic-impute functionality #5866

Merged
merged 4 commits into from
Mar 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tools/scanpy/cluster_reduce_dimension.xml
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ sc.tl.embedding_density(
<param argument="groups" type="text" value="louvain" label="Key for categorical in the input" help="You can pass your predefined groups by choosing any categorical annotation of observations ('adata.obs').">
<expand macro="sanitize_query" />
</param>
<param argument="use_rna_velocity" type="boolean" truevalue="False" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/>
<param argument="use_rna_velocity" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Use RNA velocity to orient edges in the abstracted graph and estimate transitions?" help="Requires that 'adata.uns' contains a directed single-cell graph with key '['velocyto_transitions']'. This feature might be subject to change in the future."/>
<param argument="model" type="select" label="PAGA connectivity model" help="">
<option value="v1.2">v1.2</option>
<option value="v1.0">v1.0</option>
Expand Down
3 changes: 2 additions & 1 deletion tools/scanpy/macros.xml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<macros>
<token name="@TOOL_VERSION@">1.9.6</token>
<token name="@VERSION_SUFFIX@">1</token>
<token name="@VERSION_SUFFIX@">2</token>
<token name="@profile@">22.05</token>
<xml name="requirements">
<requirements>
Expand All @@ -11,6 +11,7 @@
<requirement type="package" version="1.5.3">pandas</requirement>
<requirement type="package" version="3.7">matplotlib</requirement>
<requirement type="package" version="0.12.2">seaborn</requirement>
<requirement type="package" version="3.0.0">magic-impute</requirement>
<yield />
</requirements>
</xml>
Expand Down
110 changes: 109 additions & 1 deletion tools/scanpy/normalize.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<tool id="scanpy_normalize" name="Normalize" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@profile@">
<description>with scanpy</description>
<description>and impute with scanpy</description>
<macros>
<import>macros.xml</import>
</macros>
Expand Down Expand Up @@ -65,6 +65,29 @@ sc.pp.recipe_seurat(
plot=False,
copy=False)

#else if $method.method == "external.pp.magic"
## Run MAGIC denoising/imputation in place on adata (copy=False).
## Optional 'decay' and 'n_pca' are passed as None when left blank so that the
## behaviour matches the parameter help ("not used" / "no PCA is performed");
## omitting the keyword would let the library fall back to its own defaults.
sc.external.pp.magic(
    adata=adata,
    name_list='$method.name_list',
    knn=$method.knn,
#if str($method.decay) != ''
    decay=$method.decay,
#else
    decay=None,
#end if
#if str($method.knn_max) != ''
    knn_max=$method.knn_max,
#end if
## -1 is the form's sentinel value for automatic selection of t
#if $method.t == -1
    t='auto',
#else
    t=$method.t,
#end if
#if str($method.n_pca) != ''
    n_pca=$method.n_pca,
#else
    n_pca=None,
#end if
    solver='$method.solver',
    knn_dist='$method.knn_dist',
    random_state=$method.random_state,
    copy=False)
#end if

@CMD_anndata_write_outputs@
Expand All @@ -79,6 +102,7 @@ sc.pp.recipe_seurat(
<option value="pp.recipe_zheng17">Normalization and filtering as of Zheng et al. (2017), using 'pp.recipe_zheng17'</option>
<option value="pp.recipe_weinreb17">Normalization and filtering as of Weinreb et al (2017), using 'pp.recipe_weinreb17'</option>
<option value="pp.recipe_seurat">Normalization and filtering as of Seurat et al (2015), using 'pp.recipe_seurat'</option>
<option value="external.pp.magic">Denoising using Markov Affinity-based Graph Imputation of Cells (MAGIC) API 'external.pp.magic'</option>
</param>
<when value="pp.normalize_total">
<param argument="target_sum" type="float" value="" optional="true" label="Target sum" help="If not provided, after normalization, each observation (cell) has a total count equal to the median of the total counts (cells) before normalization."/>
Expand Down Expand Up @@ -119,6 +143,29 @@ sc.pp.recipe_seurat(
<when value="pp.recipe_seurat">
<expand macro="param_log"/>
</when>
<when value="external.pp.magic">
    <!-- Inputs for MAGIC; the command template forwards each of them to sc.external.pp.magic -->
    <param name="name_list" type="select" label="Denoised genes to return" help="Selecting all genes may require a large amount of memory">
        <option value="all_genes">All genes</option>
        <option value="pca_only">PCA only</option>
    </param>
    <param argument="knn" type="integer" min="1" value="5" label="Number of nearest neighbors on which to build kernel" help=""/>
    <param argument="decay" type="integer" optional="true" value="1" label="Set decay rate of kernel tails"
        help="If not set, alpha decaying kernel is not used" />
    <param argument="knn_max" type="integer" min="1" optional="true" value="" label="Maximum number of nearest neighbors with nonzero connection"
        help="If not set, will be set to 3 * knn" />
    <!-- t = -1 is a sentinel that the command template translates to t='auto' -->
    <param argument="t" type="integer" min="-1" value="3" label="Power to which the diffusion operator is powered. This sets the level of diffusion"
        help="If ‘-1’, this parameter is selected according to the Procrustes disparity of the diffused data." />
    <param argument="n_pca" type="integer" value="100" optional="true" label="Number of principal components to use for calculating neighborhoods"
        help="For extremely large datasets, using n_pca less than 20 allows neighborhoods to be calculated in roughly log(n_samples) time. If not set, no PCA is performed." />
    <!-- The help text here previously repeated the name_list memory warning, which does not apply to the solver -->
    <param name="solver" type="select" label="Which solver to use" help="'approximate' is faster than 'exact' because imputation is performed in the PCA space and then projected back to the gene space">
        <option value="exact">"exact", the implementation described in van Dijk et al. (2018)</option>
        <option value="approximate">"approximate", a faster variant that performs imputation in the PCA space and then projects back to the gene space</option>
    </param>
    <param name="knn_dist" type="select" label="Distance metric to use for the data" help="See scipy.spatial.distance.pdist documentation for more options https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html">
        <expand macro="distance_metric_options"/>
    </param>
    <expand macro="param_random_state"/>
</when>
</conditional>
<expand macro="inputs_common_advanced"/>
</inputs>
Expand Down Expand Up @@ -217,6 +264,53 @@ sc.pp.recipe_seurat(
</output>
<output name="anndata_out" file="pp.recipe_seurat.recipe_zheng17.h5ad" ftype="h5ad" compare="sim_size" delta="1000000" delta_frac="0.25"/>
</test>
<test expect_num_outputs="2">
<!-- test 5: external.pp.magic returning all genes with t=-1 and n_pca=5.
     The hidden_output is checked for t='auto' (the rendering of the -1 sentinel)
     and the AnnData output is compared by size against the reference file. -->
<param name="adata" value="krumsiek11.h5ad" />
<conditional name="method">
<param name="method" value="external.pp.magic"/>
<param name="name_list" value="all_genes"/>
<param name="t" value="-1"/>
<param name="n_pca" value="5"/>
</conditional>
<section name="advanced_common">
<param name="show_log" value="true" />
</section>
<output name="hidden_output">
<assert_contents>
<has_text_matching expression="external.pp.magic"/>
<has_text_matching expression="name_list='all_genes'"/>
<has_text_matching expression="t='auto'"/>
<has_text_matching expression="n_pca=5"/>
</assert_contents>
</output>
<output name="anndata_out" file="external.pp.magic.all_genes.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
</test>
<test expect_num_outputs="2">
<!-- test 6: external.pp.magic returning the PCA only, with an explicit t=3 and n_pca=5.
     The hidden_output is checked for the literal t=3, the AnnData output is compared
     by size against the reference file, and stdout must mention X_magic. -->
<param name="adata" value="krumsiek11.h5ad" />
<conditional name="method">
<param name="method" value="external.pp.magic"/>
<param name="name_list" value="pca_only"/>
<param name="t" value="3"/>
<param name="n_pca" value="5"/>
</conditional>
<section name="advanced_common">
<param name="show_log" value="true" />
</section>
<output name="hidden_output">
<assert_contents>
<has_text_matching expression="external.pp.magic"/>
<has_text_matching expression="name_list='pca_only'"/>
<has_text_matching expression="t=3"/>
<has_text_matching expression="n_pca=5"/>
</assert_contents>
</output>
<output name="anndata_out" file="external.pp.magic.pca_only.krumsiek11.h5ad" ftype="h5ad" compare="sim_size"/>
<assert_stdout>
<has_text text="X_magic"/>
</assert_stdout>
</test>
</tests>
<help><![CDATA[
Normalize total counts per cell (`pp.normalize_per_cell`)
Expand Down Expand Up @@ -269,6 +363,20 @@ Expects non-logarithmized data. If using logarithmized data, pass `log=False`.
More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.pp.recipe_seurat.html>`__


Markov Affinity-based Graph Imputation of Cells (MAGIC) as of Van Dijk D et al. (2018) (`external.pp.magic`)
============================================================================================================

MAGIC is an algorithm for denoising and transcript recovery of single cells applied to single-cell sequencing data. MAGIC builds a graph from the data and uses diffusion to smooth out noise and recover the data manifold.

The algorithm implemented here has changed primarily in two ways compared to the algorithm described in Van Dijk D et al. (2018).

- Firstly, we use the adaptive kernel described in Moon et al. (2019) for improved stability.
- Secondly, data diffusion is applied in the PCA space, rather than the data space, for speed and memory improvements.

More details on the `scanpy documentation
<https://icb-scanpy.readthedocs-hosted.com/en/stable/api/scanpy.external.pp.magic.html>`__

]]></help>
<expand macro="citations"/>
</tool>
Binary file not shown.
Binary file not shown.