diff --git a/tools/snapatac2/.shed.yml b/tools/snapatac2/.shed.yml
new file mode 100644
index 00000000000..39feccd03f8
--- /dev/null
+++ b/tools/snapatac2/.shed.yml
@@ -0,0 +1,28 @@
+# Tool Shed repository metadata for the SnapATAC2 Galaxy wrappers.
+name: snapatac2
+owner: iuc
+description: 'SnapATAC2 – A Python/Rust package for single-cell epigenomics analysis'
+homepage_url: https://kzhang.org/SnapATAC2/
+long_description: |
+  SnapATAC2 is a flexible, versatile, and scalable single-cell omics analysis framework.
+
+remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/master/tools/snapatac2
+type: unrestricted
+categories:
+  - Epigenetics
+  - Sequence Analysis
+# Templates are quoted so the leading '{{' is not parsed as a YAML flow mapping.
+auto_tool_repositories:
+  name_template: '{{ tool_id }}'
+  description_template: 'Wrapper for the snapatac2 tool suite: {{ tool_name }}'
+suite:
+  name: suite_snapatac2
+  description: 'SnapATAC2 – A Python/Rust package for single-cell epigenomics analysis'
+  long_description: |
+    SnapATAC2 is a flexible, versatile, and scalable single-cell omics analysis framework, featuring:
+
+      * Scale to more than 10 million cells.
+      * Blazingly fast preprocessing tools for BAM to fragment files conversion and count matrix generation.
+      * Matrix-free spectral embedding algorithm that is applicable to a wide range of single-cell omics data, including single-cell ATAC-seq, single-cell RNA-seq, single-cell Hi-C, and single-cell methylation.
+      * Efficient and scalable co-embedding algorithm for single-cell multi-omics data integration.
+      * End-to-end analysis pipeline for single-cell ATAC-seq data, including preprocessing, dimension reduction, clustering, data integration, peak calling, differential analysis, motif analysis, regulatory network analysis.
+      * Seamless integration with other single-cell analysis packages such as Scanpy.
+      * Implementation of fully backed AnnData.
\ No newline at end of file
diff --git a/tools/snapatac2/dimension_reduction_clustering.xml b/tools/snapatac2/dimension_reduction_clustering.xml
new file mode 100644
index 00000000000..26691c616be
--- /dev/null
+++ b/tools/snapatac2/dimension_reduction_clustering.xml
@@ -0,0 +1,579 @@
+<tool id="snapatac2_clustering" name="SnapATAC2 Clustering" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>and dimension reduction</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+export NUMBA_CACHE_DIR="\${TEMP:-/tmp}";
+@PREP_ADATA@
+@CMD@
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+
+@CMD_imports@
+@CMD_read_inputs@
+
+#if $method.method == 'tl.spectral'
+	#if $method.features
+with open('$method.features') as f:
+	features_mask = [x.lower().capitalize() == "True" for x in f.read().splitlines()]
+	#end if
+sa.tl.spectral(
+	adata,
+	n_comps = $method.n_comps,
+	#if $method.features
+	features = features_mask,
+	#end if
+	random_state = $method.random_state,
+	#if $method.sample_size
+	sample_size = $method.sample_size,
+	#end if
+	chunk_size = $method.chunk_size,
+	distance_metric = '$method.distance_metric',
+	weighted_by_sd = $method.weighted_by_sd,
+	inplace = True
+)
+
+#else if $method.method == 'tl.umap'
+sa.tl.umap(
+	adata,
+	n_comps = $method.n_comps,
+	#if $method.use_dims != ''
+	    #set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+	use_dims=$dims,
+	#end if
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added',
+	random_state = $method.random_state,
+	inplace = True
+)
+
+#else if $method.method == 'pp.knn'
+sa.pp.knn(
+	adata,
+	n_neighbors = $method.n_neighbors,
+	#if $method.use_dims != ''
+	    #set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+	use_dims=$dims,
+	#end if
+	use_rep = '$method.use_rep',
+	method = '$method.algorithm',
+	inplace = True,
+	random_state = $method.random_state
+)
+
+#else if $method.method == 'tl.dbscan'
+sa.tl.dbscan(
+	adata,
+	eps = $method.eps,
+	min_samples = $method.min_samples,
+	leaf_size = $method.leaf_size,
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.hdbscan'
+sa.tl.hdbscan(
+	adata,
+	min_cluster_size = $method.min_cluster_size,
+	#if $method.min_samples
+	min_samples = $method.min_samples,
+	#end if
+	cluster_selection_epsilon = $method.cluster_selection_epsilon,
+	alpha = $method.alpha,
+	cluster_selection_method = '$method.cluster_selection_method',
+	random_state = $method.random_state,
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.leiden'
+sa.tl.leiden(
+	adata,
+	resolution = $method.resolution,
+	objective_function = '$method.objective_function',
+	min_cluster_size = $method.min_cluster_size,
+	n_iterations = $method.n_iterations,
+	random_state = $method.random_state,
+	key_added = '$method.key_added',
+	weighted = $method.weighted,
+	inplace = True
+)
+
+#else if $method.method == 'tl.kmeans'
+sa.tl.kmeans(
+	adata,
+	n_clusters = $method.n_clusters,
+	n_iterations = $method.n_iterations,
+	random_state = $method.random_state,
+	use_rep = '$method.use_rep',
+	key_added = '$method.key_added'
+)
+
+#else if $method.method == 'tl.aggregate_X'
+sa.tl.aggregate_X(
+	adata,
+	#if $method.groupby != ''
+	groupby = '$method.groupby',
+	#end if
+	normalize = '$method.normalize'
+)
+
+#else if $method.method == 'tl.aggregate_cells'
+sa.tl.aggregate_cells(
+	adata,
+	use_rep = '$method.use_rep',
+	#if $method.target_num_cells
+	target_num_cells = $method.target_num_cells,
+	#end if
+	min_cluster_size = $method.min_cluster_size,
+	random_state = $method.random_state,
+	key_added = '$method.key_added',
+	inplace = True
+)
+#end if
+
+@CMD_anndata_write_outputs@
+	]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="method">
+            <param name="method" type="select" label="Dimension reduction and Clustering">
+                <option value="tl.spectral">Perform dimension reduction using Laplacian Eigenmap, using 'tl.spectral'</option>
+                <option value="tl.umap">Compute Umap, using 'tl.umap'</option>
+                <option value="pp.knn">Compute a neighborhood graph of observations, using 'pp.knn'</option>
+                <option value="tl.leiden">Cluster cells into subgroups, using 'tl.leiden'</option>
+                <option value="tl.kmeans">Cluster cells into subgroups using the K-means algorithm, using 'tl.kmeans'</option>
+                <option value="tl.dbscan">Cluster cells into subgroups using the DBSCAN algorithm, using 'tl.dbscan'</option>
+                <option value="tl.hdbscan">Cluster cells into subgroups using the HDBSCAN algorithm, using 'tl.hdbscan'</option>
+                <option value="tl.aggregate_X">Aggregate values in adata.X in a row-wise fashion, using 'tl.aggregate_X'</option>
+                <option value="tl.aggregate_cells">Aggregate cells into pseudo-cells, using 'tl.aggregate_cells'</option>
+            </param>
+            <!-- Parameters for the spectral embedding (sa.tl.spectral) -->
+            <when value="tl.spectral">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_n_comps"/>
+                <param argument="features" type="data" format="txt" optional="true" label="Text file indicating features to keep. Each line contains only one word (True/False)." help="True means that the feature is kept. False means the feature is removed"/>
+                <expand macro="param_random_state"/>
+                <param argument="sample_size" type="float" min="0" max="1" optional="true" label="Approximate the embedding using the Nystrom algorithm by selecting a subset of cells" help="Use this only when the number of cells is too large, e.g. &gt; 10,000,000, or the `distance_metric` is “jaccard”"/>
+                <param argument="chunk_size" type="integer" value="20000" label="chunk size"/>
+                <param argument="distance_metric" type="select" label="distance metric: “jaccard”, “cosine”">
+                    <option value="jaccard">jaccard</option>
+                    <option value="cosine">cosine</option>
+                </param>
+                <param argument="weighted_by_sd" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to weight the result eigenvectors by the square root of eigenvalues"/>
+            </when>
+            <when value="tl.umap">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_comps" type="integer" value="2" label="Number of dimensions of embedding"/>
+                <param argument="use_dims" type="text" optional="true" label="Use these dimensions in `use_rep`" help="comma separated list of dimensions">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="umap"/>
+                <expand macro="param_random_state"/>
+            </when>
+            <when value="pp.knn">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_neighbors" type="integer" value="50" label="The number of nearest neighbors to be searched"/>
+                <param argument="use_dims" type="text" value="" optional="true" label="The dimensions used for computation">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+                <param argument="algorithm" type="select" label="Choose method">
+                    <option value="kdtree" selected="true">'kdtree': use the kdtree algorithm to find the nearest neighbors</option>
+                    <option value="hora">'hora': use the HNSW algorithm to find the approximate nearest neighbors</option>
+                    <option value="pynndescent">'pynndescent': use the pynndescent algorithm to find the approximate nearest neighbors</option>
+                </param>
+                <param argument="random_state" type="integer" value="0" label="Random seed for approximate nearest neighbor search"/>
+            </when>
+            <when value="tl.leiden">
+                <expand macro="inputs_anndata"/>
+                <param argument="resolution" type="float" value="1" label="Parameter value controlling the coarseness of the clustering" help="Higher values lead to more clusters"/>
+                <param argument="objective_function" type="select" label="Whether to use the Constant Potts Model (CPM) or modularity">
+                    <option value="CPM">CPM</option>
+                    <option value="modularity">modularity</option>
+                    <option value="RBConfiguration">RBConfiguration</option>
+                </param>
+                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
+                <expand macro="param_n_iterations"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_key_added" key_added="leiden"/>
+                <param argument="weighted" type="boolean" truevalue="True" falsevalue="False" label="Whether to use the edge weights in the graph"/>
+            </when>
+            <when value="tl.kmeans">
+                <expand macro="inputs_anndata"/>
+                <param argument="n_clusters" type="integer" value="5" label="Number of clusters to return"/>
+                <expand macro="param_n_iterations"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="kmeans"/>
+            </when>
+            <when value="tl.dbscan">
+                <expand macro="inputs_anndata"/>
+                <param argument="eps" type="float" value="0.5" label=" The maximum distance between two samples for one to be considered as in the neighborhood of the other" help="This is not a maximum bound on the distances of points within a cluster. This is the most important DBSCAN parameter to choose appropriately for your data set and distance function."/>
+                <param argument="min_samples" type="integer" value="5" label="The number of samples (or total weight) in a neighborhood for a point to be considered as a core point."/>
+                <param argument="leaf_size" type="integer" value="30" label="Leaf size passed to BallTree or cKDTree" help="This can affect the speed of the construction and query, as well as the memory required to store the tree."/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="dbscan"/>
+            </when>
+            <when value="tl.hdbscan">
+                <expand macro="inputs_anndata"/>
+                <param argument="min_cluster_size" type="integer" value="5" label="The minimum size of clusters"/>
+                <param argument="min_samples" type="integer" value="" optional="true" label="The number of samples in a neighbourhood for a point to be considered a core point"/>
+                <param argument="cluster_selection_epsilon" type="float" value="0.0" label="A distance threshold. Clusters below this value will be merged"/>
+                <param argument="alpha" type="float" value="1.0" label="A distance scaling parameter as used in robust single linkage"/>
+                <param argument="cluster_selection_method" type="select" label="The method used to select clusters from the condensed tree">
+                    <option value="eom">Excess of Mass algorithm to find the most persistent clusters</option>
+                    <option value="leaf">Select the clusters at the leaves of the tree - this provides the most fine grained and homogeneous clusters</option>
+                </param>
+                <expand macro="param_random_state"/>
+                <expand macro="param_use_rep"/>
+                <expand macro="param_key_added" key_added="hdbscan"/>
+            </when>
+            <when value="tl.aggregate_X">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_groupby"/>
+                <param argument="normalize" type="select" optional="true" label="normalization method">
+                    <option value="RPM">RPM</option>
+                    <option value="RPKM">RPKM</option>
+                </param>
+            </when>
+            <when value="tl.aggregate_cells">
+                <expand macro="inputs_anndata"/>
+                <expand macro="param_use_rep"/>
+                <param argument="target_num_cells" type="integer" value="" optional="true" label="target_num_cells" help="If None, `target_num_cells = num_cells / min_cluster_size`"/>
+                <param argument="min_cluster_size" type="integer" value="50" label="The minimum size of clusters"/>
+                <expand macro="param_random_state"/>
+                <expand macro="param_key_added" key_added="pseudo_cell"/>
+            </when>
+        </conditional>
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs>
+        <!-- Resulting AnnData, collected from 'anndata.h5ad' in the working directory -->
+        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix"/>
+        <!-- Rendered script plus its stdout; only kept when 'show_log' is enabled in the advanced section -->
+        <data name="hidden_output" format="txt" label="Log file">
+            <filter>advanced_common['show_log']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <!-- tl.spectral -->
+            <conditional name="method">
+                <param name="method" value="tl.spectral"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.select_features.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="30"/>
+                <param name="random_state" value="0"/>
+                <param name="chunk_size" value="20000"/>
+                <param name="distance_metric" value="jaccard"/>
+                <param name="weighted_by_sd" value="True"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.spectral"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="n_comps = 30"/>
+                    <has_text_matching expression="chunk_size = 20000"/>
+                    <has_text_matching expression="distance_metric = 'jaccard'"/>
+                    <has_text_matching expression="weighted_by_sd = True"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.umap -->
+            <conditional name="method">
+                <param name="method" value="tl.umap"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="n_comps" value="2"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="umap"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.umap"/>
+                    <has_text_matching expression="n_comps = 2"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'umap'"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.knn -->
+            <conditional name="method">
+                <param name="method" value="pp.knn"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.umap.pbmc_500_chr21.h5ad"/>
+                <param name="n_neighbors" value="50"/>
+                <param name="use_rep" value="X_spectral"/>
+                <!-- The nearest neighbor method is exposed by the 'algorithm' select; 'inplace' is hard coded in the script and is not a tool parameter -->
+                <param name="algorithm" value="kdtree"/>
+                <param name="random_state" value="0"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.knn"/>
+                    <has_text_matching expression="n_neighbors = 50"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="method = 'kdtree'"/>
+                    <has_text_matching expression="inplace = True"/>
+                    <has_text_matching expression="random_state = 0"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.leiden -->
+            <conditional name="method">
+                <param name="method" value="tl.leiden"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.knn.pbmc_500_chr21.h5ad"/>
+                <param name="resolution" value="2"/>
+                <param name="objective_function" value="modularity"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="n_iterations" value="-1"/>
+                <param name="random_state" value="0"/>
+                <param name="key_added" value="leiden"/>
+                <param name="weighted" value="False"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.leiden"/>
+                    <has_text_matching expression="resolution = 2"/>
+                    <has_text_matching expression="objective_function = 'modularity'"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="n_iterations = -1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="key_added = 'leiden'"/>
+                    <has_text_matching expression="weighted = False"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.leiden.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.kmeans -->
+            <conditional name="method">
+                <param name="method" value="tl.kmeans"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="n_iterations" value="-1"/>
+                <param name="random_state" value="0"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="kmeans"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.kmeans"/>
+                    <has_text_matching expression="n_iterations = -1"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'kmeans'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.kmeans.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.dbscan -->
+            <conditional name="method">
+                <param name="method" value="tl.dbscan"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="eps" value="0.5"/>
+                <param name="min_samples" value="3"/>
+                <param name="leaf_size" value="5"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="dbscan"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.dbscan"/>
+                    <has_text_matching expression="eps = 0.5"/>
+                    <has_text_matching expression="min_samples = 3"/>
+                    <has_text_matching expression="leaf_size = 5"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'dbscan'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.dbscan.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.hdbscan -->
+            <conditional name="method">
+                <param name="method" value="tl.hdbscan"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="min_samples" value="3"/>
+                <param name="cluster_selection_method" value="eom"/>
+                <param name="random_state" value="0"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="key_added" value="hdbscan"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.hdbscan"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="min_samples = 3"/>
+                    <has_text_matching expression="cluster_selection_method = 'eom'"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="key_added = 'hdbscan'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.hdbscan.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.aggregate_X -->
+            <conditional name="method">
+                <param name="method" value="tl.aggregate_X"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="normalize" value="RPKM"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.aggregate_X"/>
+                    <has_text_matching expression="normalize = 'RPKM'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_X.pbmc_500_chr21.h5ad"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- tl.aggregate_cells -->
+            <conditional name="method">
+                <param name="method" value="tl.aggregate_cells"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <param name="use_rep" value="X_spectral"/>
+                <param name="target_num_cells" value="5"/>
+                <param name="min_cluster_size" value="3"/>
+                <param name="random_state" value="0"/>
+                <param name="key_added" value="pseudo_cell"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.tl.aggregate_cells"/>
+                    <has_text_matching expression="use_rep = 'X_spectral'"/>
+                    <has_text_matching expression="target_num_cells = 5"/>
+                    <has_text_matching expression="min_cluster_size = 3"/>
+                    <has_text_matching expression="random_state = 0"/>
+                    <has_text_matching expression="key_added = 'pseudo_cell'"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" ftype="h5ad" compare="sim_size" delta_frac="0.1" location="https://zenodo.org/records/11199963/files/tl.aggregate_cells.pbmc_500_chr21.h5ad"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Perform dimension reduction using Laplacian Eigenmap, using `tl.spectral`
+=========================================================================
+
+Perform dimension reduction using Laplacian Eigenmaps.
+
+Convert the cell-by-feature count matrix into lower dimensional representations using the spectrum of the normalized graph Laplacian defined by pairwise similarity between cells. This function utilizes the matrix-free spectral embedding algorithm to compute the embedding when `distance_metric` is “cosine”, which scales linearly with the number of cells. For other types of similarity metrics, the time and space complexity scale quadratically with the number of cells.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.spectral.html>`__
+
+Compute Umap, using `tl.umap`
+=============================
+
+Compute Umap
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.umap.html>`__
+
+Compute a neighborhood graph of observations, using `pp.knn`
+============================================================
+
+Compute a neighborhood graph of observations.
+
+Computes a neighborhood graph of observations stored in adata using the method specified by method. The distance metric used is Euclidean.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.knn.html>`__
+
+Cluster cells into subgroups, using `tl.leiden`
+===============================================
+
+Cluster cells into subgroups.
+
+Cluster cells using the Leiden algorithm, an improved version of the Louvain algorithm. It has been proposed for single-cell analysis by Levine et al. (2015). This requires having run `pp.knn` first.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.leiden.html>`__
+
+Cluster cells into subgroups using the K-means algorithm, using `tl.kmeans`
+===========================================================================
+
+Cluster cells into subgroups using the K-means algorithm, a classical algorithm in data mining.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.kmeans.html>`__
+
+Cluster cells into subgroups using the DBSCAN algorithm, using `tl.dbscan`
+==========================================================================
+
+Cluster cells into subgroups using the DBSCAN algorithm.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.dbscan.html>`__
+
+Cluster cells into subgroups using the HDBSCAN algorithm, using `tl.hdbscan`
+============================================================================
+
+Cluster cells into subgroups using the HDBSCAN algorithm.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.hdbscan.html>`__
+
+Aggregate values in adata.X in a row-wise fashion, using `tl.aggregate_X`
+=========================================================================
+
+Aggregate values in adata.X in a row-wise fashion.
+
+Aggregate values in adata.X in a row-wise fashion. This is used to compute RPKM or RPM values stratified by user-provided groupings.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_X.html>`__
+
+Aggregate cells into pseudo-cells, using `tl.aggregate_cells`
+=============================================================
+
+Aggregate cells into pseudo-cells.
+
+Aggregate cells into pseudo-cells by iterative clustering.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.tl.aggregate_cells.html>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
diff --git a/tools/snapatac2/macros.xml b/tools/snapatac2/macros.xml
new file mode 100644
index 00000000000..2e34577fc17
--- /dev/null
+++ b/tools/snapatac2/macros.xml
@@ -0,0 +1,225 @@
+<macros>
+    <!-- Version tokens shared by all SnapATAC2 wrappers. -->
+    <token name="@TOOL_VERSION@">2.5.3</token>
+    <token name="@VERSION_SUFFIX@">0</token>
+    <token name="@PROFILE@">23.0</token>
+
+    <!-- Packages needed by the generated scripts; the yield lets a wrapper append further requirements. -->
+    <xml name="requirements">
+        <requirement type="package" version="@TOOL_VERSION@">snapatac2</requirement>
+        <requirement type="package" version="5.18.0">plotly</requirement>
+        <requirement type="package" version="0.2.1">python-kaleido</requirement>
+        <requirement type="package" version="0.19.19">polars</requirement>
+        <requirement type="package" version="14.0.1">pyarrow</requirement>
+        <requirement type="package" version="0.11.3">python-igraph</requirement>
+        <requirement type="package" version="0.8.33">hdbscan</requirement>
+        <requirement type="package" version="0.0.9">harmonypy</requirement>
+        <requirement type="package" version="1.7.4">scanorama</requirement>
+        <yield />
+    </xml>
+
+    <!-- Shell fragment: copy the selected AnnData input to the fixed working name anndata.h5ad. -->
+    <token name="@PREP_ADATA@"><![CDATA[
+        cp '$method.adata' 'anndata.h5ad' &&
+        ]]>
+    </token>
+
+    <!-- Shell fragment: write the script and its stdout to the log output, then print the prettified AnnData summary. -->
+    <!-- The touch keeps the final cat from failing when the script wrote no anndata_info.txt. -->
+    <token name="@CMD@"><![CDATA[
+        cat '$script_file' > '$hidden_output' &&
+        python '$script_file' >> '$hidden_output' &&
+        touch 'anndata_info.txt' &&
+        cat 'anndata_info.txt' @CMD_prettify_stdout@
+        ]]>
+    </token>
+
+    <!-- sed pipeline appended to the final cat: each "key: values" line of the AnnData summary becomes a [key] header followed by one dash-prefixed entry per comma-separated value. -->
+    <token name="@CMD_prettify_stdout@"><![CDATA[ | sed -r '1 s|AnnData object with (.+) = (.*)\s*|\1: \2|g' | sed "s|'||g"  | sed -r 's|^\s*(.*):\s(.*)|[\1]\n-    \2|g' | sed 's|, |\n-    |g'
+    ]]></token>
+
+    <!-- Python preamble of every generated script. -->
+    <token name="@CMD_imports@"><![CDATA[
+import snapatac2 as sa
+import os
+    ]]>
+    </token>
+
+    <!-- Sanitizer for free-text parameters: printable characters are allowed, single quotes are removed (the values end up inside single-quoted strings of the generated script). -->
+    <xml name="sanitize_query" token_validinitial="string.printable">
+        <sanitizer>
+            <valid initial="@VALIDINITIAL@">
+                <remove value="&apos;" />
+            </valid>
+        </sanitizer>
+    </xml>
+
+    <xml name="inputs_anndata">
+        <param name="adata" type="data" format="h5ad" label="Annotated data matrix"/>
+    </xml>
+
+    <!-- Python fragment: read the staged anndata.h5ad with backed = None. -->
+    <token name="@CMD_read_inputs@"><![CDATA[
+
+adata = sa.read('anndata.h5ad', backed = None)
+]]>
+    </token>
+
+    <!-- NOTE: the misspelled macro name is kept unchanged so that existing expand references keep resolving. -->
+    <xml name="dimentions_plot">
+        <param argument="width" type="integer" value="500" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+    </xml>
+
+    <xml name="param_groupby">
+        <param argument="groupby" type="text" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+    </xml>
+
+    <!-- Plot file type; the selected value is used as the extension of the rendered plot file. -->
+    <xml name="out_file">
+        <param name="out_file" type="select" optional="true" label="Type of output file">
+            <option value="png" selected="true">PNG</option>
+            <option value="svg">SVG</option>
+            <option value="pdf">PDF</option>
+        </param>
+    </xml>
+
+    <!-- Python fragment: write the AnnData back to anndata.h5ad and dump its summary to anndata_info.txt. -->
+    <token name="@CMD_anndata_write_outputs@"><![CDATA[
+adata.write('anndata.h5ad')
+with open('anndata_info.txt','w', encoding='utf-8') as ainfo:
+    print(adata, file=ainfo)
+]]>
+    </token>
+
+    <xml name="inputs_common_advanced">
+        <section name="advanced_common" title="Advanced Options" expanded="false">
+            <param name="show_log" type="boolean" checked="false" label="Output Log?" />
+        </section>
+    </xml>
+
+    <xml name="params_render_plot">
+        <param argument="width" type="integer" value="600" label="Width of the plot"/>
+        <param argument="height" type="integer" value="400" label="Height of the plot"/>
+        <expand macro="out_file"/>
+    </xml>
+
+    <xml name="param_shift">
+        <param argument="shift_left" type="integer" value="4" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
+        <param argument="shift_right" type="integer" value="-5" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
+    </xml>
+
+    <!-- The default chunk size is supplied by the expanding wrapper through the size token. -->
+    <xml name="param_chunk_size" tokens="size">
+        <param argument="chunk_size" type="integer" value="@SIZE@" label="chunk size"/>
+    </xml>
+
+    <xml name="min_max_frag_size">
+        <param argument="min_frag_size" type="integer" optional="true" value="" label="Minimum fragment size to include"/>
+        <param argument="max_frag_size" type="integer" optional="true" value="" label="Maximum fragment size to include"/>
+    </xml>
+
+    <!-- Inputs shared by the data integration methods; rendered into Python by the CMD_params_data_integration token below. -->
+    <xml name="params_data_integration">
+        <param argument="use_rep" type="text" value="X_spectral" label="The key for the matrix"/>
+        <param argument="use_dims" type="text" optional="true" value="" label="The dimensions used for computation">
+            <expand macro="sanitize_query"/>
+        </param>
+        <param argument="groupby" type="text" optional="true" value="" label="The key of the observation grouping to consider">
+            <expand macro="sanitize_query" />
+        </param>
+        <param argument="key_added" type="text" optional="true" value="" label="If specified, add the result to adata.obsm with this key"/>
+    </xml>
+
+    <xml name="param_n_comps">
+        <param argument="n_comps" type="integer" value="30" label="Number of dimensions to keep" help="The result is insensitive to this parameter when `weighted_by_sd` is set, as long as it is large enough, e.g. 30."/>
+    </xml>
+
+    <xml name="param_random_state">
+        <param argument="random_state" type="integer" value="0" label="Seed of the random state generator"/>
+    </xml>
+
+    <xml name="param_key_added" tokens="key_added">
+        <param argument="key_added" type="text" value="@KEY_ADDED@" label="`adata.obs` key under which to add cluster labels"/>
+    </xml>
+
+    <xml name="param_use_rep">
+        <param argument="use_rep" type="text" value="X_spectral" label="Use the indicated representation in `.obsm`"/>
+    </xml>
+
+    <xml name="genome_fasta">
+        <param argument="genome_fasta" type="text" label="A fasta file containing the genome sequences or a Genome object"/>
+    </xml>
+
+    <xml name="background">
+        <param argument="background" type="text" optional="true" value="" label="A list of regions to be used as the background">
+            <expand macro="sanitize_query"/>
+        </param>
+    </xml>
+
+    <xml name="mat">
+        <param argument="peak_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by peak count matrix"/>
+        <param argument="gene_mat" type="data" format="h5ad" optional="true" label="AnnData or AnnDataSet object storing the cell by gene count matrix"/>
+    </xml>
+
+    <xml name="param_network">
+        <param argument="network" type="text" label="network"/>
+    </xml>
+
+    <xml name="param_n_iterations">
+        <param argument="n_iterations" type="integer" value="-1" label="How many iterations of the Leiden clustering algorithm to perform"
+            help="Positive values above 2 define the total number of iterations to perform, -1 has the algorithm run until it reaches its optimal clustering."/>
+    </xml>
+
+    <xml name="citations">
+        <citations>
+            <citation type="doi">10.1038/s41592-023-02139-9</citation>
+        </citations>
+    </xml>
+
+    <!-- Test helpers: set non-default plot dimensions and assert that they reach the generated script. -->
+    <xml name="render_plot_test">
+        <param name="width" value="650"/>
+        <param name="height" value="450"/>
+    </xml>
+
+    <xml name="render_plot_matching_text">
+        <has_text_matching expression="width = 650"/>
+        <has_text_matching expression="height = 450"/>
+    </xml>
+
+    <xml name="param_counting_strategy">
+        <param argument="counting_strategy" type="select" label="The strategy to compute feature counts">
+            <option value="fragment">fragment</option>
+            <option value="insertion" selected="true">insertion</option>
+            <option value="paired-insertion">paired-insertion</option>
+        </param>
+    </xml>
+
+    <!-- Python keyword arguments for the data integration inputs; comma-separated text values are split into lists and empty optional values are omitted. -->
+    <token name="@CMD_params_data_integration@"><![CDATA[
+use_rep = '$method.use_rep',
+#if $method.use_dims != ''
+#set $dims = ([x.strip() for x in str($method.use_dims).split(',')])
+use_dims=$dims,
+#end if
+#if $method.groupby != ''
+#set $groupby = ([x.strip() for x in str($method.groupby).split(',')])
+groupby=$groupby,
+#end if
+#if $method.key_added != ''
+key_added = '$method.key_added',
+#end if
+    ]]>
+    </token>
+
+    <!-- Python keyword arguments for rendering a plot; the output file is named plot.EXT where EXT is the selected file type. -->
+    <token name="@CMD_params_render_plot@"><![CDATA[
+    width = $method.width,
+    height = $method.height,
+    out_file = 'plot.$method.out_file',
+    ]]>
+    </token>
+</macros>
diff --git a/tools/snapatac2/plotting.xml b/tools/snapatac2/plotting.xml
new file mode 100644
index 00000000000..d7edfaea346
--- /dev/null
+++ b/tools/snapatac2/plotting.xml
@@ -0,0 +1,243 @@
+<tool id="snapatac2_plotting" name="SnapATAC2 Plotting" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+@PREP_ADATA@
+@CMD@
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+@CMD_read_inputs@
+
+## One branch per plotting method; every branch renders its figure through the shared render-plot arguments.
+#if $method.method == 'pl.frag_size_distr'
+    #if $method.log_scale
+## Build the figure without showing it so the y-axis can be switched to log scale before rendering.
+fig = sa.pl.frag_size_distr(adata, show=False)
+fig.update_yaxes(type="log")
+sa.pl.render_plot(fig, @CMD_params_render_plot@)
+    #else
+sa.pl.frag_size_distr(adata, @CMD_params_render_plot@)
+    #end if
+#else if $method.method == 'pl.tsse'
+sa.pl.tsse(
+    adata,
+    min_fragment = $method.min_fragment,
+    @CMD_params_render_plot@
+)
+#else if $method.method == 'pl.umap'
+## Optional parameters are only emitted when set; an empty value would otherwise render invalid Python such as "marker_size = ,".
+sa.pl.umap(
+    adata,
+    #if $method.color != ''
+    color = '$method.color',
+    #end if
+    use_rep = '$method.use_rep',
+    #if str($method.marker_size) != ''
+    marker_size = $method.marker_size,
+    #end if
+    marker_opacity = $method.marker_opacity,
+    #if str($method.sample_size) != ''
+    sample_size = $method.sample_size,
+    #end if
+    @CMD_params_render_plot@
+)
+#else if $method.method == 'pl.regions'
+## NOTE: no select option or when block exists for 'pl.regions' yet, so this branch is currently unreachable.
+sa.pl.regions(
+    adata,
+    groupby = '$method.groupby',
+    peaks = '$method.peaks',
+    @CMD_params_render_plot@
+)
+#else if $method.method == 'pl.spectral_eigenvalues'
+sa.pl.spectral_eigenvalues(
+    adata,
+    @CMD_params_render_plot@
+)
+#end if
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="method">
+            <param name="method" type="select" label="Method used for plotting">
+                <option value="pl.frag_size_distr">Plot fragment size distribution, using 'pl.frag_size_distr'</option>
+                <option value="pl.tsse">Plot the TSS enrichment vs. number of fragments density figure, using 'pl.tsse'</option>
+                <option value="pl.umap">Plot the UMAP embedding, using 'pl.umap'</option>
+                <option value="pl.spectral_eigenvalues">Plot the eigenvalues of spectral embedding, using 'pl.spectral_eigenvalues'</option>
+            </param>
+            <when value="pl.frag_size_distr">
+                <expand macro="inputs_anndata"/>
+                <param name="log_scale" type="boolean" checked="false" label="Change the y-axis (fragment counts) to log scale"/>
+                <expand macro="params_render_plot"/>
+            </when>
+            <when value="pl.tsse">
+                <expand macro="inputs_anndata"/>
+                <param argument="min_fragment" type="integer" value="500" label="Minimum number of unique fragments"/>
+                <expand macro="params_render_plot"/>
+            </when>
+            <when value="pl.umap">
+                <expand macro="inputs_anndata"/>
+                <param argument="color" type="text" value="" optional="true" label="Color"/>
+                <param argument="use_rep" type="text" value="X_umap" label="Use the indicated representation in .obsm"/>
+                <param argument="marker_size" type="float" value="" optional="true" label="Size of the dots"/>
+                <param argument="marker_opacity" type="float" value="1" label="Opacity of the dots"/>
+                <param argument="sample_size" type="integer" value="" optional="true" label="Number of cells to use"/>
+                <expand macro="params_render_plot"/>
+            </when>
+            <when value="pl.spectral_eigenvalues">
+                <expand macro="inputs_anndata"/>
+                <expand macro="params_render_plot"/>
+            </when>
+        </conditional>
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs>
+        <!-- Only the plot whose format matches the selected out_file is collected; the log is collected when show_log is enabled. -->
+        <data name="out_png" format="png" from_work_dir="plot.png" label="PNG plot from ${tool.name} (${method.method}) on ${on_string}">
+            <filter>method['out_file'] == 'png'</filter>
+        </data>
+        <data name="out_pdf" format="pdf" from_work_dir="plot.pdf" label="PDF plot from ${tool.name} (${method.method}) on ${on_string}">
+            <filter>method['out_file'] == 'pdf'</filter>
+        </data>
+        <data name="out_svg" format="svg" from_work_dir="plot.svg" label="SVG plot from ${tool.name} (${method.method}) on ${on_string}">
+            <filter>method['out_file'] == 'svg'</filter>
+        </data>
+        <data name="hidden_output" format="txt" label="Log file" hidden="true">
+            <filter>advanced_common['show_log']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="2">
+            <!-- pl.frag_size_distr -->
+            <conditional name="method">
+                <param name="method" value="pl.frag_size_distr"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/pp.import_data.pbmc_500_chr21.h5ad"/>
+                <param name="log_scale" value="True"/>
+                <param name="out_file" value="pdf"/>
+                <expand macro="render_plot_test"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="out_pdf" location="https://zenodo.org/records/11199963/files/pl.frag_size_distr.pdf" ftype="pdf" compare="sim_size" delta_frac="0.1"/>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pl.frag_size_distr"/>
+                    <has_text_matching expression="fig.update_yaxes"/>
+                    <expand macro="render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pl.tsse -->
+            <conditional name="method">
+                <param name="method" value="pl.tsse"/>
+                <param name="adata" location="https://zenodo.org/records/11199963/files/metrics.tsse.pbmc_500_chr21.h5ad"/>
+                <param name="min_fragment" value="500"/>
+                <param name="out_file" value="png"/>
+                <expand macro="render_plot_test"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pl.tsse"/>
+                    <has_text_matching expression="min_fragment = 500"/>
+                    <expand macro="render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output name="out_png" location="https://zenodo.org/records/11199963/files/pl.tsse.png" ftype="png" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pl.umap -->
+            <conditional name="method">
+                <param name="method" value="pl.umap"/>
+                <param name="adata" ftype="h5ad" location="https://zenodo.org/records/11199963/files/tl.leiden.pbmc_500_chr21.h5ad"/>
+                <param name="color" value="leiden"/>
+                <param name="use_rep" value="X_umap"/>
+                <param name="marker_size" value="1"/>
+                <param name="marker_opacity" value="0.8"/>
+                <param name="sample_size" value="100"/>
+                <expand macro="render_plot_test"/>
+                <param name="out_file" value="svg"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pl.umap"/>
+                    <has_text_matching expression="color = 'leiden'"/>
+                    <has_text_matching expression="use_rep = 'X_umap'"/>
+                    <has_text_matching expression="marker_size = 1"/>
+                    <has_text_matching expression="marker_opacity = 0.8"/>
+                    <has_text_matching expression="sample_size = 100"/>
+                    <expand macro="render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output name="out_svg" location="https://zenodo.org/records/11199963/files/pl.umap.svg" ftype="svg" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pl.spectral_eigenvalues -->
+            <conditional name="method">
+                <param name="method" value="pl.spectral_eigenvalues"/>
+                <param name="adata" ftype="h5ad" location="https://zenodo.org/records/11199963/files/tl.spectral.pbmc_500_chr21.h5ad"/>
+                <expand macro="render_plot_test"/>
+                <param name="out_file" value="pdf"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pl.spectral_eigenvalues"/>
+                    <expand macro="render_plot_matching_text"/>
+                </assert_contents>
+            </output>
+            <output name="out_pdf" location="https://zenodo.org/records/11199963/files/pl.spectral_eigenvalues.pdf" ftype="pdf" compare="sim_size" delta_frac="0.1"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Plot fragment size distribution, using `pl.frag_size_distr`
+===========================================================
+
+Plot fragment size distribution.
+
+`metrics.frag_size_distr` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pl.frag_size_distr.html>`__
+
+Plot the TSS enrichment vs. number of fragments density figure, using `pl.tsse`
+===============================================================================
+
+Plot the TSS enrichment vs. number of fragments density figure.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pl.tsse.html>`__
+
+Plot the UMAP embedding, using `pl.umap`
+========================================
+
+Plot the UMAP embedding.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pl.umap.html>`__
+
+Plot the eigenvalues of spectral embedding, using `pl.spectral_eigenvalues`
+===========================================================================
+
+Plot the eigenvalues of spectral embedding.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pl.spectral_eigenvalues.html>`__
+    ]]></help>
+    <expand macro="citations"/>
+</tool>
diff --git a/tools/snapatac2/preprocessing.xml b/tools/snapatac2/preprocessing.xml
new file mode 100644
index 00000000000..ed898a497c6
--- /dev/null
+++ b/tools/snapatac2/preprocessing.xml
@@ -0,0 +1,580 @@
+<tool id="snapatac2_preprocessing" name="SnapATAC2 Preprocessing" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
+    <description>and integration</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <expand macro="requirements"/>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+#if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data'
+@PREP_ADATA@
+#end if
+@CMD@
+    ]]></command>
+    <configfiles>
+        <configfile name="script_file"><![CDATA[
+@CMD_imports@
+
+#if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data'
+@CMD_read_inputs@
+#end if
+## One branch per selected method; n_jobs is cast to int because os.getenv returns a string whenever GALAXY_SLOTS is set.
+#if $method.method == 'pp.make_fragment_file'
+sa.pp.make_fragment_file(
+    bam_file = '$method.bam_file',
+    is_paired = $method.is_paired,
+    #if $method.barcode.extract_type == 'from_tag'
+        #if $method.barcode.barcode_tag != ''
+    barcode_tag = '$method.barcode.barcode_tag',
+        #end if
+    #elif $method.barcode.extract_type == 'from_read_names'
+        #if $method.barcode.barcode_regex != ''
+    barcode_regex = '$method.barcode.barcode_regex',
+        #end if
+    #end if
+    #if $method.umi_tag != ''
+    umi_tag = '$method.umi_tag',
+    #end if
+    #if $method.umi_regex != ''
+    umi_regex = '$method.umi_regex',
+    #end if
+    shift_right = $method.shift_right,
+    shift_left = $method.shift_left,
+    min_mapq = $method.min_mapq,
+    chunk_size = $method.chunk_size,
+    compression = 'gzip',
+    output_file = '$fragments_out',
+    tempdir = "."
+)
+
+#else if $method.method == 'pp.import_data'
+import csv
+with open('$method.chrom_sizes') as f:
+    chr_sizes = {x[0]:int(x[1]) for x in csv.reader(f, delimiter='\t')}
+
+sa.pp.import_data(
+    fragment_file = '$method.fragment_file',
+    chrom_sizes = chr_sizes,
+    min_num_fragments = $method.min_num_fragments,
+    sorted_by_barcode = $method.sorted_by_barcode,
+    #if str($method.whitelist) != 'None'
+    whitelist = '$method.whitelist',
+    #end if
+    shift_left = $method.shift_left,
+    shift_right = $method.shift_right,
+    #set $chr_mt = ([x.strip() for x in str($method.chrM).split(',')])
+    chrM = $chr_mt,
+    chunk_size = $method.chunk_size,
+    file = 'anndata.h5ad',
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.add_tile_matrix'
+sa.pp.add_tile_matrix(
+    adata,
+    bin_size = $method.bin_size,
+    inplace = True,
+    chunk_size = $method.chunk_size,
+    #if $method.exclude_chroms != ''
+    #set $excl_chroms = ([x.strip() for x in str($method.exclude_chroms).split(',')])
+    exclude_chroms = $excl_chroms,
+    #end if
+    #if $method.min_frag_size
+    min_frag_size = $method.min_frag_size,
+    #end if
+    #if $method.max_frag_size
+    max_frag_size = $method.max_frag_size,
+    #end if
+    ##counting_strategy = '$method.counting_strategy',
+    count_frag_as_reads = $method.count_frag_as_reads,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.make_gene_matrix'
+sa.pp.make_gene_matrix(
+    adata,
+    gene_anno = '$method.gene_anno',
+    inplace = True,
+    chunk_size = $method.chunk_size,
+    use_x = $method.use_x,
+    id_type = '$method.id_type',
+    transcript_name_key = '$method.transcript_name_key',
+    transcript_id_key = '$method.transcript_id_key',
+    gene_name_key = '$method.gene_name_key',
+    gene_id_key = '$method.gene_id_key',
+    #if $method.min_frag_size
+    min_frag_size = $method.min_frag_size,
+    #end if
+    #if $method.max_frag_size
+    max_frag_size = $method.max_frag_size,
+    #end if
+    ##counting_strategy = '$method.counting_strategy'
+    count_frag_as_reads = $method.count_frag_as_reads
+)
+
+#else if $method.method == 'pp.filter_cells'
+sa.pp.filter_cells(
+    adata,
+    min_counts = $method.min_counts,
+    min_tsse = $method.min_tsse,
+    #if $method.max_counts
+    max_counts = $method.max_counts,
+    #end if
+    #if $method.max_tsse
+    max_tsse = $method.max_tsse,
+    #end if
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.select_features'
+sa.pp.select_features(
+    adata,
+    n_features = $method.n_features,
+    filter_lower_quantile = $method.filter_lower_quantile,
+    filter_upper_quantile = $method.filter_upper_quantile,
+    #if str($method.whitelist) != 'None'
+    whitelist = '$method.whitelist',
+    #end if
+    #if str($method.blacklist) != 'None'
+    blacklist = '$method.blacklist',
+    #end if
+    max_iter = $method.max_iter,
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.scrublet'
+sa.pp.scrublet(
+    adata,
+    #if $method.features
+    features = '$method.features',
+    #end if
+    n_comps = $method.n_comps,
+    sim_doublet_ratio = $method.sim_doublet_ratio,
+    expected_doublet_rate = $method.expected_doublet_rate,
+    #if $method.n_neighbors
+    n_neighbors = $method.n_neighbors,
+    #end if
+    use_approx_neighbors = $method.use_approx_neighbors,
+    random_state = $method.random_state,
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.filter_doublets'
+sa.pp.filter_doublets(
+    adata,
+    #if $method.probability_threshold
+    probability_threshold = $method.probability_threshold,
+    #end if
+    #if $method.score_threshold
+    score_threshold = $method.score_threshold,
+    #end if
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.mnc_correct'
+sa.pp.mnc_correct(
+    adata,
+    batch = '$method.batch',
+    n_neighbors = $method.n_neighbors,
+    n_clusters = $method.n_clusters,
+    n_iter = $method.n_iter,
+    @CMD_params_data_integration@
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'pp.harmony'
+sa.pp.harmony(
+    adata,
+    batch = '$method.batch',
+    @CMD_params_data_integration@
+    inplace = True
+)
+
+#else if $method.method == 'pp.scanorama_integrate'
+sa.pp.scanorama_integrate(
+    adata,
+    batch = '$method.batch',
+    n_neighbors = $method.n_neighbors,
+    @CMD_params_data_integration@
+    inplace = True
+)
+
+#else if $method.method == 'metrics.frag_size_distr'
+sa.metrics.frag_size_distr(
+    adata,
+    max_recorded_size = $method.max_recorded_size,
+    add_key = '$method.add_key',
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+
+#else if $method.method == 'metrics.tsse'
+sa.metrics.tsse(
+    adata,
+    gene_anno = '$method.gene_anno',
+    inplace = True,
+    n_jobs = int(os.getenv("GALAXY_SLOTS", 4))
+)
+#end if
+
+#if $method.method != 'pp.make_fragment_file' and $method.method != 'pp.import_data'
+@CMD_anndata_write_outputs@
+#end if
+    ]]></configfile>
+    </configfiles>
+    <inputs>
+        <conditional name="method">
+            <param name="method" type="select" label="Method used for preprocessing">
+                <option value="pp.make_fragment_file">Convert a BAM file to a fragment file, using 'pp.make_fragment_file'</option>
+                <option value="pp.import_data">Import data fragment files and compute basic QC metrics, using 'pp.import_data'</option>
+                <option value="pp.add_tile_matrix">Generate cell by bin count matrix, using 'pp.add_tile_matrix'</option>
+                <option value="pp.make_gene_matrix">Generate cell by gene activity matrix, using 'pp.make_gene_matrix'</option>
+                <option value="pp.filter_cells">Filter cell outliers based on counts and numbers of genes expressed, using 'pp.filter_cells'</option>
+                <option value="pp.select_features">Perform feature selection, using 'pp.select_features'</option>
+                <option value="pp.scrublet">Compute probability of being a doublet using the scrublet algorithm, using 'pp.scrublet'</option>
+                <option value="pp.filter_doublets">Remove doublets according to the doublet probability or doublet score, using 'pp.filter_doublets'</option>
+                <option value="pp.mnc_correct">A modified MNN-Correct algorithm based on cluster centroid, using 'pp.mnc_correct'</option>
+                <option value="pp.harmony">Use harmonypy to integrate different experiments, using 'pp.harmony'</option>
+                <option value="pp.scanorama_integrate">Use Scanorama [Hie19] to integrate different experiments, using 'pp.scanorama_integrate'</option>
+                <option value="metrics.frag_size_distr">Compute the fragment size distribution of the dataset, using 'metrics.frag_size_distr'</option>
+                <option value="metrics.tsse">Compute the TSS enrichment score (TSSe) for each cell, using 'metrics.tsse'</option>
+            </param>
+            <when value="pp.make_fragment_file">
+                <param argument="bam_file" type="data" format="bam" label="File name of the BAM file"/>
+                <param argument="is_paired" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Indicate whether the BAM file contain paired-end reads"/>
+                <conditional name="barcode">
+                    <param name="extract_type" type="select" label="How to extract barcodes from BAM records?">
+                        <option value="from_tag">From TAG fields</option>
+                        <option value="from_read_names">From read names using regular expressions</option>
+                    </param>
+                    <when value="from_tag">
+                        <param argument="barcode_tag" type="text" value="CB" optional="true" label="Extract barcodes from TAG fields of BAM records"/>
+                    </when>
+                    <when value="from_read_names">
+                        <param argument="barcode_regex" type="text" value="" optional="true" label="Extract barcodes from read names of BAM records using regular expressions" help="The regular expression must contain exactly one capturing group that matches the barcode, e.g. `(..:..:..:..):\w+$` extracts `bd:69:Y6:10` from `A01535:24:HW2MMDSX2:2:1359:8513:3458:bd:69:Y6:10:TGATAGGTT`"/>
+                    </when>
+                </conditional>
+                <param argument="umi_tag" type="text" value="" optional="true" label="Extract UMI from TAG fields of BAM records"/>
+                <param argument="umi_regex" type="text" value="" optional="true" label="Extract UMI from read names of BAM records using regular expressions"/>
+                <expand macro="param_shift"/>
+                <param argument="min_mapq" type="integer" min="0" value="30" label="Filter the reads based on MAPQ"/>
+                <expand macro="param_chunk_size" size="50000000"/>
+            </when>
+            <when value="pp.import_data">
+                <param argument="fragment_file" type="data" format="interval" label="Fragment file, optionally compressed with gzip or zstd"/>
+                <param argument="chrom_sizes" type="data" format="tabular" label="A tabular file containing chromosome names and sizes"/>
+                <param argument="min_num_fragments" type="integer" value="200" label="Number of unique fragments threshold used to filter cells"/>
+                <param argument="sorted_by_barcode" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether the fragment file has been sorted by cell barcodes"/>
+                <param argument="whitelist" type="data" format="txt" optional="True" label="Whitelist file with a list of barcodes" help="Each line must contain a valid barcode. When provided, only barcodes in the whitelist will be retained."/>
+                <param argument="chrM" type="text" value="chrM, M" label="A list of chromosome names that are considered mitochondrial DNA">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="shift_left" type="integer" value="0" label="Insertion site correction for the left end" help="Note this has no effect on single-end reads"/>
+                <param argument="shift_right" type="integer" value="0" label="Insertion site correction for the right end" help="Note this has no effect on single-end reads"/>
+                <expand macro="param_chunk_size" size="2000"/>
+            </when>
+            <when value="pp.add_tile_matrix"> <!-- Adds a cell by bin count matrix to the AnnData input; the help text states import_data must have been run first -->
+                <expand macro="inputs_anndata"/>
+                <param argument="bin_size" type="integer" value="500" label="The size of consecutive genomic regions used to record the counts"/>
+                <expand macro="param_chunk_size" size="500"/>
+                <param argument="exclude_chroms" type="text" value="chrM, chrY, M, Y" optional="true" label="A list of chromosomes to exclude">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <expand macro="min_max_frag_size"/>
+                <!-- NOTE(review): macro disabled and the count_frag_as_reads boolean below is exposed instead; presumably tied to the wrapped SnapATAC2 version, confirm before re-enabling: expand macro="param_counting_strategy"/ -->
+                <param argument="count_frag_as_reads" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to count fragments as reads"/>
+            </when>
+            <when value="pp.make_gene_matrix"> <!-- Builds a cell by gene activity matrix from a GTF/GFF annotation; the help text states import_data must have been run first -->
+                <expand macro="inputs_anndata"/>
+                <param argument="gene_anno" type="data" format="gtf,gff3" label="GTF/GFF file containing the gene annotation"/>
+                <expand macro="param_chunk_size" size="500"/>
+                <param argument="use_x" type="boolean" truevalue="True" falsevalue="False" checked="false" label="If True, use the matrix stored in .X as raw counts"/>
+                <param argument="id_type" type="select" label="Id type, 'gene' or 'transcript'">
+                    <option value="gene" selected="true">gene</option>
+                    <option value="transcript">transcript</option>
+                </param>
+                <param argument="transcript_name_key" type="text" value="transcript_name" label="The key of the transcript name in the gene annotation file"/>
+                <param argument="transcript_id_key" type="text" value="transcript_id" label="The key of the transcript id in the gene annotation file"/>
+                <param argument="gene_name_key" type="text" value="gene_name" label="The key of the gene name in the gene annotation file"/>
+                <param argument="gene_id_key" type="text" value="gene_id" label="The key of the gene id in the gene annotation file"/>
+                <expand macro="min_max_frag_size"/>
+                <!-- NOTE(review): macro disabled and the count_frag_as_reads boolean below is exposed instead; presumably tied to the wrapped SnapATAC2 version, confirm before re-enabling: expand macro="param_counting_strategy"/ -->
+                <param argument="count_frag_as_reads" type="boolean" truevalue="True" falsevalue="False" checked="true" label="Whether to count fragments as reads"/>
+            </when>
+            <when value="pp.filter_cells"> <!-- Cell filtering thresholds on counts and TSS enrichment; both maxima are optional and empty by default -->
+                <expand macro="inputs_anndata"/>
+                <param argument="min_counts" type="integer" value="1000" label="Minimum number of counts required for a cell to pass filtering"/>
+                <param argument="min_tsse" type="float" value="5.0" label="Minimum TSS enrichment score required for a cell to pass filtering"/>
+                <param argument="max_counts" type="integer" value="" optional="true" label="Maximum number of counts required for a cell to pass filtering"/>
+                <param argument="max_tsse" type="float" value="" optional="true" label="Maximum TSS enrichment score required for a cell to pass filtering"/>
+            </when>
+            <when value="pp.select_features"> <!-- Feature selection; both quantile parameters are fractions and are therefore validated to lie within 0 and 1 -->
+                <expand macro="inputs_anndata"/>
+                <param argument="n_features" type="integer" min="1" value="500000" label="Number of features to keep"/>
+                <param argument="filter_lower_quantile" type="float" min="0" max="1" value="0.005" label="Lower quantile of the feature count distribution to filter out"/>
+                <param argument="filter_upper_quantile" type="float" min="0" max="1" value="0.005" label="Upper quantile of the feature count distribution to filter out"/>
+                <param argument="whitelist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide whitelist regions"/>
+                <param argument="blacklist" type="data" format="bed" optional="true" label="A user provided bed file containing genome-wide blacklist regions"/>
+                <param argument="max_iter" type="integer" value="1" label="If greater than 1, this function will perform iterative clustering and feature selection"/>
+            </when>
+            <when value="pp.scrublet"> <!-- Doublet probability estimation with the scrublet algorithm; features and n_neighbors are optional and empty by default -->
+                <expand macro="inputs_anndata"/>
+                <param argument="features" type="text" value="" optional="true" label="Boolean index mask, where True means that the feature is kept, and False means the feature is removed."/>
+                <param argument="n_comps" type="integer" value="15" label="Number of components" help="15 is usually sufficient. The algorithm is not sensitive to this parameter"/>
+                <param argument="sim_doublet_ratio" type="float" value="2.0" label="Number of doublets to simulate relative to the number of observed cells"/>
+                <param argument="expected_doublet_rate" type="float" value="0.1" label="Expected doublet rate"/>
+                <param argument="n_neighbors" type="integer" value="" optional="true" label="Number of neighbors used to construct the KNN graph of observed cells and simulated doublets"/>
+                <param argument="use_approx_neighbors" type="boolean" truevalue="True" falsevalue="False" checked="false" label="Whether to use approximate search"/>
+                <param argument="random_state" type="integer" value="0" label="Random state"/>
+            </when>
+            <when value="pp.filter_doublets"> <!-- Doublet removal by probability (validated to lie within 0 and 1) or by the optional score threshold; the help text states scrublet must have been run first -->
+                <expand macro="inputs_anndata"/>
+                <param argument="probability_threshold" type="float" min="0" max="1" value="0.5" label="Threshold for doublet probability"/>
+                <param argument="score_threshold" type="float" value="" optional="true" label="Threshold for doublet score"/>
+            </when>
+            <when value="pp.mnc_correct"> <!-- Parameters for the modified MNN-Correct algorithm based on cluster centroid, as described in the help text -->
+                <expand macro="inputs_anndata"/>
+                <param argument="batch" type="text" value="batch" label="Batch labels for cells">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="n_neighbors" type="integer" value="5" label="Number of mutual nearest neighbors"/>
+                <param argument="n_clusters" type="integer" value="40" label="Number of clusters"/>
+                <param argument="n_iter" type="integer" value="1" label="Number of iterations"/>
+                <expand macro="params_data_integration"/>
+            </when>
+            <when value="pp.harmony"> <!-- Integration of experiments via harmonypy; the help text states it should be run after dimension reduction -->
+                <expand macro="inputs_anndata"/>
+                <param argument="batch" type="text" value="batch" label="Batch labels for cells">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <expand macro="params_data_integration"/>
+            </when>
+            <when value="pp.scanorama_integrate"> <!-- Integration of experiments via Scanorama; the help text states it should be run after tl.spectral and before computing the neighbor graph -->
+                <expand macro="inputs_anndata"/>
+                <param argument="batch" type="text" value="batch" label="Batch labels for cells">
+                    <expand macro="sanitize_query"/>
+                </param>
+                <param argument="n_neighbors" type="integer" value="20" label="Number of mutual nearest neighbors"/>
+                <expand macro="params_data_integration"/>
+            </when>
+            <when value="metrics.frag_size_distr">
+                <!-- Computes the fragment size distribution and stores it in adata.uns under add_key. TODO: move this method to the plotting tool (NOTE(review): presumably because its result is only consumed by a plot; confirm) -->
+                <expand macro="inputs_anndata"/>
+                <param argument="max_recorded_size" type="integer" min="1" value="1000" label="The maximum fragment size to record in the result"/>
+                <param argument="add_key" type="text" value="frag_size_distr" label="Key used to store the result in `adata.uns`"/>
+            </when>
+            <when value="metrics.tsse">
+                <!-- Computes the TSS enrichment score for each cell from a GTF/GFF annotation. TODO: move this method to the plotting tool (NOTE(review): presumably because its result is only consumed by a plot; confirm) -->
+                <expand macro="inputs_anndata"/>
+                <param argument="gene_anno" type="data" format="gtf,gff3" label="GTF/GFF file containing the gene annotation"/>
+            </when>
+        </conditional>
+        <expand macro="inputs_common_advanced"/>
+    </inputs>
+    <outputs> <!-- Exactly one of fragments_out / anndata_out is produced per run, selected by the chosen method; the log is added when show_log is enabled -->
+        <data name="fragments_out" format="interval" label="${tool.name} (${method.method}) on ${on_string}: Fragment file">
+            <filter>method['method'] == 'pp.make_fragment_file'</filter>
+        </data>
+        <data name="anndata_out" format="h5ad" from_work_dir="anndata.h5ad" label="${tool.name} (${method.method}) on ${on_string}: Annotated data matrix">
+            <filter>method['method'] != 'pp.make_fragment_file'</filter>
+        </data>
+        <data name="hidden_output" format="txt" label="${tool.name} (${method.method}) on ${on_string}: Log file">
+            <filter>advanced_common['show_log']</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <!-- pp.make_fragment_file: paired-end BAM with barcodes taken from the CB tag; the fragment file is only compared by size (sim_size, 10% tolerance) -->
+            <conditional name="method">
+                <param name="method" value="pp.make_fragment_file"/>
+                <param name="bam_file" location="https://zenodo.org/records/11199963/files/pbmc_500_chr21_subsample.bam"/>
+                <param name="is_paired" value="true"/>
+                <conditional name="barcode">
+                    <param name="extract_type" value="from_tag"/>
+                    <param name="barcode_tag" value="CB"/>
+                </conditional>
+                <param name="shift_left" value="4"/>
+                <param name="shift_right" value="-5"/>
+                <param name="min_mapq" value="10"/>
+                <param name="chunk_size" value="50000000"/>
+            </conditional>
+            <output name="fragments_out" location="https://zenodo.org/records/11199963/files/pp.make_fragment_file.pbmc_500_chr21.tsv.gz" ftype="interval" compare="sim_size" delta_frac="0.1"/>
+        </test>
+        <test expect_num_outputs="2">
+            <!-- pp.import_data: show_log is enabled so that the parameter values written to the log can be asserted; the h5ad output is only compared by size -->
+            <conditional name="method">
+                <param name="method" value="pp.import_data"/>
+                <param name="fragment_file" location="https://zenodo.org/records/11199963/files/pbmc_500_chr21.tsv.gz"/>
+                <param name="chrom_sizes" location="https://zenodo.org/records/11199963/files/chr21_size.tabular"/>
+                <param name="min_num_fragments" value="1"/>
+                <param name="sorted_by_barcode" value="False"/>
+                <param name="shift_left" value="0"/>
+                <param name="chrM" value="chrM, M"/>
+                <param name="shift_right" value="0"/>
+                <param name="chunk_size" value="1000"/>
+            </conditional>
+            <section name="advanced_common">
+                <param name="show_log" value="true"/>
+            </section>
+            <output name="hidden_output">
+                <assert_contents>
+                    <has_text_matching expression="sa.pp.import_data"/>
+                    <has_text_matching expression="min_num_fragments = 1"/>
+                    <has_text_matching expression="sorted_by_barcode = False"/>
+                    <has_text_matching expression="shift_left = 0"/>
+                    <has_text_matching expression="chrM = \['chrM', 'M'\]"/>
+                    <has_text_matching expression="shift_right = 0"/>
+                    <has_text_matching expression="chunk_size = 1000"/>
+                </assert_contents>
+            </output>
+            <output name="anndata_out" location="https://zenodo.org/records/11199963/files/pp.import_data.pbmc_500_chr21.h5ad" ftype="h5ad" compare="sim_size" delta_frac="0.1"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Convert a BAM file to a fragment file, using `pp.make_fragment_file`
+====================================================================
+
+Convert a BAM file to a fragment file.
+
+Convert a BAM file to a fragment file by performing the following steps:
+
+- Filtering: remove reads that are unmapped, not primary alignment, mapq < 30, fails platform/vendor quality checks, or optical duplicate. For paired-end sequencing, it also removes reads that are not properly aligned.
+
+- Deduplicate: Sort the reads by cell barcodes and remove duplicated reads for each unique cell barcode.
+
+- Output: Convert BAM records to fragments (if paired-end) or single-end reads.
+
+The bam file needn’t be sorted or filtered.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_fragment_file.html>`__
+
+Import data fragment files and compute basic QC metrics, using `pp.import_data`
+===============================================================================
+
+Import data fragment files and compute basic QC metrics.
+
+A fragment refers to the sequence data originating from a distinct location in the genome. In single-ended sequencing, one read equates to a fragment. However, in paired-ended sequencing, a fragment is defined by a pair of reads. This function is designed to handle, store, and process input files with fragment data, further yielding a range of basic Quality Control (QC) metrics. These metrics include the total number of unique fragments, duplication rates, and the percentage of mitochondrial DNA detected.
+
+How fragments are stored is dependent on the sequencing approach utilized. For single-ended sequencing, fragments are found in `.obsm['fragment_single']`. In contrast, for paired-ended sequencing, they are located in `.obsm['fragment_paired']`.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.import_data.html>`__
+
+Generate cell by bin count matrix, using `pp.add_tile_matrix`
+=============================================================
+
+Generate cell by bin count matrix.
+
+This function is used to generate and add a cell by bin count matrix to the AnnData object.
+
+`import_data` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.add_tile_matrix.html>`__
+
+Generate cell by gene activity matrix, using `pp.make_gene_matrix`
+==================================================================
+
+Generate cell by gene activity matrix.
+
+Generate cell by gene activity matrix by counting the TN5 insertions in gene body regions. The result will be stored in a new file and a new AnnData object will be created.
+
+`import_data` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.make_gene_matrix.html>`__
+
+Filter cell outliers based on counts and numbers of genes expressed, using `pp.filter_cells`
+============================================================================================
+
+Filter cell outliers based on counts and numbers of genes expressed. For instance, only keep cells with at least `min_counts` counts or `min_tsse` TSS enrichment scores. This is to filter measurement outliers, i.e. “unreliable” observations.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_cells.html>`__
+
+Perform feature selection, using `pp.select_features`
+=====================================================
+
+Perform feature selection by selecting the most accessible features across all cells unless `max_iter` > 1.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.select_features.html>`__
+
+Compute probability of being a doublet using the scrublet algorithm, using `pp.scrublet`
+========================================================================================
+
+Compute probability of being a doublet using the scrublet algorithm.
+
+This function identifies doublets by generating simulated doublets using randomly pairing chromatin accessibility profiles of individual cells. The simulated doublets are then embedded alongside the original cells using the spectral embedding algorithm in this package. A k-nearest-neighbor classifier is trained to distinguish between the simulated doublets and the authentic cells. This trained classifier produces a “doublet score” for each cell. The doublet scores are then converted into probabilities using a Gaussian mixture model.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.scrublet.html>`__
+
+Remove doublets according to the doublet probability or doublet score, using `pp.filter_doublets`
+=================================================================================================
+
+Remove doublets according to the doublet probability or doublet score.
+
+The user can choose to remove doublets by either the doublet probability or the doublet score. `scrublet` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.filter_doublets.html>`__
+
+A modified MNN-Correct algorithm based on cluster centroid, using `pp.mnc_correct`
+==================================================================================
+
+A modified MNN-Correct algorithm based on cluster centroid.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.mnc_correct.html>`__
+
+Use harmonypy to integrate different experiments, using `pp.harmony`
+====================================================================
+
+Use harmonypy to integrate different experiments.
+
+Harmony is an algorithm for integrating single-cell data from multiple experiments. This function uses the python port of Harmony, `harmonypy`, to integrate single-cell data stored in an AnnData object. This function should be run after performing dimension reduction.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.harmony.html>`__
+
+Use Scanorama to integrate different experiments, using `pp.scanorama_integrate`
+========================================================================================
+
+Use Scanorama to integrate different experiments.
+
+Scanorama is an algorithm for integrating single-cell data from multiple experiments stored in an AnnData object. This function should be run after performing `tl.spectral` but before computing the neighbor graph.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.pp.scanorama_integrate.html>`__
+
+Compute the fragment size distribution of the dataset, using `metrics.frag_size_distr`
+======================================================================================
+
+Compute the fragment size distribution of the dataset.
+
+This function computes the fragment size distribution of the dataset. Note that it does not operate at the single-cell level. The result is stored in a vector where each element represents the number of fragments and the index represents the fragment length. The first position of the vector is reserved for fragments with size larger than the `max_recorded_size` parameter. `import_data` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.metrics.frag_size_distr.html>`__
+
+Compute the TSS enrichment score (TSSe) for each cell, using `metrics.tsse`
+===========================================================================
+
+Compute the TSS enrichment score (TSSe) for each cell.
+
+`import_data` must be run first in order to use this function.
+
+More details on the `SnapATAC2 documentation
+<https://kzhang.org/SnapATAC2/api/_autosummary/snapatac2.metrics.tsse.html>`__
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>