Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add spacexr #6212

Draft
wants to merge 46 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
8e3eb4a
started spacexr
nilchia Aug 2, 2024
a8c1906
started adding config file and help
nilchia Aug 5, 2024
460e8f2
params for reference
nilchia Aug 5, 2024
88fd911
added st param
nilchia Aug 5, 2024
54ff8da
correct format
nilchia Aug 5, 2024
8f1fb30
finished config file
nilchia Aug 5, 2024
3287e68
update rctd
nilchia Aug 6, 2024
ea3ad72
corrected tool name
nilchia Aug 6, 2024
74481cf
add shed.yml
nilchia Aug 6, 2024
fac4e1c
Update tools/spacexr/macros.xml
nilchia Aug 25, 2024
3fca0f8
Merge branch 'galaxyproject:main' into spacexr
nilchia Sep 15, 2024
f848cff
Merge branch 'galaxyproject:main' into spacexr
nilchia Sep 27, 2024
26d2d47
add test-data
nilchia Aug 6, 2024
e7d0fa3
add multi
nilchia Sep 27, 2024
6433511
add output
nilchia Sep 27, 2024
1adcbed
update output
nilchia Oct 8, 2024
16bb7e0
add test (is failing)
nilchia Oct 8, 2024
270f26d
first test pass
nilchia Oct 9, 2024
7ba98dd
test for full and multi
nilchia Oct 9, 2024
dae4129
correct categories
nilchia Oct 9, 2024
544dd98
add doi of CSIDE
nilchia Oct 11, 2024
1e0bad9
correct rds output of multi
nilchia Oct 11, 2024
8c2c3a4
started CSIDE input
nilchia Oct 14, 2024
f8dc7f7
add nonparametric script
nilchia Oct 15, 2024
8a7f38c
add pathologic DE input
nilchia Oct 15, 2024
4b1c4ec
cell2cell script
nilchia Oct 15, 2024
ea4ae35
clean macros
nilchia Oct 15, 2024
7b30145
add XY and custom part1
nilchia Oct 18, 2024
1bd854d
custom input and command
nilchia Oct 18, 2024
944acb2
output
nilchia Oct 18, 2024
84ceba2
correct rds output rctd
nilchia Oct 18, 2024
55cf2ca
better test-data for rctd
nilchia Oct 18, 2024
5fb6a97
first test cside
nilchia Oct 18, 2024
73624f3
fix some lint error
nilchia Oct 19, 2024
591ffd1
better label and name
nilchia Oct 21, 2024
df5391f
CDATA
nilchia Oct 22, 2024
3cdc55d
add env varaible, validator for text input, and update macro
nilchia Oct 22, 2024
8e448d5
select box for output
nilchia Oct 22, 2024
642ed70
fix some cheetah errors
nilchia Oct 22, 2024
db796c2
calling env variables is R
nilchia Oct 22, 2024
9857527
better config indentation
nilchia Oct 22, 2024
b83228d
tring to fix the problem with cheetah variables
nilchia Oct 22, 2024
30754b5
cheetah in config
nilchia Oct 23, 2024
7c39d4b
update cside.xml
nilchia Oct 25, 2024
9ad3ea3
update macros and xml
nilchia Oct 29, 2024
d0f4bef
Merge branch 'galaxyproject:main' into spacexr
nilchia Dec 6, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions tools/spacexr/.shed.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Tool Shed metadata for the spacexr Galaxy wrappers.
name: spacexr
owner: iuc
description: Cell type identification and cell type-specific differential expression in spatial transcriptomics
homepage_url: https://github.com/dmcable/spacexr/tree/master
long_description: Computational methods for cell type identification (RCTD) and differential expression (C-SIDE) on spatial transcriptomics datasets
# The wrappers live on the `main` branch of tools-iuc (the branch this change targets).
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/spacexr
categories:
  - Spatial Omics
  - Single Cell
# Nested keys must be indented, otherwise they collide with the top-level `name`/`description`.
suite:
  name: "suite_spacexr"
  description: "A suite of Galaxy tools designed to work with the spacexr-tools collection."
  type: repository_suite_definition
104 changes: 104 additions & 0 deletions tools/spacexr/macros.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
<macros>
<!-- Upstream package version only. A conda build string (e.g. "=r43hdfd78af_0") must not be
     part of this token because it is reused verbatim in the tool's own version attribute
     and in the requirement version used for dependency/container resolution. -->
<token name="@TOOL_VERSION@">2.2.1</token>
<!-- Wrapper revision; bump when the wrapper changes but the upstream version does not. -->
<token name="@VERSION_SUFFIX@">0</token>
<!-- Galaxy profile the wrappers are written against. -->
<token name="@PROFILE@">23.0</token>
<xml name="requirements">
    <!-- Base dependency shared by every spacexr tool; callers may append
         extra <requirement> elements through the yield slot. -->
    <requirements>
        <requirement version="@TOOL_VERSION@" type="package">r-spacexr</requirement>
        <yield />
    </requirements>
</xml>
<xml name="edam">
    <!-- EDAM ontology annotations applied to every tool that expands this macro. -->
    <edam_topics>
        <edam_topic>topic_4019</edam_topic>
        <edam_topic>topic_4028</edam_topic>
        <edam_topic>topic_3308</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3223</edam_operation>
    </edam_operations>
</xml>
<!-- R argument list shared by the run.CSIDE.* calls. It is spliced in wherever the literal
     text CSIDE_COMMON_RUN appears (the CSIDE_SINGLE_RUN token and the tool's configfile).
     Besides the $type.* inputs it reads two plain Cheetah variables, $cell_types and
     $cell_types_present, which the including template has to define whenever the matching
     text inputs are non-empty. The fragment ends without a trailing comma, so it must be
     the last group of arguments in the call. -->
<token name="CSIDE_COMMON_RUN"><![CDATA[
#if str($type.cell_types) != '':
cell_types = c($cell_types),
#end if
cell_type_threshold = $type.cell_type_threshold,
gene_threshold = $type.gene_threshold,
doublet_mode = $type.doublet_mode,
#if str($type.weight_threshold) != '':
weight_threshold = $type.weight_threshold,
#end if
sigma_gene = $type.sigma_gene,
PRECISION.THRESHOLD = $type.precision_threshold,
#if str($type.cell_types_present) != '':
cell_types_present = c($cell_types_present),
#end if
fdr = $type.fdr,
test_genes_sig = $type.test_genes_sig,
logs = $type.logs
]]></token>
<!-- R call used by every single-explanatory-variable mode (point density, cell-to-cell, X/Y).
     Expects the R variable `explanatory.variable` to be defined just before the token.
     The Galaxy input is declared with argument="log_FC_thresh", so the Cheetah lookup has to
     use exactly that spelling (lookups are case-sensitive); only the R keyword on the left
     is lower case. The nested CSIDE_COMMON_RUN text is replaced by the shared argument list. -->
<token name="CSIDE_SINGLE_RUN"><![CDATA[
myRCTD <- run.CSIDE.single(myRCTD,
    explanatory.variable,
    normalize_expr = $type.normalize_expr,
    log_fc_thresh = $type.log_FC_thresh,
    fdr_method = "BH", # default
    medv = $type.medv,
    CSIDE_COMMON_RUN
)
]]></token>
<xml name="sanitizer">
    <!-- Whitelist for free-text identifiers (barcodes, cell type names) that are pasted into
         the generated R script inside quotes. Anything not listed is dropped, so no quote or
         shell/R metacharacter can reach the script.
         "," must be allowed: the fields using this sanitizer are comma separated lists and the
         template splits on it. "-" is allowed because barcodes commonly carry a "-1" style
         suffix; both characters are harmless inside a quoted R string. -->
    <sanitizer invalid_char="">
        <valid initial="string.ascii_letters,string.digits">
            <add value="_" />
            <add value="," />
            <add value="-" />
        </valid>
    </sanitizer>
</xml>
<xml name="patho_barcode" token_help="Comma separated barcodes of the pathological region.">
    <!-- Mandatory barcode list; the help text can be overridden per expansion via help="...". -->
    <param name="pathologic_barcode"
           type="text"
           label="Barcodes"
           help="@HELP@"
           optional="false">
        <expand macro="sanitizer" />
    </param>
</xml>
<xml name="radius">
    <!-- Interaction radius shared by the point-density and cell-to-cell modes. -->
    <param argument="radius"
           type="integer"
           value="50"
           min="0"
           label="Radius"
           help="The radius of the exponential filter. Approximately, the distance considered to be a relevant interaction." />
</xml>
<xml name="cside_common_input">
    <!-- Inputs accepted by every C-SIDE mode; they feed the CSIDE_COMMON_RUN argument list.
         Parameter names are derived from `argument`, so they must stay as they are. -->
    <param argument="cell_types" type="text" optional="true" label="Cell types used for CSIDE" help="(Comma separated) If null, cell types will be chosen with aggregate occurrences of at least 'cell type threshold'."/>
    <param argument="cell_type_threshold" type="integer" min="0" value="125" label="Cell type threshold" help="Min occurrence of number of cells for each cell type to be used."/>
    <param argument="gene_threshold" type="float" min="0" value="0.00005" label="Gene threshold" help="Minimum average normalized expression required for selecting genes."/>
    <param argument="doublet_mode" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Use RCTD doublet mode weights?" help="Otherwise, uses RCTD full mode weights." />
    <!-- Left empty by default; the run token only passes it to R when a value is given. -->
    <param argument="weight_threshold" type="float" min="0" value="" optional="true" label="Weight threshold" help="The threshold of total normalized weights across all cell types in 'cell types' per pixel to be included in the model."/>
    <param argument="sigma_gene" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Fit gene specific overdispersion parameter?" help="If FALSE, overdispersion parameter is same across all genes." />
    <param argument="precision_threshold" type="float" min="0" value="0.05" label="Precision threshold" help="For checking for convergence, the maximum parameter change per algorithm step."/>
    <param argument="cell_types_present" type="text" optional="true" label="Cell types present" help="(Comma separated) cell types (a superset of 'cell types') to be considered as occurring often enough to consider for gene expression contamination during the step filtering out marker genes of other cell types."/>
    <param argument="fdr" type="float" min="0" value="0.01" label="FDR" help="False discovery rate for hypothesis testing."/>
    <param argument="test_genes_sig" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Genes will be tested for significance."/>
    <param argument="logs" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Write progress to log?"/>
</xml>
<!-- Inputs for the single-explanatory-variable C-SIDE modes: the common inputs plus three
     extra options. These parameters carry no explicit name attribute; NOTE(review): the name
     is presumably taken verbatim from `argument`, so the second one would be addressed from
     templates as log_FC_thresh (capital FC) - confirm against the Galaxy schema. -->
<xml name="cside_single_input">
<expand macro="cside_common_input"/>
<param argument="normalize_expr" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Constrain total gene expression to sum to 1 in each condition?"/>
<param argument="log_FC_thresh" type="float" min="0" value="0.4" label="LogFC threshold" help="The natural log fold change cutoff for differential expression."/>
<param argument="medv" type="float" min="0" value="0.5" label="Explanatory.variable cutoff" help="For determining if enough pixels for each cell type have explanatory-variable greater than or less than this value."/>
</xml>
<xml name="output">
    <!-- Optional-output picker. Lives in the "output" section, so its value is reached as
         output.output_selector; tools add their own options through the yield slot. -->
    <section title="Output Options" name="output">
        <param name="output_selector"
               type="select"
               display="checkboxes"
               multiple="true"
               optional="true"
               label="Select / Deselect all">
            <option value="rds">RDS file</option>
            <option value="rscript">R script</option>
            <yield />
        </param>
    </section>
</xml>
<!-- References attached to every spacexr tool: two DOI citations plus a BibTeX entry
     pointing at the upstream GitHub repository. -->
<xml name="citations">
<citations>
<citation type="doi">10.1038/s41587-021-00830-w</citation>
<citation type="doi">10.1038/s41592-022-01575-3</citation>
<citation type="bibtex">@Manual{github,
title = {SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics.},
author = {Dylan Cable},
url = {https://github.com/dmcable/spacexr}}
</citation>
</citations>
</xml>
</macros>
233 changes: 233 additions & 0 deletions tools/spacexr/spacexr_cside.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,233 @@
<tool id="spacexr_cside" name="CSIDE" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
<!-- Short description shown next to the tool name. -->
<description>Cell type-specific differential expression with C-SIDE</description>
<!-- Pull in the shared tokens and macros (versions, requirements, inputs, citations). -->
<macros>
<import>macros.xml</import>
</macros>
<expand macro="edam"/>
<!-- r-spacexr comes from the macro; openssh is added on top for this tool.
     NOTE(review): the reason openssh is required is not visible here - confirm it is needed. -->
<expand macro="requirements">
<requirement type="package" version="9.0">openssh</requirement>
</expand>
<command detect_errors="exit_code"><![CDATA[
## Honour the slot count assigned by the job runner; fall back to 2 only when it is unset.
export GALAXY_SLOTS="\${GALAXY_SLOTS:-2}" &&
mkdir -p 'inputs' 'results' 'logs' 'figures' &&
ln -s '$rctd' 'inputs/rctd.rds' &&
## Keep a copy of the generated script so it can be offered as an output.
cp '$cside_script' 'results/cside_script.R' &&
Rscript '$cside_script'
## output_selector lives in the "output" section and may be empty, hence the str()/split test.
#if 'plots' in str($output.output_selector).split(',')
    ## The plotting step writes PDFs into sub-directories; flatten them into figures/ where the
    ## de_plots collection is discovered. find is used instead of quoted globs (which never
    ## expand) and does not fail when a sub-directory holds no PDF.
    && find 'figures' 'results' -mindepth 2 -type f -name '*.pdf' -exec mv '{}' 'figures/' ';'
#end if
]]></command>
<configfiles>
<configfile name="cside_script"><![CDATA[
## Cheetah pre-processing
## Each comma separated text input is turned into a list of double-quoted items so that it
## can be pasted into an R c(...) call. The variables set here are also read by the
## CSIDE_COMMON_RUN argument list.
#if str($type.cell_types) != '':
#set $cell_types_list = ['"' + str(x.strip()) + '"' for x in str($type.cell_types).split(',')]
#set $cell_types = ','.join($cell_types_list)
#end if
#if str($type.cell_types_present) != '':
#set $cell_types_present_list = ['"' + str(x.strip()) + '"' for x in str($type.cell_types_present).split(',')]
#set $cell_types_present = ','.join($cell_types_present_list)
#end if
#if str($type.de_type) == 'point_density':
#set $pathologic_barcode_list = ['"' + str(x.strip()) + '"' for x in str($type.pathologic_barcode).split(',')]
#set $pathologic_barcode = ','.join($pathologic_barcode_list)
#end if
#if str($type.de_type) == 'custom':
## Build one R character vector per region and wrap them all in an R list(...).
#set $pathologic_barcode_list = ['"' + str(x.strip()) + '"' for x in str($type.pathologic_barcode).split(',')]
#set $region_vectors = ['c(' + ','.join($pathologic_barcode_list) + ')']
#for $region in $type.region:
#if str($region.next_pathologic_barcode) != '':
## The barcode field belongs to the repeat instance, not to the conditional.
#set $next_pathologic_barcode_list = ['"' + str(x.strip()) + '"' for x in str($region.next_pathologic_barcode).split(',')]
## silent keeps the return value of append (None) out of the generated script.
#silent $region_vectors.append('c(' + ','.join($next_pathologic_barcode_list) + ')')
#end if
#end for
#set $region_list = 'list(' + ', '.join($region_vectors) + ')'
#end if

# cside script
# This file is used to specify the parameters for the cside from spacexr package

# Load the spacexr library
library('spacexr')
library('Matrix')
library('doParallel')

# load RCTD object
myRCTD <- readRDS('inputs/rctd.rds')

# number of cores: the environment variable is a string, max_cores has to be a number
core <- suppressWarnings(as.integer(Sys.getenv("GALAXY_SLOTS", unset = "1")))
if (is.na(core) || core < 1) {
    core <- 1L
}
myRCTD@config[["max_cores"]] <- core

# CSIDE
#if str($type.de_type) == 'non_parametric':
myRCTD <- run.CSIDE.nonparam(myRCTD,
    df = $type.df,
    barcodes = NULL, # use all barcodes
    CSIDE_COMMON_RUN
)

#else if str($type.de_type) == 'point_density':
pathogen_coords <- myRCTD@spatialRNA@coords[c($pathologic_barcode),]
barcodes <- colnames(myRCTD@spatialRNA@counts)
explanatory.variable <- exvar.point.density(myRCTD,
    barcodes,
    pathogen_coords,
    radius = $type.radius
)
CSIDE_SINGLE_RUN


#else if str($type.de_type) == 'cell2cell':
barcodes <- colnames(myRCTD@spatialRNA@counts)
explanatory.variable <- exvar.celltocell.interactions(myRCTD,
    barcodes,
    '$type.cell_type',
    radius = $type.radius
)
CSIDE_SINGLE_RUN


#else if str($type.de_type) == 'XY':
## The R column accessor is escaped so Cheetah does not treat it as a placeholder, and the
## comparison is written literally because entities are not decoded inside CDATA.
#if str($type.xy) == 'X':
explanatory.variable <- as.integer(myRCTD@spatialRNA@coords\$x > $type.lim)
names(explanatory.variable) <- rownames(myRCTD@spatialRNA@coords)

#else
explanatory.variable <- as.integer(myRCTD@spatialRNA@coords\$y > $type.lim)
names(explanatory.variable) <- rownames(myRCTD@spatialRNA@coords)
#end if
CSIDE_SINGLE_RUN

#else:
## The input is declared with argument="log_FC_thresh"; the lookup is case-sensitive.
myRCTD <- run.CSIDE.regions(myRCTD,
    $region_list,
    log_fc_thresh = $type.log_FC_thresh,
    CSIDE_COMMON_RUN
)
#end if


# save the results

# save significant genes in each cell type
cell_types <- names(myRCTD@de_results[["sig_gene_list"]])
for (cell_type in cell_types) {
    df <- myRCTD@de_results[["sig_gene_list"]][[cell_type]]
    write.table(df, file = paste0("results/", cell_type, "_sig.tabular"), sep = "\t", quote = FALSE)
}
# save all genes in each cell type
cell_types <- names(myRCTD@de_results[["all_gene_list"]])
for (cell_type in cell_types) {
    df <- myRCTD@de_results[["all_gene_list"]][[cell_type]]
    write.table(df, file = paste0("results/", cell_type, ".tabular"), sep = "\t", quote = FALSE)
}

# create plots
## output_selector lives in the "output" section and may be empty, hence the str()/split test.
#if 'plots' in str($output.output_selector).split(','):
make_all_de_plots(myRCTD, "figures")
#end if
# save rds file
#if 'rds' in str($output.output_selector).split(','):
saveRDS(myRCTD, file = 'results/cside_results.rds')
#end if
]]></configfile>
</configfiles>
<inputs>
    <param name="rctd" type="data" format="rds" label="RCTD object" help="annotated RCTD object"/>
    <!-- One branch per way of building the explanatory variable; the option values are
         compared literally in the generated R script, so they must not change. -->
    <conditional name="type">
        <param name="de_type" type="select" label="Type of covariates for explaining differential expression with C-SIDE">
            <option value="non_parametric">Smooth spatial pattern (non-parametric)</option>
            <option value="point_density">Proximity to pathology</option>
            <option value="cell2cell">Cell-to-cell interaction</option>
            <option value="XY">define X or Y axis</option>
            <option value="custom">Custom spatial locations</option>
        </param>
        <when value="non_parametric">
            <param argument="df" type="integer" min="0" value="15" label="Degrees of freedom" help="The degrees of freedom, or number of basis functions to be used in the model."/>
            <expand macro="cside_common_input"/>
        </when>
        <when value="point_density">
            <expand macro="patho_barcode"/>
            <expand macro="radius"/>
            <expand macro="cside_single_input"/>
        </when>
        <when value="cell2cell">
            <param name="cell_type" type="text" optional="false" label="Cell type for which to compute density">
                <expand macro="sanitizer"/>
            </param>
            <expand macro="radius"/>
            <expand macro="cside_single_input"/>
        </when>
        <when value="XY">
            <!-- `lim` is the numeric cutoff; `xy` chooses the axis it applies to. -->
            <param name="lim" type="integer" value="" optional="false" label="Coordinate cutoff" help="The number on X or Y axis to discriminate two spatial regions"/>
            <param name="xy" type="boolean" truevalue="X" falsevalue="Y" checked="true" label="Is the number on X axis?"/>
            <expand macro="cside_single_input"/>
        </when>
        <when value="custom">
            <!-- First region is mandatory; further regions are added through the repeat. -->
            <expand macro="patho_barcode" help="Comma separated barcodes of the custom region."/>
            <repeat name="region" min="0" title="Next custom region">
                <param name="next_pathologic_barcode" type="text" optional="false" label="Barcodes" help="Comma separated barcodes of the custom region.">
                    <expand macro="sanitizer"/>
                </param>
            </repeat>
            <expand macro="cside_common_input"/>
            <param argument="log_FC_thresh" type="float" min="0" value="0.4" label="logFC cutoff for differential expression"/>
        </when>
    </conditional>
    <!-- Shared output picker, extended with the plot option specific to this tool. -->
    <expand macro="output">
        <option value="plots">DEG plots</option>
    </expand>
</inputs>
<outputs>
    <!-- Always produced: every *.tabular file found in results/ becomes one list element. -->
    <collection name="de_results" type="list" label="${tool.name} on ${on_string}: DE Results">
        <discover_datasets directory="results" format="tabular" pattern="(?P&lt;name&gt;.+)\.tabular$" />
    </collection>
    <!-- The remaining outputs only exist when ticked in the output selector. -->
    <collection name="de_plots" type="list" label="${tool.name} on ${on_string}: DE plots">
        <discover_datasets directory="figures" format="pdf" pattern="(?P&lt;name&gt;.+)\.pdf$" />
        <filter>output['output_selector'] and 'plots' in output['output_selector']</filter>
    </collection>
    <data name="out_rds"
          format="rds"
          label="${tool.name} on ${on_string}: RDS file"
          from_work_dir="results/cside_results.rds">
        <filter>output['output_selector'] and 'rds' in output['output_selector']</filter>
    </data>
    <data name="out_rscript"
          format="txt"
          label="${tool.name} on ${on_string}: RScript"
          from_work_dir="results/cside_script.R">
        <filter>output['output_selector'] and 'rscript' in output['output_selector']</filter>
    </data>
</outputs>
<tests>
<!-- Non-parametric run on the MERFISH RCTD object, restricted to Astrocytes, with only the
     R script selected as optional output (DE collection + script = 2 outputs).
     NOTE(review): the "Hello" expressions and the type8 / type8_sig element names look like
     placeholders - confirm them against the tables actually written for this input. -->
<test expect_num_outputs="2">
<param name="rctd" value="myRCTD_merfish.rds"/>
<param name="de_type" value="non_parametric"/>
<param name="cell_types" value="Astrocytes"/>
<param name="cell_type_threshold" value="10"/>
<param name="gene_threshold" value="0.001"/>
<param name="fdr" value="0.25"/>
<param name="output_selector" value="rscript" />
<output_collection name="de_results" type="list">
<element name="type8" ftype="tabular">
<assert_contents>
<has_text_matching expression="Hello"/>
</assert_contents>
</element>
<element name="type8_sig" ftype="tabular">
<assert_contents>
<has_text_matching expression="Hello"/>
</assert_contents>
</element>
</output_collection>
<!-- The saved script must contain the call chosen by de_type=non_parametric. -->
<output name="out_rscript">
<assert_contents>
<has_text_matching expression="run.CSIDE.nonparam"/>
</assert_contents>
</output>
</test>
</tests>
<!-- User-facing help text, followed by the shared citation list. -->
<help><![CDATA[

Cell type-Specific Inference of Differential Expression, or CSIDE, is part of the spacexr R package for learning cell type-specific differential expression from spatial transcriptomics data.

]]></help>
<expand macro="citations" />
</tool>
Loading
Loading