-
Notifications
You must be signed in to change notification settings - Fork 441
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add spacexr #6212
base: main
Are you sure you want to change the base?
add spacexr #6212
Changes from 34 commits
8e3eb4a
a8c1906
460e8f2
88fd911
54ff8da
8f1fb30
3287e68
ea3ad72
74481cf
fac4e1c
3fca0f8
f848cff
26d2d47
e7d0fa3
6433511
1adcbed
16bb7e0
270f26d
7ba98dd
dae4129
544dd98
1e0bad9
8c2c3a4
f8dc7f7
8a7f38c
4b1c4ec
ea4ae35
7b30145
1bd854d
944acb2
84ceba2
55cf2ca
5fb6a97
73624f3
591ffd1
df5391f
3cdc55d
8e448d5
642ed70
db796c2
9857527
b83228d
30754b5
7c39d4b
9ad3ea3
d0f4bef
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Tool Shed metadata for the spacexr tool repository.
name: spacexr
owner: iuc
description: Cell type identification and cell type-specific differential expression in spatial transcriptomics
homepage_url: https://github.com/dmcable/spacexr/tree/master
long_description: Computational methods for cell type identification (RCTD) and differential expression (C-SIDE) on spatial transcriptomics datasets
# This change targets the "main" branch of tools-iuc, so link to that branch.
remote_repository_url: https://github.com/galaxyproject/tools-iuc/tree/main/tools/spacexr
categories:
  - Spatial Omics
  - Single Cell
suite:
  name: "suite_spacexr"
  description: "A suite of Galaxy tools designed to work with the spacexr-tools collection."
  type: repository_suite_definition
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
<macros> | ||
<token name="@TOOL_VERSION@">2.2.1</token> | ||
<token name="@VERSION_SUFFIX@">0</token> | ||
<token name="@PROFILE@">23.0</token> | ||
<!-- Shared requirements: r-spacexr pinned to @TOOL_VERSION@; a tool can add more packages through the yield. -->
<xml name="requirements">
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">r-spacexr</requirement>
        <yield/>
    </requirements>
</xml>
<!-- EDAM ontology annotations (three topics, one operation) shared by the spacexr tools. -->
<xml name="edam">
    <edam_topics>
        <edam_topic>topic_4019</edam_topic>
        <edam_topic>topic_4028</edam_topic>
        <edam_topic>topic_3308</edam_topic>
    </edam_topics>
    <edam_operations>
        <edam_operation>operation_3223</edam_operation>
    </edam_operations>
</xml>
<!--
    Arguments shared by all run.CSIDE.* calls. The fragment is meant to be the
    last part of the argument list, so it ends without a trailing comma.

    * The token name carries the @ delimiters it is referenced with.
    * Every value is read from the "type" conditional of the calling tool.
    * Comma separated text inputs are turned into quoted R strings here, so
      c(...) receives valid R instead of a Python list representation.
-->
<token name="@CSIDE_COMMON_RUN@"><![CDATA[
    #if str($type.cell_types).strip() != '':
        #set $cell_types_r = ', '.join(['"' + x.strip() + '"' for x in str($type.cell_types).split(',') if x.strip() != ''])
        cell_types = c($cell_types_r),
    #end if
    cell_type_threshold = $type.cell_type_threshold,
    gene_threshold = $type.gene_threshold,
    doublet_mode = $type.doublet_mode,
    #if str($type.weight_threshold) != '':
        weight_threshold = $type.weight_threshold,
    #end if
    sigma_gene = $type.sigma_gene,
    PRECISION.THRESHOLD = $type.PRECISION_THRESHOLD,
    #if str($type.cell_types_present).strip() != '':
        #set $cell_types_present_r = ', '.join(['"' + x.strip() + '"' for x in str($type.cell_types_present).split(',') if x.strip() != ''])
        cell_types_present = c($cell_types_present_r),
    #end if
    fdr = $type.fdr,
    test_genes_sig = $type.test_genes_sig,
    logs = $type.logs
]]></token>
<!--
    Complete run.CSIDE.single call; expects the R variable explanatory.variable
    to be defined by the calling template before this token is used.

    * The token name carries the @ delimiters it is referenced with.
    * CDATA keeps the R assignment operator valid inside XML.
    * "medv" is followed by a comma because the shared arguments come after it.
    NOTE(review): relies on the nested @CSIDE_COMMON_RUN@ token being expanded
    inside this token's text; confirm against the Galaxy macro loader.
-->
<token name="@CSIDE_SINGLE_RUN@"><![CDATA[
    myRCTD <- run.CSIDE.single(myRCTD,
        explanatory.variable,
        normalize_expr = $type.normalize_expr,
        log_fc_thresh = $type.log_fc_thresh,
        fdr_method = "BH", # default
        medv = $type.medv,
        @CSIDE_COMMON_RUN@
    )
]]></token>
<!-- Inputs shared by every C-SIDE mode; each parameter maps to the R argument of the same name. -->
<xml name="cside_common_input">
    <param argument="cell_types" type="text" optional="true" label="Cell types used for CSIDE" help="(Comma separated) If null, cell types will be chosen with aggregate occurrences of at least 'cell type threshold'."/>
    <param argument="cell_type_threshold" type="integer" min="0" value="125" label="Cell type threshold" help="Min occurrence of number of cells for each cell type to be used."/>
    <param argument="gene_threshold" type="float" min="0" value="0.00005" label="Gene threshold" help="Minimum average normalized expression required for selecting genes."/>
    <param argument="doublet_mode" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Use RCTD doublet mode weights?" help="Otherwise, uses RCTD full mode weights."/>
    <param argument="weight_threshold" type="float" min="0" value="" optional="true" label="Weight threshold" help="The threshold of total normalized weights across all cell types in 'cell types' per pixel to be included in the model."/>
    <param argument="sigma_gene" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Fit gene specific overdispersion parameter?" help="If FALSE, overdispersion parameter is same across all genes."/>
    <!-- Review note: the name is upper case because the R argument (PRECISION.THRESHOLD) is upper case. -->
    <param argument="PRECISION_THRESHOLD" type="float" min="0" value="0.05" label="Precision threshold" help="For checking for convergence, the maximum parameter change per algorithm step."/>
    <param argument="cell_types_present" type="text" optional="true" label="Cell types present" help="(Comma separated) cell types (a superset of 'cell types') to be considered as occurring often enough to consider for gene expression contamination during the step filtering out marker genes of other cell types."/>
    <param argument="fdr" type="float" min="0" value="0.01" label="FDR" help="False discovery rate for hypothesis testing."/>
    <param argument="test_genes_sig" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="true" label="Genes will be tested for significance."/>
    <param argument="logs" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="write progress to log?"/>
</xml>
<!--
    Inputs for the run.CSIDE.single based modes: the common inputs plus the
    arguments that only run.CSIDE.single takes.
    The threshold parameter is named log_fc_thresh so that it matches the
    $type.log_fc_thresh placeholder used by the run templates.
-->
<xml name="cside_single_input">
    <expand macro="cside_common_input"/>
    <param argument="normalize_expr" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="false" label="Constrain total gene expression to sum to 1 in each condition?"/>
    <param argument="log_fc_thresh" type="float" min="0" value="0.4" label="LogFC threshold" help="The natural log fold change cutoff for differential expression."/>
    <param argument="medv" type="float" min="0" value="0.5" label="Explanatory.variable cutoff" help="For determining if enough pixels for each cell type have explanatory-variable greater than or less than this value."/>
</xml>
<!-- Citations shared by the spacexr tools: two DOIs and a BibTeX entry for the GitHub repository. -->
<xml name="citations">
    <citations>
        <citation type="doi">10.1038/s41587-021-00830-w</citation>
        <citation type="doi">10.1038/s41592-022-01575-3</citation>
        <citation type="bibtex">@Manual{github,
            title = {SpatialeXpressionR: Cell type identification and cell type-specific differential expression in spatial transcriptomics.},
            author = {Dylan Cable},
            url = {https://github.com/dmcable/spacexr}}
        </citation>
    </citations>
</xml>
</macros> |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
<tool id="spacexr_cside" name="CSIDE" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> | ||
<description>Cell type-specific differential expression with C-SIDE</description> | ||
<macros> | ||
<import>macros.xml</import> | ||
</macros> | ||
<expand macro="edam"/> | ||
<expand macro="requirements"/> | ||
<command detect_errors="exit_code"><![CDATA[
    ## Stage the RCTD object, keep a copy of the generated R script as a
    ## potential output, then run the script.
    ## The former "#set" lines were dropped: the shell commands below use none
    ## of those variables, and "#set $region_list.append(...)" is not an
    ## assignment.
    mkdir -p 'inputs' 'results' 'logs' 'figures' &&
    ln -s '$rctd' 'inputs/rctd.rds' &&
    cp '$cside_script' 'results/cside_script.R' &&
    Rscript '$cside_script'
    #if str($output.plots) == 'True':
        ## Flatten the PDFs into figures/ for dataset discovery.
        ## - chained with "&&" so it only runs after a successful Rscript
        ## - "find" instead of quoted globs, which the shell never expands
        ## - both results/ and figures/ are searched because the plot
        ##   directory is chosen inside the R script
        && find 'results' 'figures' -mindepth 2 -type f -name '*.pdf' -exec mv {} 'figures/' \;
    #end if
]]></command>
<configfiles>
    <!--
        R script generated for one C-SIDE run.

        * Wrapped in CDATA so that R's assignment operator needs no XML escaping.
        * R's "$" list accessor is written with a leading backslash, otherwise
          the template engine reads it as a placeholder.
        * GALAXY_SLOTS is read with Sys.getenv because shell parameter
          expansion is not available inside R.
        * Comma separated barcode inputs are converted to quoted R strings in
          this template, because the R code needs c("a", "b") style vectors.
    -->
    <configfile name="cside_script"><![CDATA[
# cside script
# This file is used to specify the parameters for the cside from spacexr package

# Load the spacexr library
library('spacexr')
library('Matrix')
library('doParallel')

# load RCTD object
myRCTD <- readRDS('inputs/rctd.rds')

# number of cores: the slots allocated by Galaxy, 2 if unset
myRCTD@config\$max_cores <- as.integer(Sys.getenv('GALAXY_SLOTS', unset = '2'))

# CSIDE
#if $type.de_type == 'non_parametric':
myRCTD <- run.CSIDE.nonparam(myRCTD,
    df = $type.df,
    barcodes = NULL, # use all barcodes
    @CSIDE_COMMON_RUN@
)

#else if $type.de_type == 'point_density':
#set $pathologic_barcodes_r = ', '.join(['"' + b.strip() + '"' for b in str($type.pathologic_barcode).split(',') if b.strip() != ''])
pathogen_coords <- myRCTD@spatialRNA@coords[c($pathologic_barcodes_r), ]
barcodes <- colnames(myRCTD@spatialRNA@counts)
explanatory.variable <- exvar.point.density(myRCTD,
    barcodes,
    pathogen_coords,
    radius = $type.radius
)
@CSIDE_SINGLE_RUN@

#else if $type.de_type == 'cell2cell':
barcodes <- colnames(myRCTD@spatialRNA@counts)
explanatory.variable <- exvar.celltocell.interactions(myRCTD,
    barcodes,
    '$type.cell_type',
    radius = $type.radius
)
@CSIDE_SINGLE_RUN@

#else if $type.de_type == 'XY':
#if str($type.xy) == 'X':
explanatory.variable <- as.integer(myRCTD@spatialRNA@coords\$x > $type.lim)
#else:
explanatory.variable <- as.integer(myRCTD@spatialRNA@coords\$y > $type.lim)
#end if
names(explanatory.variable) <- rownames(myRCTD@spatialRNA@coords)
@CSIDE_SINGLE_RUN@

#else:
## custom regions: one R character vector per region, first region first
#set $first_region_r = ', '.join(['"' + b.strip() + '"' for b in str($type.pathologic_barcode).split(',') if b.strip() != ''])
#set $regions_r = ['c(' + $first_region_r + ')']
#for $region in $type.region:
#set $next_region_r = ', '.join(['"' + b.strip() + '"' for b in str($region.next_pathologic_barcode).split(',') if b.strip() != ''])
#if $next_region_r != '':
#silent $regions_r.append('c(' + $next_region_r + ')')
#end if
#end for
#set $region_list_r = ', '.join($regions_r)
region_list <- list($region_list_r)
myRCTD <- run.CSIDE.regions(myRCTD,
    region_list,
    log_fc_thresh = $type.log_fc_thresh,
    @CSIDE_COMMON_RUN@
)
#end if

# save the results

# save significant genes in each cell type
for (cell_type in names(myRCTD@de_results\$sig_gene_list)) {
    df <- myRCTD@de_results\$sig_gene_list[[cell_type]]
    write.table(df, file = paste0("results/", cell_type, "_sig.tabular"), sep = "\t", quote = FALSE)
}
# save all genes in each cell type
for (cell_type in names(myRCTD@de_results\$all_gene_list)) {
    df <- myRCTD@de_results\$all_gene_list[[cell_type]]
    write.table(df, file = paste0("results/", cell_type, ".tabular"), sep = "\t", quote = FALSE)
}

# create plots
#if str($output.plots) == 'True':
make_all_de_plots(myRCTD, "figures")
#end if
# save rds file
#if str($output.rds) == 'True':
saveRDS(myRCTD, file = 'results/cside_results.rds')
#end if
    ]]></configfile>
</configfiles>
<inputs>
    <param name="rctd" type="data" format="rds" label="RCTD object" help="annotated RCTD object"/>
    <!-- One branch per C-SIDE mode; the selected value decides which R call the script template emits. -->
    <conditional name="type">
        <param name="de_type" type="select" label="Type of covariates for explaining differential expression with C-SIDE">
            <option value="non_parametric">Smooth spatial pattern (non-parametric)</option>
            <option value="point_density">Proximity to pathology</option>
            <option value="cell2cell">Cell-to-cell interaction</option>
            <option value="XY">define X or Y axis</option>
            <option value="custom">Custom spatial locations</option>
        </param>
        <when value="non_parametric">
            <param argument="df" type="integer" min="0" value="15" label="Degrees of freedom" help="The degrees of freedom, or number of basis functions to be used in the model."/>
            <expand macro="cside_common_input"/>
        </when>
        <when value="point_density">
            <!-- Required text: reject empty submissions before a job is created. -->
            <param argument="pathologic_barcode" type="text" optional="false" label="Barcodes" help="Comma separated barcodes of the pathological region.">
                <validator type="empty_field" message="At least one barcode is required."/>
            </param>
            <param argument="radius" type="integer" min="0" value="50" label="Radius" help="The radius of the exponential filter. Approximately, the distance considered to be a relevant interaction."/>
            <expand macro="cside_single_input"/>
        </when>
        <when value="cell2cell">
            <param argument="cell_type" type="text" optional="false" label="Cell type for which to compute density">
                <validator type="empty_field" message="A cell type is required."/>
            </param>
            <param argument="radius" type="integer" min="0" value="50" label="Radius" help="The radius of the exponential filter. Approximately, the distance considered to be a relevant interaction."/>
            <expand macro="cside_single_input"/>
        </when>
        <when value="XY">
            <param name="lim" type="integer" value="" optional="false" label="Axis" help="The number on X or Y axis to discriminate two spatial regions"/>
            <param name="xy" type="boolean" truevalue="X" falsevalue="Y" checked="true" label="Is the number on X axis?"/>
            <expand macro="cside_single_input"/>
        </when>
        <when value="custom">
            <param argument="pathologic_barcode" type="text" optional="false" label="Barcodes" help="Comma separated barcodes of the pathological region.">
                <validator type="empty_field" message="At least one barcode is required."/>
            </param>
            <repeat name="region" min="0" title="Next custom region">
                <param argument="next_pathologic_barcode" type="text" optional="false" label="Barcodes" help="Comma separated barcodes of the pathological region.">
                    <validator type="empty_field" message="At least one barcode is required."/>
                </param>
            </repeat>
            <expand macro="cside_common_input"/>
            <!-- Named log_fc_thresh to match the $type.log_fc_thresh placeholder in the script template. -->
            <param argument="log_fc_thresh" type="float" min="0" value="0.4" label="LogFC threshold" help="The natural log fold change cutoff for differential expression."/>
        </when>
    </conditional>
    <!-- Kept as three booleans because the output filters and the test reference them by name. -->
    <section name="output" title="Output options">
        <param name="plots" type="boolean" truevalue="True" falsevalue="False" checked="false" label="save plots?"/>
        <param name="rds" type="boolean" truevalue="True" falsevalue="False" checked="true" label="save RDS file?"/>
        <param name="rscript" type="boolean" truevalue="True" falsevalue="False" checked="false" label="save RScript?"/>
    </section>
</inputs>
<outputs>
    <!-- The angle brackets of the named regex group are escaped: a raw "<" is not allowed in an attribute value. -->
    <!-- One element per *.tabular file found in results/. -->
    <collection name="de_results" type="list" label="${tool.name} on ${on_string}: DE Results">
        <discover_datasets pattern="(?P&lt;name&gt;.+)\.tabular$" format="tabular" directory="results"/>
    </collection>
    <!-- One element per *.pdf file found in figures/; only created when plots are requested. -->
    <collection name="de_plots" type="list" label="${tool.name} on ${on_string}: DE plots">
        <discover_datasets pattern="(?P&lt;name&gt;.+)\.pdf$" format="pdf" directory="figures"/>
        <filter>output['plots']</filter>
    </collection>
    <data name="out_rds" format="rds" from_work_dir="results/cside_results.rds" label="${tool.name} on ${on_string}: RDS file">
        <filter>output['rds']</filter>
    </data>
    <data name="out_rscript" format="txt" from_work_dir="results/cside_script.R" label="${tool.name} on ${on_string}: RScript">
        <filter>output['rscript']</filter>
    </data>
</outputs>
<tests>
    <!-- Non-parametric run on one cell type; RDS output disabled, R script output enabled (collection + script = 2 outputs). -->
    <test expect_num_outputs="2">
        <param name="rctd" value="rds_doublet.rds"/>
        <conditional name="type">
            <param name="de_type" value="non_parametric"/>
            <param name="cell_types" value="type8"/>
        </conditional>
        <section name="output">
            <param name="rds" value="False"/>
            <param name="rscript" value="True"/>
        </section>
        <output_collection name="de_results" type="list">
            <!--
                The former "Hello" assertions were placeholders.
                NOTE(review): replace these minimal checks with values taken
                from the real test output (column names, a known gene).
            -->
            <element name="type8" ftype="tabular">
                <assert_contents>
                    <has_n_lines min="1"/>
                </assert_contents>
            </element>
            <element name="type8_sig" ftype="tabular">
                <assert_contents>
                    <has_n_lines min="1"/>
                </assert_contents>
            </element>
        </output_collection>
        <output name="out_rscript">
            <assert_contents>
                <!-- Literal match; the dots are not regex wildcards here. -->
                <has_text text="run.CSIDE.nonparam"/>
            </assert_contents>
        </output>
    </test>
</tests>
<help><![CDATA[ | ||
|
||
Cell type-Specific Inference of Differential Expression, or CSIDE, is part of the spacexr R package for learning cell type-specific differential expression from spatial transcriptomics data. | ||
|
||
]]></help> | ||
<expand macro="citations" /> | ||
</tool> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can use CDATA here and then avoid &lt;
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
THANKS!! :)