galaxyproject · lldelisle · Oct 20, 2023 · Oct 3, 2023 · Oct 3, 2023 · Oct 3, 2023
diff --git a/workflows/scRNAseq/baredsc/.dockstore.yml b/workflows/scRNAseq/baredsc/.dockstore.yml
@@ -0,0 +1,20 @@
+version: 1.2  # version of the .dockstore.yml format used by this file
+workflows:  # one entry per workflow shipped in this directory
+- name: baredSC-1d-logNorm  # 1D variant
+  subclass: Galaxy
+  publish: true
+  primaryDescriptorPath: /baredSC-1d-logNorm.ga  # NOTE(review): leading "/" presumably resolves from this directory — confirm
+  testParameterFiles:
+  - /baredSC-1d-logNorm-tests.yml  # test definitions for the 1D workflow
+  authors:
+  - name: Lucille Delisle
+    orcid: 0000-0002-1964-4960
+- name: baredSC-2d-logNorm  # 2D variant; same layout as the 1D entry
+  subclass: Galaxy
+  publish: true
+  primaryDescriptorPath: /baredSC-2d-logNorm.ga
+  testParameterFiles:
+  - /baredSC-2d-logNorm-tests.yml  # test definitions for the 2D workflow
+  authors:
+  - name: Lucille Delisle
+    orcid: 0000-0002-1964-4960
diff --git a/workflows/scRNAseq/baredsc/CHANGELOG.md b/workflows/scRNAseq/baredsc/CHANGELOG.md
@@ -0,0 +1,5 @@
+# Changelog
+
+## [0.1] 2023-10-03
+
+First release.
diff --git a/workflows/scRNAseq/baredsc/README.md b/workflows/scRNAseq/baredsc/README.md
@@ -0,0 +1,37 @@
+# BaredSC Workflows
+
+These workflows run a baredSC analysis from a table of counts in a single click. They use models with 1 to N Gaussians and combine them. They use the logNorm scale, with 100 bins in 1 dimension and 25 bins on each axis in 2 dimensions.
+
+## Input dataset
+
+- Both workflows need a tabular dataset where each row is a cell. The table needs to have a header line with column names. There must be at least two columns: 'nCount_RNA' and another one with the counts for the gene(s) of interest. One way to get such a table in R from a Seurat object (`seurat.obj`) is:
+
+```r
+my.genes <- c("Hoxa13", "Hoxd13") # Gene columns to export next to the metadata
+df <- cbind(seurat.obj[[]], # This will give you all metadata including nCount_RNA
+            FetchData(seurat.obj, slot = "counts", vars = my.genes))
+
+write.table(df, "input_for_baredSC.txt", quote = FALSE, sep = "\t", row.names = FALSE)
+```
+
+## Input values
+
+For the 1D:
+
+- Gene name: The name of the column with the counts of your gene of interest.
+- Maximum value in logNorm: The maximum value to explore in PDF. This value should be large enough so the PDF is at 0 at this value.
+- Maximum number of Gaussians to study: All models between models with 1 Gaussian to models with this number of Gaussians will be combined.
+
+For the 2D:
+
+- Gene name for x axis: The name of the column with the counts of your gene in x axis.
+- Gene name for y axis: The name of the column with the counts of your gene in y axis.
+- maximum value in logNorm for x-axis: The maximum value to explore in PDF in the x axis. This value should be large enough so the PDF is at 0 at this value.
+- maximum value in logNorm for y-axis: The maximum value to explore in PDF in the y axis. This value should be large enough so the PDF is at 0 at this value.
+- Maximum number of Gaussians to study: All models between models with 1 2D-Gaussian to models with this number of 2D-Gaussians will be combined.
+
+## Processing
+
+- The workflow will generate parameter values from 1 to the maximum number of Gaussians to study.
+- baredSC_1d or baredSC_2d is run for each of these numbers of Gaussians.
+- All models are combined into a single result.
diff --git a/workflows/scRNAseq/baredsc/baredSC-1d-logNorm-tests.yml b/workflows/scRNAseq/baredsc/baredSC-1d-logNorm-tests.yml
@@ -0,0 +1,174 @@
+- doc: Test outline for baredSC_1d
+  job:  # inputs supplied to the workflow for this test case
+    Tabular with raw expression values:
+      class: File
+      path: test-data/nih3t3_generated_2d_2.txt
+      filetype: tabular
+    Gene name: 0.5_0_0_0.5_x  # header of the column to analyse in the input table
+    Maximum value in logNorm: '2.5'  # quoted, so passed as a string rather than a YAML float
+    Maximum number of Gaussians to study: '4'  # quoted, so passed as a string rather than a YAML int
+  outputs:  # expected outputs and the assertions applied to each
+    baredsc_numpy:  # four elements, each checked by size only (value +/- delta)
+      element_tests:
+        split_file_000000.tabular:
+          asserts:
+            has_size:
+              value: 1257919
+              delta: 100000
+        split_file_000001.tabular:
+          asserts:
+            has_size:
+              value: 1601519
+              delta: 100000
+        split_file_000002.tabular:
+          asserts:
+            has_size:
+              value: 2180423
+              delta: 200000
+        split_file_000003.tabular:
+          asserts:
+            has_size:
+              value: 28234812  # NOTE(review): ~10x the other elements — confirm this is intentional
+              delta: 2000000
+    baredsc_qc_plots:  # nested collection: per element, three plots (convergence, corner, p) checked by size
+      element_tests:
+        split_file_000000.tabular:
+          element_tests:
+            convergence:
+              asserts:
+                has_size:
+                  value: 20304
+                  delta: 1000
+            corner:
+              asserts:
+                has_size:
+                  value: 63603
+                  delta: 6000
+            p:
+              asserts:
+                has_size:
+                  value: 45302
+                  delta: 4000
+        split_file_000001.tabular:
+          element_tests:
+            convergence:
+              asserts:
+                has_size:
+                  value: 27704
+                  delta: 2000
+            corner:
+              asserts:
+                has_size:
+                  value: 387365
+                  delta: 30000
+            p:
+              asserts:
+                has_size:
+                  value: 97623
+                  delta: 90000  # NOTE(review): ~92% of value, other deltas are ~10% — confirm not a typo for 9000
+        split_file_000002.tabular:
+          element_tests:
+            convergence:
+              asserts:
+                has_size:
+                  value: 32463
+                  delta: 3000
+            corner:
+              asserts:
+                has_size:
+                  value: 1103840
+                  delta: 100000
+            p:
+              asserts:
+                has_size:
+                  value: 159797
+                  delta: 10000
+        split_file_000003.tabular:
+          element_tests:
+            convergence:
+              asserts:
+                has_size:
+                  value: 31343
+                  delta: 3000
+            corner:
+              asserts:
+                has_size:
+                  value: 2139411
+                  delta: 200000
+            p:
+              asserts:
+                has_size:
+                  value: 175774
+                  delta: 10000
+    baredsc_neff:  # each element must be a single line holding a decimal number in the expected range
+      element_tests:
+        split_file_000000.tabular:
+          asserts:
+            has_n_lines:
+              n: 1
+            has_line_matching:
+              expression: '80[0-9][0-9]\.[0-9]*'  # 80xx.<decimals>; "\." is a literal decimal point
+        split_file_000001.tabular:
+          asserts:
+            has_n_lines:
+              n: 1
+            has_line_matching:
+              expression: '13[0-9][0-9]\.[0-9]*'  # 13xx.<decimals>
+        split_file_000002.tabular:
+          asserts:
+            has_n_lines:
+              n: 1
+            has_line_matching:
+              expression: '2[0-9][0-9]\.[0-9]*'  # 2xx.<decimals>
+        split_file_000003.tabular:
+          asserts:
+            has_n_lines:
+              n: 1
+            has_line_matching:
+              expression: '[7-9][0-9][0-9]\.[0-9]*'  # 700-999.<decimals>
+    combined_other_outputs:  # collection of combined-model outputs; size checks for plots, content checks for tables
+      element_tests:
+        individuals:
+          asserts:
+            has_size:
+              value: 108407
+              delta: 10000
+        means:
+          asserts:
+            has_n_lines:
+              n: 99998
+              delta: 4000
+            has_line_matching:
+              expression: '6\.[0-9]*e-01'  # a value of 6.xxx e-01; "\." is a literal decimal point
+        posterior_andco:
+          asserts:
+            has_size:
+              value: 197980
+              delta: 10000
+        posterior_individuals:
+          asserts:
+            has_size:
+              value: 105262
+              delta: 10000
+        posterior_per_cell:
+          asserts:
+            has_n_lines:
+              n: 2362
+            has_line_matching:
+              expression: "mu\tsd"  # double-quoted so that \t is a tab character
+        with_posterior:
+          asserts:
+            has_size:
+              value: 234303
+              delta: 20000
+    combined_pdf:  # tab-separated table: checks the header line and the presence of one x value
+      asserts:
+        has_line:
+          line: "x\tlow\tmean\thigh\tmedian"  # double-quoted so that \t is a tab character
+        has_text:
+          text: "0.0125\t"  # the text 0.0125 followed by a tab
+    combined_plot:  # compared against a reference image stored with the test data
+      path: test-data/combined_1d_plot.png
+      compare: sim_size
+      delta_frac: 0.1  # NOTE(review): presumably allows a 10% size difference — confirm
+