diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers.png new file mode 100644 index 00000000000000..707f7ae3920aa9 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers_SCT.png new file mode 100644 index 00000000000000..706e40bfe897ba Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers.png new file mode 100644 index 00000000000000..085139c5a550c5 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers_SCT.png new file mode 100644 index 00000000000000..70fd4bdae9d9a0 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers.png new file mode 100644 index 00000000000000..6438f17b331b8e Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers_SCT.png 
b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers_SCT.png new file mode 100644 index 00000000000000..c83279eea83172 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_T_Markers_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_T_Markers_SCT.png new file mode 100644 index 00000000000000..465a5e054bd4ee Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_FeaturePlot_T_Markers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin.png new file mode 100644 index 00000000000000..71545de67e6594 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin_SCT.png new file mode 100644 index 00000000000000..a4a1d2f7aa7d31 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots.png new file mode 100644 index 00000000000000..339ebc38914ed1 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots_SCT.png new file mode 100644 index 00000000000000..67808e7548302c Binary files /dev/null and 
b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots.png new file mode 100644 index 00000000000000..663f95bd1698e7 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots_SCT.png new file mode 100644 index 00000000000000..11b60d33bfc3d2 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot.png new file mode 100644 index 00000000000000..d0251a14bfed87 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot_SCT.png new file mode 100644 index 00000000000000..73b88a7d53278d Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_QC_scatter_after.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_QC_scatter_after.png new file mode 100644 index 00000000000000..8dc3b75816f590 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_QC_scatter_after.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_QC_scatter_before.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_QC_scatter_before.png new file mode 100644 index 00000000000000..c1b336d38b2c89 Binary files 
/dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_QC_scatter_before.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot.png new file mode 100644 index 00000000000000..eb1e0db0f5ab98 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes.png new file mode 100644 index 00000000000000..c3efd835d236eb Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes_SCT.png new file mode 100644 index 00000000000000..41ee4605f7b78a Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_SCT.png new file mode 100644 index 00000000000000..39db543cac512e Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes.png new file mode 100644 index 00000000000000..52b53a4fae10e5 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes_SCT.png new file 
mode 100644 index 00000000000000..d4e0ee8f8ba72c Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers.png new file mode 100644 index 00000000000000..b0fdfa5185d7ba Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers_SCT.png new file mode 100644 index 00000000000000..c434fcce46c574 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_T_Markers_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_T_Markers_SCT.png new file mode 100644 index 00000000000000..bd1fa898694d7e Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_Violin_T_Markers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_elbowplot.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_elbowplot.png new file mode 100644 index 00000000000000..84c7d9224177ec Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_elbowplot.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_elbowplot_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_elbowplot_SCT.png new file mode 100644 index 00000000000000..f2c678741dc2e6 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_elbowplot_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1.png 
b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1.png new file mode 100644 index 00000000000000..e1680775a2a9c5 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15.png new file mode 100644 index 00000000000000..d24ada925a443c Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15_SCT.png new file mode 100644 index 00000000000000..bab74955209f02 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_SCT.png new file mode 100644 index 00000000000000..1ca09ee3887b9b Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_topgenes_violin.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_topgenes_violin.png new file mode 100644 index 00000000000000..4d0febba21d000 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_topgenes_violin.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_topgenes_violin_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_topgenes_violin_SCT.png new file mode 100644 index 00000000000000..105bdb29af1730 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_topgenes_violin_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_variable_genes.png 
b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_variable_genes.png new file mode 100644 index 00000000000000..fe29ff818fe1a6 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_variable_genes.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_variable_genes_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_variable_genes_SCT.png new file mode 100644 index 00000000000000..c9fcad51e8aabe Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_variable_genes_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_QC_before.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_QC_before.png new file mode 100644 index 00000000000000..d164a226eb0a4e Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_QC_before.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers.png new file mode 100644 index 00000000000000..71968df78bd83b Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers_SCT.png new file mode 100644 index 00000000000000..1678c9e4cd7723 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers_SCT.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_after.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_after.png new file mode 100644 index 00000000000000..413e1f33288545 Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_violin_after.png differ diff --git 
a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_vizdimloadings.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_vizdimloadings.png new file mode 100644 index 00000000000000..df8fc4c22bdf2d Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_vizdimloadings.png differ diff --git a/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_vizdimloadings_SCT.png b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_vizdimloadings_SCT.png new file mode 100644 index 00000000000000..d03e05386b88ad Binary files /dev/null and b/topics/single-cell/images/scrna-seurat-pbmc3k/seurat_vizdimloadings_SCT.png differ diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/data-library.yaml b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/data-library.yaml new file mode 100644 index 00000000000000..1e3752554daf21 --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/data-library.yaml @@ -0,0 +1,27 @@ +--- +destination: + type: library + name: GTN - Material + description: Galaxy Training Network Material + synopsis: Galaxy Training Network Material. See https://training.galaxyproject.org +items: +- name: Transcriptomics + description: Training material for all kinds of transcriptomics analysis. 
+ items: + - name: Clustering 3K PBMCs with Seurat + items: + - name: 'DOI: 10.5281/zenodo.3581213' + description: latest + items: + - url: https://zenodo.org/api/files/78a8ec7e-3357-44b8-854d-4a9d772b4589/barcodes.tsv + src: url + ext: tsv + info: https://zenodo.org/record/3581213 + - url: https://zenodo.org/api/files/78a8ec7e-3357-44b8-854d-4a9d772b4589/genes.tsv + src: url + ext: tsv + info: https://zenodo.org/record/3581213 + - url: https://zenodo.org/api/files/78a8ec7e-3357-44b8-854d-4a9d772b4589/matrix.mtx + src: url + ext: mtx + info: https://zenodo.org/record/3581213 diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/tutorial.bib b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/tutorial.bib new file mode 100644 index 00000000000000..b910e71667cf69 --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/tutorial.bib @@ -0,0 +1,30 @@ +@Article{seurat2023v5, + author = {Yuhan Hao and Tim Stuart and Madeline H Kowalski and Saket Choudhary and Paul Hoffman and Austin Hartman and Avi Srivastava and Gesmira Molla and Shaista Madad and Carlos Fernandez-Granda and Rahul Satija}, + title = {Dictionary learning for integrative, multimodal and scalable single-cell analysis}, + journal = {Nature Biotechnology}, + year = {2023}, + doi = {10.1038/s41587-023-01767-y}, + url = {https://doi.org/10.1038/s41587-023-01767-y}, +} + +@article{SCTransform2022, +title={Comparison and evaluation of statistical error models for scrna-seq}, +DOI={10.1186/s13059-021-02584-9}, +volume={23}, +number={1}, +journal={Genome Biology}, +author={Choudhary, Saket and Satija, Rahul}, +year={2022}, +month={Jan}} + +@article{tekman2020single, + title={A single-cell RNA-sequencing training and analysis suite using the Galaxy framework}, + author={Tekman, Mehmet and Batut, B{\'e}r{\'e}nice and Ostrovsky, Alexander and Antoniewski, Christophe and Clements, Dave and Ramirez, Fidel and Etherington, Graham J and Hotz, Hans-Rudolf and Scholtalbers, Jelle and Manning, Jonathan R 
and others}, + journal={GigaScience}, + DOI={10.1093/gigascience/giaa102}, + volume={9}, + number={10}, + pages={giaa102}, + year={2020}, + publisher={Oxford University Press} +} diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/tutorial.md b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/tutorial.md new file mode 100644 index 00000000000000..1912161226971f --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/tutorial.md @@ -0,0 +1,1757 @@ +--- +layout: tutorial_hands_on + +title: "Clustering 3K PBMCs with Seurat" +level: Introductory +subtopic: firstsc +priority: 3 +zenodo_link: 'https://zenodo.org/record/3581213' +answer_histories: + - label: "Using Separate Preprocessing Tools" + history: https://usegalaxy.eu/u/marisa_jl/h/clustering-3k-pbmcs-with-seurat---separate-preprocessing---answer-key + date: 2023-10-28 + - label: "Using SCTransform" + history: https://usegalaxy.eu/u/marisa_jl/h/clustering-3k-pbmcs-with-seurat---sctransform---answer-key + date: 2023-10-28 +questions: +- How can we identify cell types in single cell RNA-Seq data? +- What are the steps for clustering single cell data with Seurat? +objectives: +- Explain the steps involved in clustering single cell data +- Evaluate the quality of single cell data and filter out low quality cells +- Prepare single cell data for analysis with Seurat +- Perform clustering with Seurat +- Be ready to apply the Seurat pipeline to new datasets +time_estimation: 8H +key_points: +- Seurat is a commonly-used pipeline for single cell data analysis +- Clustering makes single cell datasets easier for us to understand +- Different tools and parameters should be considered when analysing different datasets +requirements: +- + type: "internal" + topic_name: single-cell + tutorials: + - scrna-preprocessing + - scrna-preprocessing-tenx +tags: +- 10x +contributors: +- MarisaJL + +--- + +Single cell RNA-seq analysis enables us to explore differences in gene expression between cells. 
It can reveal the heterogeneity within cell populations and help us to identify cell types that could play roles in development, disease, or other processes. Single cell omics is a relatively young field, but there are a few commonly-used analysis pipelines that you will often see in the literature. In this tutorial, we will use one of these pipelines, Seurat, to cluster single cell data from a 10X Genomics experiment ({% cite seurat2023v5 %}). You can follow the same analysis using the Scanpy pipeline in the [Clustering 3K PBMCs with Scanpy]({% link topics/single-cell/tutorials/scrna-scanpy-pbmc3k/tutorial.md %}) tutorial. + +Clustering is typically the first type of analysis we will perform on a single cell dataset. It groups together cells that are expressing similar genes, which makes the data easier to understand and often helps us to identify specific cell types. + +{% snippet topics/single-cell/faqs/single_cell_omics.md %} + +{% snippet faqs/galaxy/tutorial_mode.md %} + +> +> +> This tutorial is based on the [Seurat - Guided Clustering Tutorial](https://satijalab.org/seurat/articles/pbmc3k_tutorial). The SCTransform sections also draw from the [Using sctransform in Seurat](https://satijalab.org/seurat/articles/sctransform_vignette.html) tutorial. +> +{: .comment} + +> +> +> In this tutorial, we will cover: +> +> 1. TOC +> {:toc} +> +{: .agenda} + +# Important tips for easier analysis + +{% snippet faqs/galaxy/tutorial_mode.md %} + +{% snippet faqs/galaxy/analysis_troubleshooting.md sc=true %} + +# Data + +For this tutorial, we will analyze a dataset of Peripheral Blood Mononuclear Cells (PBMC) extracted from a healthy donor, which is freely available from 10X Genomics. The dataset contains 2700 single cells sequenced using Illumina NextSeq 500. 
The raw sequences have been processed by the [**cellranger**](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/what-is-cell-ranger) pipeline from 10X to extract a unique molecular identifier (UMI) count matrix, in a similar way to that explained in the [Pre-processing of 10X Single-Cell RNA Datasets]({% link topics/single-cell/tutorials/scrna-preprocessing-tenx/tutorial.md %}) tutorial. + +In this matrix, the values represent the number of each feature (i.e. gene; row) detected in each cell (column). Single cell matrices can be quite large: here there are 2700 columns with 32,738 rows, so for each of our 2700 cells we know how many times we found RNAs matching each of the 32,738 genes. Since most of these genes weren't detected in most of the cells, the matrix is largely filled with zeros, i.e. it is an extremely sparse matrix. To optimize the storage of such a table and the information about the genes and cells, **cellranger** creates 3 files: + +- `genes.tsv`: a tabular file with information about the 32,738 genes in 2 columns (Ensembl gene id and the gene symbol) +- `barcodes.tsv`: a tabular file with the barcode for each of the 2700 cells +- `matrix.mtx`: a condensed version of the count matrix (including the non-zero values only) + + The count matrix is represented by its non-zero values - we don't need to store all of those zeroes as long as we know where our non-zero values are in the matrix. Each non-zero value is represented by its row number (1st column), its column number (2nd column) and its value (3rd column). The first row gives the total number of rows (genes), columns (cells) and non-zero values. More information on the Matrix Market Exchange (mtx) format can be found [in this documentation](https://math.nist.gov/MatrixMarket/formats.html). + +## Data upload + +> Data upload +> +> 1. Create a new history for this tutorial +> +> 2. 
Import the `genes.tsv`, `barcodes.tsv` and `matrix.mtx` from [Zenodo]({{ page.zenodo_link }}) or from the shared data library +> +> ``` +> {{ page.zenodo_link }}/files/genes.tsv +> {{ page.zenodo_link }}/files/barcodes.tsv +> {{ page.zenodo_link }}/files/matrix.mtx +> ``` +> +> {% snippet faqs/galaxy/datasets_import_via_link.md %} +> +> {% snippet faqs/galaxy/datasets_import_from_data_library.md %} +> +> 3. Rename the datasets as `genes`, `barcodes`, and `matrix` if necessary +> +> {% snippet faqs/galaxy/datasets_rename.md %} +> +> 4. Check the datatypes are correct - the `genes` and `barcodes` files should be tsv or tabular while the `matrix` should be an mtx file +> +> {% snippet faqs/galaxy/datasets_change_datatype.md %} +> +> 5. Inspect the `matrix` file by clicking on the {% icon galaxy-eye %} icon +> +> {% snippet faqs/galaxy/histories_dataset_item.md %} +> +{: .hands_on} + +The beginning of the file should look like this: + +> +> +> ``` +> 32738 2700 2286884 +> 32709 1 4 +> 32707 1 1 +> 32706 1 10 +> 32704 1 1 +> ``` +> +> 1. How many non-zero values are in the matrix? +> 2. How many counts were found for the 32,706th gene in the 1st cell? +> +> > +> > +> > 1. The first row tells us there are 2,286,884 non-zero values for the 32,738 genes (rows) and 2,700 cells (columns) - so only 2.6% of the 88,392,600 potential values we could have in this matrix are non-zero. Getting rid of all those zeros has made the matrix much more compact. +> > 2. 10 counts were found for the 32,706th row (gene) and 1st column (cell), so we collected 10 RNAs that the first cell had produced from this particular gene. +> > +> {: .solution} +> +{: .question} + +Representing the matrix with these three files is convenient for sharing the data, but not for processing them. 
Different single cell analysis packages have attempted to solve the problem of storage and analysis by inventing their own formats, which has led to the proliferation of many different 'standards' in the scRNA-seq package ecosystem. + +## SeuratObject + +In order to analyse the data using Seurat, we will first need to create a SeuratObject. A SeuratObject can store all our data, including the gene names, cell barcodes and matrix in a single RDS file. Since Seurat was written in the R programming language, SeuratObjects are saved as RData or RDS files. An RDS file is simply a type of RData file that contains a single object. + +SeuratObjects can also hold any metadata we might have about our cells or genes, as well as the information we will produce during our analysis, such as our dimensional reductions and cell clusters. A SeuratObject can hold data in multiple layers (also known as slots in earlier versions of Seurat). We can have layers of raw counts, normalised data, and scaled data. We could also store multiple assays or types of data in a single object, although in this tutorial we will only be using one assay for RNA-seq data. The SeuratObject is designed to be self-contained so that we don't have to work with lots of different files, even if we have different versions or types of data. + +Creating a SeuratObject in R would require two steps - first, we would need to read in our data, in this case using the `Read10X` function, then secondly we would turn it into a SeuratObject using the `CreateSeuratObject` function. On Galaxy, we can perform both steps with a single tool. The `CreateSeuratObject` function also generates some QC metrics and performs basic filtering of the data. + +> Create a SeuratObject +> +> 1. {% tool [Seurat Create](toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy1) %} with the following parameters: +> - *"Method used"*: `Create Seurat Object` +> - *"Select format of input"*: `matrix market (for e.g. 
10x data)` +> - {% icon param-file %} *"Counts matrix with features as rows, cells as columns (.mtx)"*: `matrix.mtx` +> - {% icon param-file %} *"List of gene names (for rows)"*: `genes.tsv` +> - {% icon param-file %} *"List of cell barcodes (for columns)"*: `barcodes.tsv` +> - *"Include features detected in at least this many cells"*: `3` +> - *"Include cells where at least this many features are detected"*: `200` +> - *"Calculate percentage of mito genes in each cell"*: `No` +> +> 2. Rename the generated file to `Input 3k PBMC` +> +> 3. Check that the format is `rds` +{: .hands_on} + +We can't look at the RDS file directly as it is designed for computers to read, rather than humans, but the Seurat tools will now be able to interact with the data. We can also use the {% tool Seurat Data Management %} tool to look inside our new SeuratObject. + +> Inspect the `Input 3k PBMC` SeuratObject +> +> 1. {% tool [Seurat Data Management](toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0) %} with the following parameters: +> - *"Method used"*: `Inspect Seurat Object` +> - *"Display information about"*: `General` +> +> 2. Click the {% icon galaxy-eye %} on the output in your history to see some information about your SeuratObject +> +{: .hands_on} + +> +> +> 1. How many cells and genes are in the `Input 3k PBMC` SeuratObject we just created? +> 2. Is this the same as the number of cells and genes we started out with? +> +> > +> > 1. If we click on the {% icon galaxy-eye %} of the output from {% tool Seurat Data Management %} we can see some general information about our SeuratObject. We can see that it contains information about 2700 samples (cells) and 13,714 features (genes). +> > 2. When we looked at the `matrix.mtx` file previously, we saw that it contained information on the same 2700 cells, but that there were 32,738 genes (rows) in the matrix. That means we are missing 19,024 genes - but don't worry, this is supposed to happen! 
Remember that we set the `Include features detected in at least this many cells` parameter to `3` - this got rid of any genes that were found in fewer than three cells. We assume that these genes are just technical noise (e.g. misreads, reads mapped to the wrong gene) rather than real biological features. We also set the `Include cells where at least this many features are detected` to `200` - this would have removed any cells that had fewer than 200 genes found in them. In this case, we didn't remove any cells as all 2700 of them must have had at least 200 genes. If we did have cells with fewer than 200 genes, we would want to get rid of them as they are probably low quality cells or cell fragments. We would usually expect to find more features in an undamaged cell that was captured and sequenced properly. +> > +> {: .solution} +{: .question} + +> +> If you would like to see the impact of the initial filtering performed by {% tool Seurat Create %}, then you can rerun this step with these filters removed (or changed) and then perform the QC visualisation steps described in the next section on the unfiltered dataset. The Seurat pipeline usually includes some filtering during object creation, which shouldn't cause any problems as long as we use reasonable thresholds that only filter out the lowest quality cells and genes. We can always come back and create a new object with different values if we suspect they weren't right for our dataset once we make our QC plots or if we have problems later on in the analysis. +{: .comment} + +# Preprocessing + +The standard Seurat pipeline begins with preprocessing, which includes identifying and filtering out low-quality cells, normalising and scaling the data, and selecting highly variable features. + +## Quality Control + +Quality control is an essential step in preparing single cell data for analysis. 
The results from a single cell experiment can vary in quality due to cell damage during dissociation, capturing more than one cell in a well or droplet, failures in library preparation such as inefficient reverse transcription (copying of captured RNAs to cDNA) or PCR amplification (making more copies of the cDNA so we have enough material for sequencing) or other technical problems. Low quality cells can cause issues for some analysis tools and may contribute to misleading results. + +> Impact of low-quality cells on the downstream analyses +> +> #### Formation of their own distinct cluster(s) +> +> The most obvious cause of this problem is that damaged cells can appear more similar to each other because they have increased proportions of mitochondrial genes or are enriched for certain damage-induced genes. We could end up with a cluster made up of different cell types that share similar RNAs because they've been damaged. We might think these damaged cells represent an intermediate state between other cell types or end up misinterpreting our data in other ways. +> +> #### Distortion of population heterogeneity during variance estimation or principal components analysis +> +> When we cluster single cell data, we're looking for the biggest differences between groups of cells. If we have lots of low-quality cells, then the biggest differences we'll see might be these differences in quality, rather than something more biologically interesting. Differences in quality can have a big impact because low-quality cells often have low total RNA counts. When we perform scaling and normalisation on these cells during preprocessing, this can make the variances for the genes they do express much bigger than for other cells. When we select the most variable genes in the dataset, we'll end up picking the ones expressed by low-quality cells. 
We'll use these in our PCA analysis, likely ending up with top principal components based on cell quality rather than biology, making it harder to detect the differences we're actually interested in. +> +> #### Misidentification of upregulated genes +> +> Another problem that arises when we apply our preprocessing steps to small, low quality cells is that the genes we detect in them can appear to be strongly upregulated. Since we didn't detect many other genes in these cells, even a small difference in the number of transcripts detected can end up becoming much larger after normalisation. For example, contaminating transcripts may be present in all cells at low but constant levels. With the increased scaling and normalization in low-quality cells, the small counts for these transcripts may become large normalized expression values, so we might think we've found a population of cells where these genes are upregulated. +> +{: .details} + +In order to mitigate these problems, we need to remove low-quality cells at the start of the analysis. + +The `CreateSeuratObject` function we used in the {% tool Seurat Create %} tool automatically calculates some QC metrics and allowed us to filter out the lowest quality cells and features immediately. We will now check the data to see if it requires any further filtering. + +### Computation of QC metrics + +We already have two useful QC metrics that were calculated when we created our SeuratObject: + +- **nCount_RNA** the total sum of RNAs that were found in each cell. + The total number of counts is related to cell size, but it can also be an indication of quality. If `nCount_RNA` is very high we might be looking at results from a doublet or multiplet - two or more cells that were isolated together during the experiment. If `nCount_RNA` is very low, then it is likely that we lost a lot of the RNA due to cell lysis (breakage) or inefficient cDNA capture and amplification - or perhaps we only captured a fragment of a cell. 
+- **nFeature_RNA** the number of unique genes that were detected in each cell. + We would expect to see some variation in the variety of genes expressed by cells of different sizes, types, and conditions, but if this number is unusually high or low then it could be a sign of poor quality. High `nFeature_RNA` could be another sign of a doublet or multiplet - we might have captured cells of different types and processed them together. Low `nFeature_RNA` could be due to loss of RNA if `nCount_RNA` is also low, or a sign that we have failed to capture the diversity of the transcript population, perhaps due to technical problems in our experiment. + +One other metric that is often used to assess cell quality is the proportion of reads that came from the mitochondrial genome. The proportion is often higher in low quality, damaged or dying cells. Mitochondrial RNAs, which are protected inside the mitochondrial membranes, can be the last RNAs to be degraded or lost from a damaged cell, so we can end up with higher proportions of them in a low quality cell. We could have calculated the proportion of mitochondrial genes while creating our SeuratObject, but we will calculate it separately here to see how it is done - and how we could do the same for other types of genes. + +We can identify mitochondrial genes from their gene symbols. Human genes that are encoded in the mitochondrial DNA (rather than in the cell nucleus) have names beginning with 'MT-'. Different naming patterns are used in other species. The {% tool Seurat Create %} tool can identify genes based on a specific naming pattern and then calculate the proportion of reads in each cell that came from these genes. We could use this tool to calculate the proportions of other genes with shared naming patterns too, such as ribosomal genes. + +> Calculate the Proportion of Mitochondrial Reads +> +> 1. 
{% tool [Seurat Create](toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy1) %} with the following parameters: +> - *"Method used"*: `Add QC Metrics` +> - {% icon param-file %} *"Input file with the Seurat object"*: `Input 3k PBMC` (output of **Seurat Create** {% icon tool %}) +> - *"Calculate percentage of reads based on"*: `Pattern in gene names` +> - *"Pattern/regex to match in gene names"*: `^MT-` +> +> 2. Rename the output as `Mitochondrial Annotations` +{: .hands_on} + +When we use these QC metrics to remove low quality cells, we are assuming they are independent of the biological state of each cell, which isn't always the case in reality. For example, some cell types with higher energy demands may have higher proportions of mitochondrial reads because they have more mitochondria. We need to think carefully when deciding which metrics to use for filtering, but for now we will assume that these differences in our dataset are being driven by technical factors (e.g. cell damage, loss of RNAs, problems with capture and sequencing) rather than biological processes (e.g. cell types or conditions). We can therefore remove cells with poor values without misrepresenting the biology in downstream analyses. + +### Filtering of low-quality cells + +The simplest approach for identifying low-quality cells is to apply thresholds on the QC metrics. We assume that any cells beyond these thresholds are low quality cells and that this is due to those technical factors. Although this is a simple strategy, we will need to decide where to set our thresholds, and this will depend on the experimental protocol and biological system - there are no standard thresholds that we can use for every dataset. + +We can visualise the QC metrics to help us decide where to set our thresholds for filtering out low quality cells. 
We want to get rid of cells that have unusually high or low numbers of genes or unique features, as well as cells that have higher proportions of mitochondrial genes. + +> Visualise QC Metrics +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Mitochondrial Annotations` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Violin Plot with 'VlnPlot'` +> - *"Features to plot"*: `nFeature_RNA,nCount_RNA,percent.mt` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> +> 2. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Mitochondrial Annotations` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Scatter Plot with 'FeatureScatter'` +> - *"First feature to plot"*: `nCount_RNA` +> - *"Second feature to plot"*: `percent.mt` +> +> 3. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Mitochondrial Annotations` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Scatter Plot with 'FeatureScatter'` +> - *"First feature to plot"*: `nCount_RNA` +> - *"Second feature to plot"*: `nFeature_RNA` +> +{: .hands_on} + +> +> If you are re-running the same step again or just want to change a couple of parameters for the same tool, then you can click on a previous dataset to expand it in the history and then click the {% icon dataset-rerun button %}. The tool you used to create that dataset will open in the main panel, with the settings and inputs you used before. Make any changes and run it again. 
+{: .tip} + +![Three violin plots each showing most points grouped together in the lower part of the plot with some outliers at higher values](../../images/scrna-seurat-pbmc3k/seurat_violin_QC_before.png "Violin Plots showing the unique features (nFeature_RNA), total counts (nCount_RNA) and the proportion of reads coming from mitochondrial genes (percent.mt) for all cells") + +![Scatter plot A shows clear positive correlation with the number of unique features increasing as total counts increases. A small number of cells have very high values of both. Scatter plot B does not show a relationship between the proportion of mitochondrial genes and total counts. A small number of cells with low total counts have high values of percent.mt](../../images/scrna-seurat-pbmc3k/seurat_QC_scatter_before.png "Scatter plots showing the relationships between the total counts (nCount_RNA) and A. the number of unique features (nFeature_RNA) and B. the proportion of mitochondrial reads (percent.mt)") + +> +> +> 1. What do the violin plots tell us about the cell sizes and quality in our dataset? +> 2. What do the scatter plots tell us about the relationship between cell size and the other QC metrics? +> +> > +> > 1. The violin plots give us an overview of the cell sizes (nCount_RNA), number of unique genes (nFeature_RNA), and proportion of mitochondrial reads for our cells. We can see that most of the cells are grouped together near the bottom of each plot, but there are some outliers that have unusually high or low values of each metric. Cells with higher values of nCount_RNA are likely to be larger as they contained more RNA. Cells with higher values of nFeature_RNA had RNAs produced from lots of different genes, while those with low nFeature_RNA had RNAs produced from a smaller range of genes. Cells with high percent.mt had lots of reads from mitochondrial genes. 
Although there will be some natural variation in cell size, the range of genes being expressed, and mitochondrial content, we suspect that extreme values of these three metrics reflect low quality cells. +> > 2. The scatter plots show us how these QC metrics relate to each other. As expected, the cells with higher total RNA counts also tend to have higher numbers of unique features, but some cells have particularly high values of both - these could be doublets. The proportion of mitochondrial genes doesn't increase with cell size (total counts) in the same way. We can see a small number of cells that have very high proportions of mitochondrial reads despite having low total reads - these are likely to be damaged cells that have lost a lot of their non-mitochondrial RNAs. +> > +> {: .solution} +{: .question} + +We can now set our QC thresholds based on these plots. Unfortunately, there are no standard thresholds that can be applied to every dataset, so we need to look at our data and make this decision for ourselves. + +We've already filtered out the cells with the lowest total counts when we created the SeuratObject, so we'll focus on filtering the number of unique features and the proportion of mitochondrial reads. We saw on the scatter plots that the cells with the highest numbers of unique features also had the highest nCount_RNA values, so we'll actually be getting rid of the cells with the highest total counts too. + +> +> If you've used Scanpy or followed the [Clustering 3K PBMCs with Scanpy]({% link topics/single-cell/tutorials/scrna-scanpy-pbmc3k/tutorial.md %}) tutorial then you might be wondering why we aren't visualising and filtering our genes as well as our cells. Seurat enables us to set the minimum number of cells that genes must be present in when we create the SeuratObject, but the pipeline doesn't usually include any further quality checks for genes. 
If we did need to do further filtering, we could do this in R by calculating an appropriate QC metric and subsetting the data. +{: .comment} + +> +> +> 1. What threshold would you set for nFeature_RNA? +> 2. What threshold would you set for percent.mt? +> +> > +> > 1. A threshold of 2500 seems sensible for this dataset. The violin plot for nFeature_RNA shows that most of our cells should be under this threshold, so we won't lose too much of our data. The scatter plot shows that the cells above this threshold also had unusually high values of nCount_RNA, which suggests they could include some doublets. +> > 2. A threshold of 5% should work for this dataset. As before, the violin plot shows that the majority of our cells are below this threshold, but in this case the cells above it had low total RNA counts, which suggests these could be damaged cells that had lost a lot of their other RNAs. Although there are few standards to guide us in single cell analysis, you will see the same 5% threshold for mitochondrial content used in many studies. It often works well, but it can filter out energetic cell types such as muscle cells, so we shouldn't apply this threshold without considering whether it works for our dataset. Some studies don't filter on mitochondrial reads at all! +> > +> {: .solution} +{: .question} + +We're setting QC thresholds based on our visual inspections of the data, but we could take different approaches too. We could calculate the interquartile range for our QC metrics and use this to set a threshold (e.g. maximum of 1.5 times the IQ range above the median). We could also try running the analysis with different thresholds to see how it affects the results - this could help us to see if we're excluding any biological variation, for example if we're filtering out a specific high-energy cell type with our mitochondrial threshold. + +> Filter Out Low Quality Cells +> +> 1. 
{% tool [Seurat Create](toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy1) %} with the following parameters: +> - *"Method used"*: `Filter cells by QC metrics` +> - {% icon param-file %} *"Input file with the Seurat object"*: `Mitochondrial Annotations` (output of **Seurat Create** {% icon tool %}) +> - *"Minimum nFeature_RNA"*: `200` +> - *"Maximum nFeature_RNA"*: `2500` +> - *"Maximum percent.mt"*: `5.0` +> - *"Filter by a different metric"*: `No` +> +> 2. Rename the output as `Filtered Dataset` +> +{: .hands_on} + +If we produce the same plots again, we can see what has changed in our data. + +> Re-Visualise QC Metrics +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Filtered Dataset` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Violin Plot with 'VlnPlot'` +> - *"Features to plot"*: `nFeature_RNA,nCount_RNA,percent.mt` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> +> 2. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Filtered Dataset` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Scatter Plot with 'FeatureScatter'` +> - *"First feature to plot"*: `nCount_RNA` +> - *"Second feature to plot"*: `percent.mt` +> +> 3. 
{% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Filtered Dataset` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Scatter Plot with 'FeatureScatter'` +> - *"First feature to plot"*: `nCount_RNA` +> - *"Second feature to plot"*: `nFeature_RNA` +> +{: .hands_on} + +![Three violin plots each showing most points grouped together and no extreme outliers](../../images/scrna-seurat-pbmc3k/seurat_violin_after.png "Violin Plots showing the unique features (nFeature_RNA), total counts (nCount_RNA) and the proportion of reads coming from mitochondrial genes for all cells after filtering") + +![Scatter plot A shows clear positive correlation with the number of unique features increasing as total counts increases. No extreme outliers. Scatter plot B does not show a relationship between the proportion of mitochondrial genes and total counts. No extreme outliers.](../../images/scrna-seurat-pbmc3k/seurat_QC_scatter_after.png "Scatter plots showing the relationships between the total counts (nCount_RNA) and A. the number of unique features (nFeature_RNA) and B. the proportion of mitochondrial reads (percent.mt) after filtering") + +> +> +> 1. Have we eliminated the low-quality cells from our data? +> > +> > 1. We can see in both the violin and scatter plots that the outliers have been eliminated from our dataset - we've removed the cells with high mitochondrial counts and/or unusually high numbers of detected genes. We hope that this means we've got rid of the poor quality cells without losing any real biological variation, but we can't really be sure. Sometimes it is necessary to come back and re-think our filters - for example if you get to the end of the analysis and realise that you're missing a cell type that should be present in your data! 
+> > +> {: .solution} +{: .question} + +## Further Preprocessing + +Now that we're happy with the cells left in our SeuratObject, we can begin preparing the data for analysis. Seurat provides two routes for preprocessing, one with separate tools for each step and another that combines all these preprocessing steps into a single tool, `SCTransform`. + +> +> +> `SCTransform` makes preprocessing quicker and easier because it combines several steps from the standard Seurat pipeline into one. `SCTransform` normalises and scales the data while also selecting highly variable features. It also takes a different approach to preprocessing that can be more effective at removing technical effects from the data. +> +> Once you are familiar with the separate steps, you might find it more effective and convenient to use `SCTransform`. The main difference to be aware of when using `SCTransform` is that the results will be stored as a new assay called `SCT` in the SeuratObject. You will need to use this assay in the following analysis steps, instead of the original `RNA` assay. After running `SCTransform`, `SCT` will be set as the default assay for the SeuratObject so the tools should automatically use the correct one. The methods and default settings used in `SCTransform` also differ from the standard pipeline, so you should expect to see some differences in your results compared to the other route. +> +{: .comment} + +The usual preprocessing steps for single cell data are normalisation, selection of the most variable features, and scaling the data. The same steps are performed in slightly different ways by the separate preprocessing functions and SCTransform. + +- **Normalisation** + Normalisation deals with variations in the number of reads we counted for each cell that were caused by systemic technical differences. We might have captured more RNAs or produced more copies of them during the PCR amplification step in some cells. 
If we don't remove these differences then they could obscure the biological differences we're trying to explore. We could end up with clusters of cells based on how many RNAs we captured or copied rather than on their cell type. + The default normalisation method for the separate `NormalizeData` tool in Seurat is `LogNormalize`, which simply divides the number of counts for each gene by the total counts for the cell, multiplies this by a scale factor (10,000 is the default), and then log-transforms the results. Essentially, we're working out how many counts we would have for each gene if we had produced 10,000 reads from each cell, instead of getting different numbers of reads from different cells. The log-transformation then increases the impact that genes that showed stronger relative differences in expression between cells will have on our analysis (e.g. a gene that is expressed at an average count of 50 in cell type A and 10 in cell type B should have a bigger impact than a gene that is expressed at an average count of 1100 in A and 1000 in B). The normalised data will be stored in the `data` layer of the SeuratObject. The unnormalised data will still be available in the `counts` layer. + Although this form of normalisation is widely-used, it does assume that each cell originally had the same number of RNA molecules, which isn't the case when our samples contain cells of different types and sizes. SCTransform takes a different approach to normalisation that doesn't make the same assumption about the cells {% cite SCTransform2022 %}. Instead, it creates a model of the UMI counts that enables it to regress out variation in sequencing depth (nCount_RNA) and pools information from genes with similar abundances to adjust the variances. The corrected counts will be stored in the `counts` layer of the new SCT assay. The log1p(counts) will be stored in the `data` layer and the Pearson residuals will be stored in the `scale.data` layer. 
+- **Selection of highly variable features** + Rather than use the entire dataset in every stage of the analysis, we can focus on the genes that provide the most information - the highly variable genes that showed the biggest differences in expression between cells. We assume that these bigger differences reflect genuine biological differences, while the smaller differences in expression seen in other genes is down to chance or technical noise. Focusing on the most variable genes can make biological differences clearer in single cell analysis. + Feature selection can have a big impact on our analysis as we will run downstream analyses (e.g. PCA) on these genes. We will be comparing cells based on the expression of these variable genes. + Seurat's separate `FindVariableFeatures` tool selects the 2000 most variable genes by default, while `SCTransform` selects the top 3000 genes. We can select more variable genes when we use SCTransform because this method is better at removing technical effects from the data, so the additional variable features are more likely to represent biological variation. + We may need to change this setting for some datasets to ensure we're selecting the most useful genes without including too many others. +- **Scaling** + Scaling is a linear transformation that we apply to prepare our data for dimensional reduction. + The default scaling method for the separate `ScaleData` tool in Seurat shifts the expression of each gene so that the mean expression across all the cells is 0. It also scales the expression of each gene so that the variance across all cells is 1. Scaling ensures that highly expressed genes don't dominate the analysis too much - we're interested in differences in expression between cells, not in genes that are always highly expressed in all the cells. The results will be stored in the `scale.data` layer. 
+ SCTransform doesn't scale data in the same way - although it centers the data by default, it won't scale the data unless you select this option. Instead, SCTransform usually stores the Pearson residuals in the `scale.data` layer, which don't need to be scaled to the same variance. + By default, both approaches only center/scale the highly variable genes that we'll use for dimensional reduction, but it is possible to scale more genes if required. +- **Regression** + The Seurat pipeline can include another step during preprocessing of our single cell data. We can regress out (or remove) the impact of unwanted sources of variation. We could use this technique to remove the effects of the cell cycle or the differences associated with the proportion of mitochondrial genes. The goal is to reduce differences that are related to factors we are not interested in as this can help the differences we are interested in (like those between cell types or experimental groups) stand out more. + It is possible to use the `ScaleData` function to regress out unwanted variation, but the creators of Seurat recommend using `SCTransform` for preprocessing if you want to do any regression. `SCTransform` automatically regresses out variation associated with sequencing depth (total UMI counts or nCount_RNA) and can also regress out other variables. If you choose to use SCTransform in this tutorial, then you'll regress out the variation associated with the proportion of mitochondrial content, just like in [Seurat's original version of this tutorial](https://satijalab.org/seurat/articles/sctransform_vignette.html). + +{% include _includes/cyoa-choices.html option1='Separate Preprocessing Steps' option2='SCTransform' default='Separate-Preprocessing-Steps' text="You can perform each preprocessing step separately, which might give you a better understanding of the different elements involved in preprocessing, or run them all at once using SCTransform." %} + +
+>Separate Preprocessing Steps +> +> 1. {% tool [Seurat Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Filtered Dataset` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Normalize with 'NormalizeData'` +> - *"Method for normalization"*: `LogNormalize` +> +> 2. {% tool [Seurat Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `rds_out` (output of previous **Seurat Preprocessing** {% icon tool %}) +> - *"Method used"*: `Identify highly variable genes with 'FindVariableFeatures'` +> - *"Method to select variable features"*: `vst` +> - *"Output list of most variable features"*: `Yes` +> - *"Number to show"*: `10` +> +> 3. {% tool [Seurat Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `rds_out` (output of previous **Seurat Preprocessing** {% icon tool %}) +> - *"Method used"*: `Scale and regress with 'ScaleData'` +> - *"Regress out a variable"*: `No` +> - *"Features to scale"*: `All Features` +> +> 4. Rename the output as `Preprocessed Data` +> +{: .hands_on} +
+ +
+>SCTransform +> +> 1. {% tool [Seurat Preprocessing](toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Filtered Dataset` (output of **Seurat Create** {% icon tool %}) +> - *"Method used"*: `Complete all preprocessing with 'SCTransform'` +> - *"Genes to calculate residual features for"*: `all genes` +> - *"How to set variable features"*: `set number of variable features` +> - *"Output list of most variable features"*: `Yes` +> - *"Number to show"*: `10` +> - *"Variable(s) to regress out"*: `percent.mt` +> +> 2. Rename the output as `Preprocessed Data` +> +> If we inspect this dataset, we can see that a new assay called `SCT` has been created and is set as the default assay. Click on the {% icon galaxy-eye %} icon of the following output to check this. +> +> 3. {% tool [Seurat Data Management](toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Preprocessed Data` (output of **Seurat Preprocessing** {% icon tool %}) +> - *"Method used"*: `Inspect Seurat Object` +> - *"Display information about"*: `General` +> +{: .hands_on} +
+ +### Visualise Highly Variable Genes + +>Visualise Results +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Preprocessed Data` (output of **Seurat Preprocessing** {% icon tool %}) +> - *"Method used"*: `Plot Variable Genes with 'VariableFeaturePlot'` +> - *"Label the top most variable features"*: `Yes` +> - *"Number to show"*: `10` +> +{: .hands_on} + +
+ +![Most genes are grouped together at the bottom of the plot with low standardized variance. The selected variable genes are highlighted in red and labelled by name. They have higher standardized variances although most are still quite close to the non variable genes. The labels read PPBP, S100A9, LYZ, IGLL5, GNLY, FTL, PF4, FTH1, GNG11, and FCER1A.](../../images/scrna-seurat-pbmc3k/seurat_variable_genes.png "Plot showing the standardized variances of variable and non-variable genes. The top 10 most variable genes are labelled.") + +
+ +
+ +![Most genes are closely grouped together, right at the bottom of the plot with low standardized variance. The selected variable genes are highlighted in red and labelled by name. They are higher up on the plot, showing they have higher standardized variances although most are still quite close to the non variable genes. They are S100A9, GNLY, LYZ, S100A8, NKG7, FTL, GZMB, IGLL5, FTH1, and CCL5.](../../images/scrna-seurat-pbmc3k/seurat_variable_genes_SCT.png "Plot showing the standardized variances of variable and non-variable genes. The top 10 most variable genes are labelled.") + +
+ +> +> 1. What are the top 10 most variable genes in this dataset? +> 2. Why are we interested in these genes? +> > +> > +> > 1. We can check the list of the top variable genes in our history by clicking on the {% icon galaxy-eye %} or see them on our variable features plot. +> > +> > +> > > | | | +> > > |----|---------| +> > > | 1 | PPBP | +> > > | 2 | LYZ | +> > > | 3 | S100A9 | +> > > | 4 | IGLL5 | +> > > | 5 | GNLY | +> > > | 6 | FTL | +> > > | 7 | PF4 | +> > > | 8 | FTH1 | +> > > | 9 | GNG11 | +> > > | 10 | S100A8 | +> > {: .matrix} +> > +> > +> > +> > +> > > | | | +> > > |----|----------| +> > > | 1 | S100A9 | +> > > | 2 | GNLY | +> > > | 3 | LYZ | +> > > | 4 | S100A8 | +> > > | 5 | NKG7 | +> > > | 6 | FTL | +> > > | 7 | GZMB | +> > > | 8 | IGLL5 | +> > > | 9 | FTH1 | +> > > | 10 | CCL5 | +> > {: .matrix} +> > +> > +> > > +> > > The list of highly variable genes you end up with will depend on which preprocessing route you chose. The two preprocessing routes use different methods to select features, so they won't always end up with the same genes, although there are likely to be some similarities. `SCTransform` also returns 3000 variable genes by default, rather than the 2000 selected by `FindVariableFeatures`. We can select more features with `SCTransform` because its normalisation method is better at removing technical effects from the data, so we believe that these additional genes reflect subtler biological variations rather than technical differences. +> > {: .comment} +> > +> > 2. Single cell datasets contain a lot of information, including expression data for thousands of different genes. Some of these genes don't tell us much about the data, for example they might be housekeeping genes that are expressed at similar levels in most of our cells. 
We want to find the genes that can tell us most about the differences between our cells, so we want to identify the genes whose expression varies most across the dataset - focusing on these highly variable features should help us to uncover the biological differences we're looking for. +> > +> {: .solution} +{: .question} + +# Dimensionality Reduction + +Dimensional reduction is a key step in single cell analysis that simplifies our big, complex datasets enough to enable us to perform and understand further analyses. + +In single cell analysis, we're comparing cells based on their patterns of gene expression - we're looking for cells with similar transcriptomic profiles. Each gene represents a dimension of the data. If we only found two genes being expressed in our cells, we could represent them on a 2-dimensional plot. The two axes of the plot would represent the expression levels of the two genes. Each cell would become a point on the plot, positioned according to its expression of these genes. We might spot one group of cells with high levels of both genes and another group that expresses large amounts of one gene and not much of the other. + +For datasets with thousands of genes, the concept is essentially the same: each cell's expression profile defines its location in the high-dimensional expression space. Each gene is still a dimension or axis on the plot and each cell can be positioned along these axes based on their expression. It's just much harder to imagine a plot with thousands of different axes - that's why we need dimensional reduction! + +We don't have to think of the expression of each of our thousands of genes individually. The expression of some genes can be correlated if they are affected by the same biological process - cells that express a lot of one gene might also express higher levels of other genes that have similar functions. 
Instead of keeping the information from each of these genes as separate axes or dimensions, we can compress them into a single dimension (e.g. an 'eigengene'). We can do the same for all of the correlated groups of genes in our data, going from thousands of individual genes to perhaps 10, 20, or 50 grouped dimensions. + + Dimensionality reduction aims to reduce the number of separate dimensions in the data, which: + +- reduces the computational work in downstream analyses to only a few dimensions +- reduces noise by averaging across multiple genes to obtain a more precise representation of the patterns in the data +- enables effective plotting of the data. + +## Principal Component Analysis + +Principal Component Analysis (PCA) is a dimensionality reduction technique that identifies the axes in high-dimensional space that capture the largest amount of variation. It is a simple, highly effective strategy that is widely used in data science, including for single cell omics. + +The axes or dimensions identified by PCA will be ordered based on how much of the variation they explain. The first axis (or Principal Component, PC) is chosen so that it captures the greatest variance across cells. The next PC is the axis orthogonal to (uncorrelated with) the first that captures the greatest amount of the remaining variation across the cells. The third PC will be orthogonal to the first two and capture the greatest amount of the remaining variation, and so on. You can decide how many PCs you want to produce and then how many of these PCs you want to use in downstream analyses. + +We assume that the top PCs are more likely to represent real biological variation in the data because this should affect multiple genes in coordinated ways, having a bigger impact on variation. Random technical or biological noise should affect each gene independently, without creating patterns of correlation or explaining much variation, so these effects should be represented in the later PCs. 
By using just the top PCs in our downstream analysis, we can therefore focus on the biological variation and eliminate some of the unwanted noise while also making our data easier to analyse. + +### Perform the PCA + +We will start by calculating the top 50 PCs and then decide how many of these we want to use. + +The standard Seurat pipeline performs the PCA on the Variable Features only, rather than the complete dataset. You can change this using the `features` parameter, but you will need to scale the chosen features using `ScaleData` before running the PCA, if you didn't already choose this option during the preprocessing stage as we did above. + +>Perform the PCA +> +> 1. {% tool [Seurat Run Dimensional Reduction](toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Preprocessed Data` (output of **Seurat Preprocessing** {% icon tool %}) +> - *"Method used"*: `Run a PCA dimensionality reduction using 'RunPCA'` +> - *"Output list of top genes"*: `Yes` +> - *"Number of PCs to print genes for"*: `5` +> - *"Number of top genes to print for each PC"*: `5` +> +> 2. Rename the output as `PCA Results` +> +{: .hands_on} + +### Visualise the PCA Results + +We've now reduced our dataset to 50 dimensions or PCs that represent the expression of sets of correlated genes. Since we selected the option to output a list of the top genes, we can use the {% icon galaxy-eye %} icon on this output in our history to see which genes were most strongly associated with the top five PCs. We can see lists of the genes that had the strongest positive and negative scores for each PC - these are the correlated sets of genes that defined the PCs. 
+ +{% snippet faqs/galaxy/histories_dataset_item.md %} + +Rather than just looking at a list of genes, we can also produce plots to help us better understand how our cells and genes relate to the PCs we have just computed. Let's start by finding out more about the genes that were most strongly associated with our top three PCs. We can do this with the `VizDimLoadings` plot, which shows the genes associated with each PC, how strongly each gene affected the PC, and whether it was positively or negatively correlated with the PC. The plots will look a bit different depending on which preprocessing approach you followed. + +>Visualise the PCA Results - Dimensional Loadings +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize Reduction Results with 'VizDimLoadings'` +> - *"Number of dimensions to display"*: `3` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `3000` +> +{: .hands_on} + +
+ +![Three plots with dots for the top 30 genes for each PC. Each plot has dots near the positive and negative ends of the axis and no values close to zero](../../images/scrna-seurat-pbmc3k/seurat_vizdimloadings.png "Plots showing the genes with the highest positive and negative loadings for each of the first three PCs") + +
+ +
+ +![Three plots with dots for the top 30 genes for each PC. Each plot has dots near the positive and negative ends of the axis and no values close to zero](../../images/scrna-seurat-pbmc3k/seurat_vizdimloadings_SCT.png "Plots showing the genes with the highest positive and negative loadings for each of the first three PCs") + +
+ +> +> 1. What are the top positive and negative genes for the first three PCs? +> 2. Are any of our top 10 highly variable genes associated with the top PCs? Does this surprise you? +> 3. When we plot the cells along the PC axes (in the next step) do you expect to see differences in the expression of these genes along the associated axis? +> > +> > +> > 1. The list produced by the `RunPCA` function shows the genes that were most strongly positively and negatively associated with each PC. The top positively associated genes were CST3 for PC1, CD79A for PC2, and HLA-DQA1 for PC3. The top negatively associated genes were MALAT1 for PC1, NKG7 for PC2, and PPBP for PC3. However, these top genes don't define the PCs by themselves - they are part of groups of genes that showed correlated patterns of expression. +> > The figures show us more information about the top genes. We can see the top 30 genes in each of these groups on the plots. We can also see how strongly each of these genes was associated with the PC. +> > +> > +> > 1. The list produced by the `RunPCA` function shows the genes that were most strongly positively and negatively associated with each PC. The top positively associated genes were MALAT1 for PC1, NKG7 for PC2, and S100A8 for PC3, while the top negatively associated genes were FTL for PC1, HLA-DRA for PC2, and CD74 for PC3. However, these top genes don't define the PCs by themselves - they are part of groups of genes that showed correlated patterns of expression. +> > The figures show us more information about the top genes. We can see the top 30 genes in each of these groups on the plots. We can also see how strongly each of these genes was associated with the PC. +> > +> > 2. We can see that some of our highly variable genes are associated with the top PCs. For example, FTL is one of the top genes associated with PC1. We should expect to see some of our highly variable genes here as we used the features we selected to perform the PCA. 
It also makes sense that our top 10 variable genes are strongly associated with PCs, because we know these are the genes that varied most across the dataset and the PCA was looking for these strong differences in expression. However, a gene that varied a lot won't necessarily be associated with a top PC unless its expression correlates with other variable genes - a group of correlated genes is likely to have a stronger impact than a single gene, even if that one gene varies a lot. +> > 3. Since these are the genes that are most strongly associated with each PC, we should expect to see strong differences in their expression from one end of the associated axis to the other. The genes positively associated with PC1 should mainly be expressed near the positive end of the PC1 axis while the negatively associated genes should mainly be expressed at the negative end. We would expect to see similar patterns for the other PCs. +> {: .solution} +{: .question} + +Next, let's see how our cells are distributed along the top PCs. We can use `DimPlot` to create a 2-dimensional plot where each axis represents one of the PCs - being able to produce these plots is one of the benefits of dimensional reduction as it makes our data easier to interpret when we can see it on two axes rather than imagining it along thousands. If we're plotting the top PCs then we should see cells or clumps of cells spread along these axes. If PC1 is really explaining a lot of the variation in the data then we should see that there are cells with both high and low values along this axis - if they're all grouped together at one end of the axis then the PC wouldn't be telling us much about the differences between cells. We'd hope to see a similar effect along the next PCs, although it won't be as strong as for the top PC because that's the one that explained the most variation in the dataset. 
We should see similar relationships no matter which preprocessing approach we used, although of course the plots will look a bit different. + +>Visualise the PCA Results - DimPlot +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize Dimensional Reduction with 'DimPlot'` +> - *"Name of reduction to use"*: `pca` +> +{: .hands_on} + +
+ +![Three large groups appear in different parts of the plot with a scattering of cells in between them](../../images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot.png "PCA plot showing the distribution of cells along the first two principal components") + +> +> If you've tried the [Clustering 3K PBMCs with Scanpy]({% link topics/single-cell/tutorials/scrna-scanpy-pbmc3k/tutorial.md %}) tutorial then you might notice that there are some differences in the relationships between the genes and PCs. Our top negative genes for PCs 2 and 3 (NKG7 and PPBP) here are among the top positively associated genes there. This is simply a mathematical quirk - in both cases, these genes are strongly associated with the top PCs, but we're just calculating or drawing our axes in the opposite direction. You should be able to spot some similarities in the plots between these two tutorials, especially in the PC2/PC3 plot, which looks like it has been flipped upside down! +{: .comment} + +
+ +
+ +![Three large groups appear in different parts of the plot with a scattering of cells in between them](../../images/scrna-seurat-pbmc3k/seurat_PCA_DimPlot_SCT.png "PCA plot showing the distribution of cells along the first two principal components") + +
+ +> +> 1. Can you see any groups or clusters of cells in the PCA plot? +> > +> > 1. The cells seem to form three main clusters, although there are also cells spread outside and in between these groups. The cells are distributed along the full length of both axes, which makes sense as these are the PCs that explain the greatest amounts of variation in the data. +> > If you've tried the other preprocessing route in this tutorial, then you might notice that while the plots look quite similar, it seems like one of them has been flipped upside down and back to front compared to the other! This explains why MALAT1 is the top negative gene for PC1 in the separate preprocessing route but the top positive gene for PC1 in the SCTransform route - this axis is showing similar differences, but the other way around. +> {: .solution} +{: .question} + +We don't have to use PCs 1 and 2 as the axes. We can decide which PCs we want to plot our data along. We can also use the `FeaturePlot` function to colour the plots by the expression levels of specific genes to see how this relates to the PCs, so let's see how the top genes relate to the top three PCs. + +>Visualise the PCA Results - Feature Plots +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize expression with 'FeaturePlot'` +> - *"Features to plot"*: Use the top positive and negative genes for PCs 1-3: `CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP``MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74` +> - *"Dimension to plot on x axis"*: `1` +> - *"Dimension to plot on y axis"*: `2` +> - *"Name of reduction to use"*: `pca` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `3100` +> +> 2. 
{% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize expression with 'FeaturePlot'` +> - *"Features to plot"*: Use the top positive and negative genes for PCs 1-3: `CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP``MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74` +> - *"Dimension to plot on x axis"*: `2` +> - *"Dimension to plot on y axis"*: `3` +> - *"Name of reduction to use"*: `pca` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `3100` +> +{: .hands_on} + +You should now have two plots showing the expression of these genes on different axes. + +
+ +![Six versions of the same PCA plot showing three main groups of cells in different parts of the plot. The positively associated genes for each PC are mainly shown as expressed in cells near the positive end of that axis. The opposite is true for the negatively associated genes.](../../images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots.png "PCA Plots coloured by expression of the top positive and negative markers for PCs 1 to 3, plotted along the PC1 and PC2 axes") + +![Six versions of the same PCA plot showing two main groups of cells and a smaller third group spread vertically along the middle of the plot. The positively associated genes for each PC are mainly shown as expressed in cells near the positive end of that axis. The opposite is true for the negatively associated genes.](../../images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots.png "PCA Plots coloured by the expression of the top positive and negative markers for PCs 1 to 3, plotted along the PC2 and PC3 axes") + +> +> 1. How does the expression of the top positive and negative genes relate to PCs 1 and 2? +> 2. How does the expression of the top positive and negative genes relate to PCs 2 and 3? +> > +> > 1. When we look at the expression of our top genes along PCs 1 and 2 we see that, as expected, the top positively associated gene for PC1, CST3, is almost exclusively expressed in cells at the positive end of the PC1 (horizontal) axis. The pattern is less clear for the negatively associated MALAT1 as most cells are expressing it, but if we look closely we can see that the expression is higher in cells at the negative end of the PC1 axis. We can see similar patterns for the top genes associated with PC2. The top positively associated gene, CD79A is mainly expressed by the cells closest to the top of the PC2 (vertical) axis. The top negative gene NKG7 is mainly expressed by cells along the lower half of the PC2 axis. 
+> > We don't see similar patterns for the genes associated with PC3 in this figure - although these genes are only expressed in parts of the plot, there is no clear relationship with either the PC1 or PC2 axes. This is exactly what we would expect to see - these genes are associated with PC3 so they shouldn't show any relationship with the axes. +> > 2. The picture looks very different when we plot our cells along PC2 and PC3. We can see two clumps of cells close together at the top of the plot and another small group of cells spread down the middle of the plot. This is why we have to be careful when interpreting dimensional reduction plots - the data can look very different depending on which of the many axes we choose to visualise. +> > Now it is the top genes for PC1 that don't show any relationship with the axes. The top genes for PC2 still show differences in expression along the PC2 axis (now the horizontal axis) - and these actually look a bit clearer than in the previous plot. We can also see how the top genes for PC3 relate to the PC3 (vertical) axis. The positively associated HLA-DQA1 is mostly expressed by cells at the top of the axis while the negatively associated PPBP is mostly expressed along the negative part of that axis. +> {: .solution} +{: .question} + +
+ +
+ +![Six versions of the same PCA plot showing three main groups of cells in different parts of the plot. The positively associated genes for each PC are mainly shown as expressed in cells near the positive end of that axis. The opposite is true for the negatively associated genes.](../../images/scrna-seurat-pbmc3k/seurat_PCA_12_featureplots_SCT.png "PCA Plots coloured by expression of the top positive and negative markers for PCs 1 to 3, plotted along the PC1 and PC2 axes") + +![Six versions of the same PCA plot showing two main groups of cells and a smaller third group spread vertically along the middle of the plot. The positively associated genes for each PC are mainly shown as expressed in cells near the positive end of that axis. The opposite is true for the negatively associated genes.](../../images/scrna-seurat-pbmc3k/seurat_PCA_23_featureplots_SCT.png "PCA Plots coloured by the expression of the top positive and negative markers for PCs 1 to 3, plotted along the PC2 and PC3 axes") + +> +> 1. How does the expression of the top positive and negative genes relate to PCs 1 and 2? +> 2. How does the expression of the top positive and negative genes relate to PCs 2 and 3? +> > +> > 1. When we look at the expression of our top genes along PCs 1 and 2 we see that, as expected, the top positively associated gene for PC1, MALAT1, is expressed at higher levels in cells at the positive end of the PC1 (horizontal) axis while the top negatively associated gene, FTL, is expressed mainly at the negative end. The pattern isn't as clear for these genes as it is for the other PCs we plotted, because both MALAT1 and FTL are expressed to some extent by most of the cells in our plots. We can see more obvious patterns for the top genes associated with PC2. The top positively associated gene, NKG7 is mainly expressed by the cells at the positive end of the PC2 (vertical) axis. The top negative gene HLA-DRA is mainly expressed by cells along the lower half of the PC2 axis. 
+> > We don't see similar patterns for the genes associated with PC3 in this figure - although these genes are only expressed in parts of the plot, there is no clear relationship with either the PC1 or PC2 axes. This is exactly what we would expect to see - these genes are associated with PC3 so they shouldn't show any relationship with the PC1 and PC2 axes. +> > 2. The picture looks very different when we plot our cells along PC2 and PC3. We can see two long lines of cells forming an upside-down 'V'. This is why we have to be careful when interpreting dimensional reduction plots - the data can look very different depending on which of the many axes we choose to visualise. +> > Now it is the top genes for PC1 that don't show any relationship with the axes. The top genes for PC2 still show differences in expression along the PC2 axis (now the horizontal axis). We can also see how the top genes for PC3 relate to the PC3 (vertical) axis. The positively associated S100A8 is mostly expressed by cells at the top of the axis while the negatively associated CD74 is mostly expressed along the negative part of that axis. +> {: .solution} +{: .question} + +
+ +Another option for visualising our PCA results is to use `DimHeatmap` to produce a heatmap of the PCA scores for the top genes in each cell. We can see which genes scored highly in the same cells and get an idea of how the scores varied across the dataset. We can look for groups of cells that had similar scores for a particular PC, suggesting that they share similar expression profiles - these groups of cells might become our clusters. We can also compare the patterns for different PCs. We would expect to see more similarities (genes with similar scores across cells and more cells grouping together) for the top PCs since these explain more of the variation in the data. + +> Visualise the PCA Results - Heatmaps +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize Reduction Results with 'DimHeatmap'` +> - *"Dimensions from reduction to plot"*: `1` +> - In *"Advanced Options"*: +> - *"Number of top cells to plot"*: `500` +> - *"Return an equal number of genes with + and - scores"*: `Yes` +> +> 2. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize Reduction Results with 'DimHeatmap'` +> - *"Dimensions from reduction to plot"*: `1:15` +> - In *"Advanced Options"*: +> - *"Number of top cells to plot"*: `500` +> - *"Return an equal number of genes with + and - scores"*: `Yes` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `4100` +> - *"Height of plot in pixels"*: `4400` +{: .hands_on} + +
+ +![The heatmap is sharply divided into four quarters. The left side of the heatmap shows PCA scores for groups of cells that have low values for the genes in the top half of the plot that were negatively associated with PC1 and high values for genes in the bottom half that were positively associated with PC1. Cells on the right side have high values in the top half of the plot and low values in the bottom half of the plot](../../images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1.png "Heatmap of PCA scores for the top 30 genes associated with PC1") + +![The plot shows 15 heatmaps displaying PCA scores for the top cells and genes for PC1 to PC15. The first three plots are clearly divided into quarters with the pattern becoming less clear for later PCs.](../../images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15.png "Heatmap of PCA scores for the top 30 genes associated with PCs 1 to 15") + +
+ +
+ +![The heatmap is sharply divided into four quarters. The left side of the heatmap shows PCA scores for groups of cells that have low values for the genes in the top half of the plot that were negatively associated with PC1 and high values for genes in the bottom half that were positively associated with PC1. Cells on the right side have high values in the top half of the plot and low values in the bottom half of the plot](../../images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_SCT.png "Heatmap of PCA scores for the top 30 genes associated with PC1") + +![The plot shows 15 heatmaps displaying PCA scores for the top cells and genes for PC1 to PC15. The first two plots are clearly divided into quarters but the pattern becomes less clear in subsequent plots.](../../images/scrna-seurat-pbmc3k/seurat_heatmap_PC_1_15_SCT.png "Heatmap of PCA scores for the top 30 genes associated with PCs 1 to 15") + +
+ +> +> Note that the cells aren't plotted in the same order in all of our heatmaps, so we can't compare the same cell (column) across multiple plots - the first column in our heatmap doesn't always represent the same cell. We have plotted the top 500 cells for each PC, which means there are lots of cells that aren't included in these plots. We are probably looking at different cells and/or cell orders on each plot. The cells that showed the strongest variation along PC1 aren't necessarily the ones that varied most along PC2, so they won't appear in the same place on the plots. +{: .comment} + +> +> 1. What does the PC1 heatmap reveal about the expression of the top genes for this PC - CST3 and MALAT1 (separate preprocessing route) or MALAT1 and FTL (SCTransform route)? +> 2. Why do the plots become messier for later PCs? +> 3. Which type of PCA visualisation was most useful for understanding the main sources of variation in the data? +> > +> > 1. The bottom row of the heatmap shows the PCA scores for the top positive gene associated with PC1, in the top cells for this PC. We can see that most cells on the left side of the heatmap are coloured yellow in this row, indicating they scored highly for this gene. Almost all of the cells on the right side are black or pink in this bottom row, indicating lower scores for the gene. Meanwhile, we see the opposite for the top negative gene associated with PC1, which appears in the topmost row of the heatmap. Cells (columns) that scored highly for the top positive gene had low scores for the top negative gene, while those that had low scores for the top positive gene showed high scores for the top negative gene. +> > Clearly, there is strong variation in these genes across the cells in the plot - and that variation isn't just in these two genes, but also in the 28 other genes in this heatmap. These are the two correlated groups of genes that define the first PC. 
Half the genes were more highly expressed in cells at one end of the PC while the other half were expressed more at the other end of the PC. +> > 2. Each of the plots for the top PCs distinguishes very clearly between two groups of cells. For each of the top PCs, one group (the left side of the plot) scored highly for most of the genes positively associated with the PC and usually had low scores for the genes negatively associated with the PC. The second group (the right side of the plot) showed the opposite, with low scores for positively associated genes and high scores for negatively associated genes. +> > The distinction becomes less clear in later PCs, with more cells showing different patterns to the rest and a less obvious distinction between the two sides of the plots. This makes sense, because the top PCs are the ones that explained the greatest amount of variation in the data, so they are linked to very clear patterns of expression across many cells. The later PCs didn't explain much variation because they aren't linked with such obvious patterns. +> > 3. The most useful plot might depend on your own preferences as well as the data you're analysing, but the DimHeatmap is often best for identifying which PCs will be most interesting for downstream analysis. Both cells and genes are ordered by their PCA scores on these plots, so you can see which PCs have the strongest impact on the greatest number of cells while also identifying the sets of features associated with different PCs. You can also limit the number of cells being plotted if you want to focus on the most extreme cells for each PC. +> {: .solution} +{: .question} + +### Determine the 'dimensionality' of the dataset + +We have reduced our dataset into 50 dimensions or PCs, but we don't necessarily need all these PCs to understand the biological variation between our cells. We can simplify our data even more by selecting a smaller number of PCs that still provide a robust representation of the data. 
Just like when we chose how many features to select, we're trying to find the number that will best represent the biology while getting rid of as much unwanted noise as we can. Including more PCs might help us capture more of the biological signal, but it will also increase the noise. + +The DimHeatmap plots can give you an idea of which PCs are having the strongest effects on your dataset, but an Elbow Plot can provide a simpler option for selecting how many dimensions (PCs) we should use in the following analyses. + +The Elbow Plot ranks the PCs based on the percentage of variance that each of them explains. We should see that the top PCs each explain a substantial amount of the data, but that there is a bend or elbow where the dots start lining up along the bottom axis. The PCs before the elbow explain most of the variation while those after the elbow don't provide much additional information, so we don't need to include them in our analysis. + +>Make an Elbow Plot +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Determine dimensionality with 'ElbowPlot'` +> - *"Number of dimensions to plot standard deviation for"*: `30``50` +> +{: .hands_on} + +
+ +![Dots showing the standard deviation for each PC start high for PC1 then drop substantially for each following PC. A sharp bend appears around PC10 where the drop from one PC to the next becomes very small. The dots from PC10 onwards are close to the bottom axis, with low standard deviations.](../../images/scrna-seurat-pbmc3k/seurat_elbowplot.png "Elbow Plot showing the standard deviations for the first 30 PCs") + +
+ +
+ +![Dots showing the standard deviation for each PC start high for PC1 then drop substantially for each following PC. A sharp bend appears around PC10, with a gentler slope down towards the horizontal axis from this point. The dots from PC30 onwards are close to the bottom axis, with low standard deviations.](../../images/scrna-seurat-pbmc3k/seurat_elbowplot_SCT.png "Elbow Plot showing the standard deviations for the first 50 PCs") + +
+ +> +> 1. How many PCs should we use? +> > +> > +> > 1. As with many decisions in single cell analysis, there isn't an exact method for deciding how many PCs we should use. The elbow or bend in our plot appears to be around PC9-10, so we'll use 10 dimensions in this tutorial, but you could justifiably choose anywhere between about PC7 and PC12 on the basis of this plot. It is usually better to err on the higher side than to get rid of PCs that might be useful. Sometimes it is worth repeating the analysis with different numbers of PCs to see how it affects the results. +> > As always, it is also important to consider biology when making your decision. In this case, an expert might have spotted that the genes strongly associated with PCs 12 and 13 are known markers for certain rare subtypes of immune cells (e.g. MZB1 is a marker for plasmacytoid dendritic cells). However, these cells are so rare that we're unlikely to find many in a dataset of this size, so these PCs might not be that useful here. In a larger dataset or one that was enriched for these cell types, we might decide to include these PCs in our analysis because of these genes. Since we only have 2700 cells, we can't be sure that this is a true biological signal rather than just noise, so we'll stick with the top 10 PCs on the basis of our Elbow Plot. +> > It's also worth noting that we calculated 50 PCs earlier, but only plotted 30 of them here as we wouldn't expect to need all 50 to explain this small dataset (especially after seeing the weaker patterns in those later heatmaps) - if we didn't see a clear bend in this plot, we could try plotting all 50 PCs instead. +> > +> > +> > 1. The plot shows a sharp elbow or bend at around PC10, but there is a gradual slope down towards the x-axis after this rather than an immediate drop to the bottom. In order to capture some of this additional variation for our analysis, we might decide to include 30 PCs. 
The curve of dots is much flatter and closer to the x-axis after this point, so the additional PCs won't explain much more of the variation. +> > We can actually use more PCs when we preprocess with SCTransform than if we used the separate preprocessing tools because the more effective normalisation method seems to be better at removing technical effects from the data. When we preprocess with SCTransform, we assume that higher PCs are more likely to represent subtle biological variation rather than technical effects, so we might improve our results by including more of them. +> > +> {: .solution} +{: .question} + +# Cluster the Cells + +The preprocessing and dimensional reduction we've performed were all done in preparation for what we actually want to do to understand our data - clustering. We want to find groups of cells that are expressing similar genes. You might already have spotted some cells grouping together in the DimPlots we produced from the PCA, but we don't want to rely on our eyes to pick out clusters. We will empirically define subpopulations of cells with similar expression profiles using a clustering algorithm. + +Clustering summarises the data and allows us to describe a heterogeneous population of cells using a set of discrete and easily understandable cluster labels - each cell will be assigned to one specific cluster. Once we've explored which genes are being expressed in these clusters, we'll then be able to treat them as proxies for biological identities such as cell types or states. We'll be able to think of each cluster as a particular type of cell. + +We will perform graph-based, unsupervised clustering in two steps to find out which cells are most similar to each other and how these similar cells can be grouped together: + +1. **Computation of a neighborhood graph** + + First, we will build a graph where every cell is a node that is connected to its neighbors by a line or edge. 
The distances between cells (the lengths of these edges) will be based on a distance metric, which is calculated by comparing the values for our top PCs between each pair of cells. The edges are weighted based on the similarity between the cells, so that more weight is given to the connections between cells that are more similar based on their expression patterns. The cells that share the closest, most heavily weighted connections in the graph are the ones with the most similar expression profiles - they are each others' nearest neighbours. Seurat uses a K-nearest neighbor (KNN) graph, so it will draw edges (lines) between each cell and a certain number (k) of its nearest neighbours. + +2. **Clustering of the neighborhood graph** + + Second, we will partition this graph into a number of highly interconnected groups, 'quasi-cliques', or communities. Seurat provides several different algorithms for clustering, but they all work to find the optimal way to organise the data into groups. Clustering algorithms will look for communities of cells that are more closely connected with each other than with the cells outside their group. Each community represents a cluster that we can use for downstream interpretation. + +## Find Neighbors + +Seurat includes a selection of different methods for computing a neighborhood graph, but we will stick to the default settings of using a Euclidean distance metric and the `annoy` method. We will need to decide how many nearest neighbors we want to include in our KNN graph - the value we want to use for `k`. The default for Seurat is to look for the 20 nearest neighbors for each cell. + +The best approach to building the neighborhood graph and the optimal value for `k` will depend on the data we are analysing - sometimes we'll need to try a few options to find the right fit. We'll use Seurat's default value of 20 for now - we'll know how well it worked when we take a look at our clusters later. 
+ +>Find Neighbors by Computing a Neighborhood Graph +> +> 1. {% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `PCA Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Compute nearest neighbors with 'FindNeighbors'` +> - *"Number of dimensions from reduction to use as input"*: `10``30` +> +{: .hands_on} + +## Find Clusters + +Seurat also offers a variety of different clustering algorithms, including SLM, Leiden and Louvain. We will follow the default Seurat pipeline and use the Louvain algorithm. + +We need to define a value for the resolution parameter, which determines the size of the clusters the algorithm will find. We'll set the resolution at `0.5` for now, or `0.8` if we used SCTransform for preprocessing, but this is one of those parameters that we'll often want to try a few different values for when working with single cell data. We'll set a higher resolution on the SCTransform route because we expect it to capture more of the biological variation, so we might be able to find more clusters. + +> +> Resolution is one of the key parameters you might need to change when performing clustering. It sets the 'granularity' of the clustering - you can choose a lower value to arrange your data into bigger clusters or use a higher resolution if you're looking for lots of little clusters. The best resolution can depend on how varied your cells are - do you think there are only a few different cell types or should your sample contain lots of different populations of cells? The resolution will usually need to be higher for larger datasets too. In most cases, a resolution of between 0.4 and 1.2 will work well - we're using a fairly low value here as we have a smaller dataset. +{: .comment} + +>Find Clusters using the Neighborhood Graph +> +> 1. 
{% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `rds_out` (output of **Seurat Find Clusters** {% icon tool %}) +> - *"Method used"*: `Identify cell clusters with 'FindClusters'` +> - *"Resolution"*: `0.5``0.8` +> - *"Algorithm for modularity optimization"*: `1. Original Louvain` +> +{: .hands_on} + +# Run non-linear dimensional reduction (UMAP/tSNE) + +We have already performed dimensional reduction using PCA to simplify our data and make it easier to analyse, but PCA plots aren't always the best way to display our data. We will often want a plot that separates out our clusters more clearly. It can make our clusters easier to interpret and enable us to use plots to show differences in characteristics such as the expression levels of specific genes between clusters. However, we need to be cautious about over-interpreting these plots - they give us an overview, but we can only look at two dimensions at a time, so we aren't seeing all of the relationships between clusters. The cells can also end up being plotted on top of each other, so some of them can be hidden from view. We shouldn't try to draw conclusions just by looking at the plots, but rather use them to make it easier to understand and share the results from tests such as differential expression analysis. + +Two options are available for non-linear dimensional reduction with Seurat on Galaxy: UMAP and tSNE. Both approaches try to learn the underlying structure of the dataset so that the cells that are most similar to each other can be placed closest to each other in the two-dimensional plots. Since we've already identified clusters of cells with similar expression profiles, we should expect to see cells from the same cluster grouped together in the plots. 
However, these dimensional reduction techniques have their limitations as there is no way to fully represent our complex single cell data in just two dimensions. Both UMAP and tSNE prioritise preserving local distances in the data to make sure that cells that are very similar will be located close to one another on the plot, but in order to do this they may not be able to show the more global relationships as accurately. + +> +> If you have followed the [Clustering 3K PBMCs with Scanpy]({% link topics/single-cell/tutorials/scrna-scanpy-pbmc3k/tutorial.md %}) tutorial, you might notice that this step was performed between construction of the neighborhood graph and finding clusters. In the Seurat pipeline, it is usually performed after clustering, but it shouldn't make any difference to your plots. You should end up with the same result if you run these steps the other way around, except that you won't be able to show your clusters on the plot if you haven't found them yet. +{: .comment} + +>Perform Dimensional Reduction with UMAP +> +> 1. {% tool [Seurat Run Dimensional Reduction](toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `rds_out` (output of **Seurat Find Clusters** {% icon tool %}) +> - *"Method used"*: `Run a UMAP dimensional reduction using 'RunUMAP'` +> - *"Name of reduction to use"*: `pca` +> - *"UMAP implementation to run"*: `uwot` +> - *"Run UMAP on dimensions, features, graph or KNN output"*: `dims` +> - *"Number of dimensions from reduction to use as input"*: `10``30` +> +> 2. Rename the output as `UMAP Results` +> +{: .hands_on} + +## Visualise UMAP + +Now we can visualise the UMAP, just as we did with the PCA. 
We can also colour in the UMAP plot based on the expression of genes we're interested in - we'll start by seeing where the cells expressing the top genes associated with PCs 1-3 have ended up in the UMAP plot. + +>Visualise the UMAP +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize Dimensional Reduction with 'DimPlot'` +> - *"Name of reduction to use"*: `umap` +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize expression with 'FeaturePlot'` +> - *"Features to plot"*: Use the top positive and negative genes for PCs 1-3: `CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP``MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74` +> - *"Name of reduction to use"*: `umap` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `3100` +> +{: .hands_on} + +
+ +![Plot showing three big groups of cells coloured by cluster, from 0 to 8. The smallest of these three main groups only contains cells coloured as cluster 3. The other two groups are made up of cells from different clusters.](../../images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot.png "UMAP coloured by cluster") + +![Six versions of the same UMAP plot, each coloured by the expression of a different gene. MALAT1 shows higher expression in the clusters where CST3 expression is low. CD79A expression is highest in cluster 3 while NKG7 expression is highest in clusters 4 and 6. HLA-DQA1 expression is mainly in clusters 3 and 7 while PPBP expression is mostly in the tiny cluster 8.](../../images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes.png "UMAP plots coloured by expression of the top genes associated with PCs 1-3") + +
+ +
+ +![Plot showing three big groups of cells coloured by cluster, from 0 to 11. The smallest of these three main groups only contains cells coloured as cluster 3. The other two groups are made up of cells from different clusters.](../../images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_SCT.png "UMAP coloured by cluster") + +![Six versions of the same UMAP plot, each coloured by the expression of a different gene. MALAT1 shows higher expression in the clusters where FTL expression is low. NKG7 expression is highest in clusters 4, 5 and 7 while HLA-DRA expression is highest in clusters 1, 3, 6 and 9. S100A8 expression is mainly in cluster 1 while CD74 expression is highest in cluster 3.](../../images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_top_genes_SCT.png "UMAP plots coloured by expression of the top genes associated with PCs 1-3") + +
+ +> +> 1. How many clusters have we identified? How do they relate to the shapes you can see on the plot? +> 2. Are there any relationships between our clusters and expression of the top genes associated with the first three PCs? +> +> > +> > 1. We have three main groups of cells in our plot and they are more clearly separated here than the cells in our PCA plot. Since the cells have been coloured by the cluster they were assigned to by the `FindClusters` algorithm, we can see that we have identified 912 clusters in our data. The first cluster is numbered as zero. If you have tried both preprocessing routes in this tutorial, you will see that we identified more clusters when using SCTransform (12 of them) than with the separate preprocessing tools (only nine!). It seems that preprocessing with SCTransform enables us to capture more of the biological variation. +> > Only one of the three main clumps of cells is made up of a single cluster (cluster 3). The other two big groups have been split up into different clusters by the algorithm, suggesting that there are some differences within them - this is one of the reasons we don't just want to rely on our eyes when identifying clusters. The UMAP can give us a quick overview of the data and our clusters, but we must be careful about overinterpreting it. We can't see everything about the data just by looking! +> > 2. Although this plot looks somewhat different from our PCA plot, we can still see patterns in the expression of the top genes that were associated with our PCs. The top positive and negative genes associated with PCs 1, 2, and 3 are expressed in different parts of the UMAP plot. This makes sense, because we used the PCA to make the UMAP, so the cells that were at different ends of the PC axes should be in different parts of the UMAP plot too. 
We don't see the same relationships between the genes and the axes, because we're now looking at a UMAP plot not a PCA plot - the UMAP axes are not the same as the PCA axes! +> > As well as the broader patterns across the plot, we can also see some relationships between the top genes and specific clusters. The top negative gene for PC2, NKG7, is expressed at the highest level in cluster 6 and at a slightly lower level in the adjacent cluster 4.The top negative gene for PC1, FTL, is expressed in both the neighbouring clusters 1 and 6, but the top positive gene for PC3, S100A8, is only expressed in cluster 1. This must be one of the genes that caused the clustering algorithm to assign these cells into separate clusters even though they are part of the same larger group on the plot. Again, this makes sense because we used the PCA reduction to create the neighborhood graph that we then used to identify our clusters. +> {: .solution} +{: .question} + +## Other ways to visualise clusters + +UMAP plots aren't the only way to see what is going on with the clusters we've just identified - and they aren't always the best choice when we're interested in which cells express specific genes. For example, we can create violin plots to show the expression of our top six genes from the PCA across the clusters we've just identified. + +>Visualise Expression across Clusters with a Violin Plot +> +> 1. 
{% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Violin Plot with 'VlnPlot'` +> - *"Features to plot"*: Use the top positive and negative genes for PCs 1-3: `CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP``MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `3` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `3100` +> +{: .hands_on} + +
+ +![Six violin plots. CST3 is expressed in clusters 1, 5 and 7 and in some cells in cluster 8. MALAT1 is expressed across all clusters except 8. CD79A is expressed in cluster 3. NKG7 is expressed in clusters 4 and 6. HLA-DQA1 is expressed in clusters 3 and 7 with one cell in cluster 1 expressing very high levels. PPBP is expressed in cluster 8.](../../images/scrna-seurat-pbmc3k/seurat_topgenes_violin.png "Violin plots showing expression of the top genes associated with PCs 1-3 in each cluster") + +
+ +
+ +![Six violin plots. MALAT1 is expressed in most clusters, but there is less expression in clusters 1, 6, 9, and 11, which are the clusters with the highest expression of FTL. NKG7 is expressed in clusters 4, 5, and 7. HLA-DRA is expressed by clusters 1, 3, 6, and 9. S100A8 is expressed at the highest levels in cluster 1 but also appears in clusters 6 and 9. CD74 is most highly expressed in clusters 3 and 9 but is also present in other clusters.](../../images/scrna-seurat-pbmc3k/seurat_topgenes_violin_SCT.png "Violin plots showing expression of the top genes associated with PCs 1-3 in each cluster") + +
+ +> +> 1. Are the top genes for PCs 1-3 expressed in the clusters you expected? +> 2. Do you find it easier to understand gene expression with the violin plots or the UMAPs we coloured by expression in the last step? +> > +> > 1. We can see the same expression patterns in the violin plots as we did in the UMAP feature plots. For example, MALAT1 is expressed across most of the clusters while PPBP is only expressed in cluster 8.*For example, S100A8 expression is highest in cluster 1. +> > We can also see that there is some expression of the top genes outside the clusters they are most highly expressed in, although usually at low levels in a small number of cells. One exception we can see on this plot is a cell in cluster 1 that expressed very high levels of HLA-DQA1 - perhaps this cell was assigned to the wrong cluster or maybe there was some contamination or error, but it could also just be an unusual cell that really did have higher levels of this gene! +> > 2. The UMAP plots can give us a good overview of the data that you may find easier to picture and remember, but it can be hard to tell where one cluster ends and another begins. The violin plots can be clearer as each cluster is plotted separately and we can see the whole range of expression - we would have a hard time picking out that outlier in cluster 1 with high levels of HLA-DQA1 from the UMAP plot.we can spot outliers more easily on this plot. +> {: .solution} +{: .question} + +Looking at the expression of these genes can tell us something about our clusters, but the top genes from the PCA aren't necessarily the ones that will tell us what our clusters represent. Our next step is to find these marker genes. + +# Find Marker Genes + +We have now identified and plotted our clusters, groups of cells that share similar expression profiles, but we don't yet know which genes are part of these cluster-specific profiles - although looking at the top genes associated with PCs 1-3 has given us some hints. 
We need to identify the genes that drive separation between clusters. These marker genes can then be used to assign biological sense (e.g. cell type) to each cluster, based on where we know these genes are usually expressed. We can also use them to identify subtler differences between clusters, such as changes in activation or differentiation state, based on the behaviour of genes in the affected pathways. + +Marker genes are usually detected by their differential expression (DE) between clusters. We're looking for the genes that were expressed much more in one cluster than in the other(s). Seurat provides a number of statistical tests for quantifying these differences, with the Wilcoxon rank sum test as the default. + +Seurat also provides different tools for finding markers using these tests so that we can ask various questions about how genes are differentially expressed between clusters or experimental groups. We can use `FindAllMarkers` to identify the markers of each cluster compared to all the other clusters or we can use `FindMarkers` to look for differences between specific clusters or groups. Let's try out some of these options on our dataset. + +> +> When we find markers using the Seurat tools on Galaxy, we will get two outputs: a CSV file and an RDS file. Both files contain the same content, a table of marker genes and the relevant statistics from the test, but in different formats. We'll be able to read the CSV table, while Seurat tools can interact with the RDS file. Seurat doesn't save the outputs from DE tests into the original SeuratObject by default. This means that we won't be able to use the RDS outputs from finding markers if we want to perform further analysis of our dataset - we'll need to use the output from the previous step instead as that is where all our expression data, metadata, reductions and neighborhood graphs are. However, we can use the RDS markers file for plotting and investigating our marker genes. 
+{: .comment} + +## Find Positive Markers for Every Cluster Compared to the Rest + +Usually, the first question we'll want to ask is which genes define each of our clusters as this can help us to understand why we've ended up with these groups. If we're trying to learn more about a particular group of cells (e.g. a specific cell type or condition) then this can tell us which cluster or clusters these cells are in. + +The `FindAllMarkers` option is the simplest way to do this as it will run the DE test for each cluster simultaneously. The results will tell us which genes set each cluster apart from all the rest of the clusters. We might find markers that are known to be expressed in specific cell types, which will enable us to start annotating our clusters. + +We can look for both positive and negative markers, or limit the results to just the positive markers for the clusters. We'll start by looking for the positive markers of all clusters. + +> Use `FindAllMarkers` to Compare All Clusters +> +> 1. {% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Identify marker genes with 'FindAllMarkers'` +> - *"Select test to run"*: `wilcox` +> - *"Limit output to top N markers per cluster"*: `No` +> - In *"Advanced Options"*: +> - *"Only return positive markers"*: `Yes` +> +{: .hands_on} + +The output is a list of positive markers for each cluster, genes that were significantly more likely to be expressed by cells in that cluster than in the rest of the dataset. + +Take a look at the second output, the CSV file. You can click on the dataset to see a quick peek of it in the history panel or use the {% icon galaxy-eye %} icon to see the full table in the main panel. 
+ +{% snippet faqs/galaxy/histories_dataset_item.md %} + +The first column is the name of the marker gene. The following columns tell us the: + +- **p_val** - unadjusted p-value +- **avg_log2FC** - log fold-change of average expression between the two groups, which is positive if expression was higher in the first group and negative if lower (in this case, the values are all positive as we only looked for positive markers) +- **pct.1** - percentage of cells in the cluster that express this gene +- **pct.2** - percentage of cells in the rest of the dataset that express this gene +- **p_val_adj** - adjusted p-value (Bonferroni correction using all features in dataset) +- **cluster** - which cluster the gene is a marker for +- (the first column with the gene names is also repeated at the end!) + +Although there is a lot of information here, all we need to know for now is that the markers listed for each cluster are the genes that were expressed more by these cells than any of the other clusters. We can search online for these genes to get an idea of what types of cells are in our clusters. We can always search within these results or filter them to include the ones we're interested in - for example, we could look at the markers for cluster 2. + +> +> 1. Are the top genes associated with PCs 1-3 in our list of markers? Which clusters are they markers for? +> 2. Do these results match your expectations? +> 3. What were the top 5 markers for cluster 2? +> > +> > 1. If we search the markers table for our top genes (you can use Ctrl+F to do this but it may take time for the full dataset to load when you view it), we can see that CST3 is a positive marker for clusters 1, 5, and 7 while MALAT1 is a positive marker for clusters 0, 4 and 6. CD79A was a marker for cluster 3 while NKG7 was a marker for clusters 4 and 6. HLA-DQA1 was a marker for clusters 3 and 7 while PPBP was a marker for cluster 8. So, these top genes are differentially expressed by some of our clusters. 
+> > 2. The results make sense, as we would expect the top positive and negative genes for each PC to be expressed in different clusters. The results also match up fairly well with what we saw on the UMAP and violin plots - although we might have thought that MALAT1 could be a marker for clusters 2 and 3 too as it seems to be highly expressed by them. The apparent difference in expression we saw in the plot wasn't strong enough to show up in this statistical test. +> > However, the top genes associated with our PCs aren't necessarily the most significant markers for our clusters (they can appear quite far down the lists) and they are often markers for more than one cluster. Again, this makes sense, because the PCA was looking for the bigger patterns across the whole dataset, while now we're looking for differences between smaller groups of cells - the clusters. +> > 3. We could scroll down through the markers table to find the results for cluster 2, but it can be easier to filter the table instead. TODO Add in Filter step here... +> > If you click on the {% icon galaxy-eye %} for the new output in your history, you should see from the `cluster` column that we only have markers for cluster 2. The top five markers for this cluster were: +> > +> > +> > > | | | +> > > |----|------| +> > > | 1 | IL32 | +> > > | 2 | LTB | +> > > | 3 | CD3D | +> > > | 4 | IL7R | +> > > | 5 | LDHB | +> > {: .matrix} +> > +> > +> > +> > +> > +> > > | | | +> > > |----|--------| +> > > | 1 | RPS27 | +> > > | 2 | RPL32 | +> > > | 3 | RPS6 | +> > > | 4 | RPS12 | +> > > | 5 | RPS14 | +> > {: .matrix} +> > +> > +> {: .solution} +{: .question} + +## Find Markers of Cluster 2 + +If we are interested in a specific cluster, we might want to find out which genes set this cluster apart from all the rest. We can look for genes that are more likely to be expressed in this cluster (positive markers) and ones that are less likely to be expressed (negative markers). 
We might do this if we're particularly interested in one cell type or group. + +Let's try finding the positive and negative markers of cluster 2. + +>Use `FindMarkers` on a Single Cluster +> +> 1. {% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Identify marker genes for specific groups with 'FindMarkers'` +> - *"Compare markers between clusters of cells"*: `Yes` +> - *"Identity class to define markers for"*: `2` +> +{: .hands_on} + +The results from this test will look a little different than when we were comparing markers for all the clusters. + +The first column is still the name of the marker gene. The following columns tell us the: + +- **p_val** - unadjusted p-value +- **avg_log2FC** - log fold-change of average expression between the two groups, which is positive if expression was higher in the first group and negative if lower +- **pct.1** - percentage of cells in the first group that express this gene (in this case, cluster 2) +- **pct.2** - percentage of cells in the second group that express this gene (in this case, all other clusters) +- **p_val_adj** - adjusted p-value (Bonferroni correction using all features in dataset) +- (again, the gene names are repeated at the end) + +We don't have a cluster column this time as we were only testing one group against another (in this case, cluster 2 against all the rest of the clusters). + +> +> 1. What are the top five markers of cluster 2 and are they positive or negative markers? +> 2. Are these the same as the top five markers for cluster 2 when we ran `FindAllMarkers`? +> > +> > 1. 
When we look at the marker table, we can see that the first five genes listed as markers of cluster 2 are: +> > +> > +> > > | | | +> > > |----|------| +> > > | 1 | IL32 | +> > > | 2 | LTB | +> > > | 3 | CD3D | +> > > | 4 | IL7R | +> > > | 5 | LDHB | +> > {: .matrix} +> > +> > +> > +> > +> > +> > > | | | +> > > |----|--------| +> > > | 1 | RPS27 | +> > > | 2 | RPL32 | +> > > | 3 | S100A4 | +> > > | 4 | RPS6 | +> > > | 5 | RPS12 | +> > {: .matrix} +> > +> > +> > We can look at the third column, `avg_log2FC` to see if these are positive or negative markers. Although we didn't limit this test to positive markers, we can see that the avg_log2FC for the five top markers is positive, which means these are all positive markers for cluster 2. Expression of these genes was higher in cluster 2 than in the rest of the dataset. We can see that the `avg_log2FC` value for S100A4 is negative, which means this was a negative marker for cluster 2 - it was less likely to be expressed by these cells than in the rest of the dataset. The rest of the top five markers were positive markers. +> > 2. If we go back to the filtered list of cluster 2 markers, we will see the same top five markers for this cluster. Since we used `FindMarkers` to test cluster 2 against all the rest of the data, we actually performed the same test that `FindAllMarkers` does for each cluster in turn. The only difference is that we previously limited `FindAllMarkers` to positive markers only. We don't see a difference in the top five markers as these all happened to be positive markers for cluster 2, but if we keep looking down the marker tables we'll start to see differences as the negative markers we just found for cluster 2 using `FindMarkers` won't appear in our `FindAllMarkers` table. If we hadn't limited that test to positive markers, then we wouldn't see any differences. If we go back to the filtered list of cluster 2 markers, we will see most of the same genes in the top five. 
The only exception is that one negative marker, S100A4, which didn't appear in the `FindAllMarkers` results because we limited those to positive markers. We should see the same positive markers in both tables because when we used `FindMarkers` to test cluster 2 against all the rest of the data, we actually performed the same test that `FindAllMarkers` does for each cluster in turn. If we hadn't limited those results to positive markers, our marker genes would be identical. +>> +> {: .solution} +{: .question} + +## Find Markers Distinguishing Cluster 5 from Clusters 0 and 3 + +We just used `FindMarkers` to run the same test on cluster 2 as `FindAllMarkers` performs for all the clusters, but we can also use it to ask different questions. If we are interested in what makes one cluster different from another, then we might want to look for genes that are differentially expressed between two specific clusters. We can also choose to compare expression with more than one cluster. We might do this if we want to focus on differences between two specific cell types, without considering what makes them different from the rest of the dataset - this could be handy if we have two very similar cell types or subtypes. + +>Use `FindMarkers` to Compare Specific Clusters +> +> 1. {% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Identify marker genes for specific groups with 'FindMarkers'` +> - *"Compare markers between clusters of cells"*: `Yes` +> - *"Identity class to define markers for"*: `5` +> - *"Second identity class to compare"*: `0,3` +> +{: .hands_on} + +> +> 1. What are the top five markers of cluster 5 compared to clusters 0 and 3? +> 2. 
Are these the same as the top five markers for the cluster when we ran FindAllMarkers? +> > +> > 1. The top five markers in the output table are: +> > +> > +> > > | | | +> > > |----|---------------| +> > > | 1 | FCGR3A | +> > > | 2 | IFITM3 | +> > > | 3 | CFD | +> > > | 4 | CD68 | +> > > | 5 | RP11-290F20.3 | +> > {: .matrix} +> > +> > +> > +> > +> > +> > > | | | +> > > |----|----------| +> > > | 1 | GZMB | +> > > | 2 | PRF1 | +> > > | 3 | FGFBP2 | +> > > | 4 | GNLY | +> > > | 5 | GZMA | +> > {: .matrix} +> > +> > +> > +> > 2. If we go back to our `FindAllMarkers` table, we'll see that these aren't exactly the same as the top five markers for cluster 5 when we compared it to all of the rest of the dataset (remember you can filter the results of `FindAllMarkers` again if you get bored of scrolling all the way down to cluster 5!). Only two of these markers are in the top five of both lists, although we can find the other genes further down in the table if we look. +> > We wouldn't expect to see the same results because we're now looking for differences specifically between cluster 5 and clusters 0 and 3. The genes that `FindAllMarkers` identified as differentiating cluster 5 from all of the other clusters might not be best at differentiating it specifically from clusters 0 and 3 - some of those markers could actually be expressed by all three of these clusters. +> > +> {: .solution} +{: .question} + +## Find the cluster 02 markers with the highest 'classification power' + +We can also use other methods for DE analysis in Seurat. We can use the 'ROC' test to find out the 'classification power' of marker genes for our clusters. A classification power of 1 means that the expression level of this gene can perfectly assign cells to this cluster. A classification power of 0 means that the expression of this gene is useless for identifying cells in this particular cluster - it's completely random! + +>Use `FindMarkers` to Calculate Classification Power +> +> 1. 
{% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Identify marker genes for specific groups with 'FindMarkers'` +> - *"Compare markers for two groups of cells"*: `No` +> - *"Change cell identities before finding markers"*: `No` +> - *"Compare markers between clusters of cells"*: `Yes` +> - *"Identity class to define markers for"*: `0``2` +> - *"Minimum log-fold difference to test"*: `0.25` +> - *"Select test to run"*: `roc` +> - In *"Advanced Options"*: +> - *"Only return positive markers"*: `Yes` +> +{: .hands_on} + +> +> 1. What do the names of the top markers for cluster 02 have in common and what does this signify? +> +> > +> > 1. The top five markers in the output table are: +> > +> > +> > > | | | +> > > |----|-------| +> > > | 1 | RPS12 | +> > > | 2 | RPS6 | +> > > | 3 | RPL27 | +> > > | 4 | RPS32 | +> > > | 5 | RPS14 | +> > {: .matrix} +> > +> > +> > +> > +> > +> > > | | | +> > > |----|-------| +> > > | 1 | RPS27 | +> > > | 2 | RPL32 | +> > > | 3 | RPS6 | +> > > | 4 | RPS12 | +> > > | 5 | RPS14 | +> > {: .matrix} +> > +> > +> > +> > Many of the top markers (including all of the top five) have names starting with RP. In humans, this gene naming pattern indicates that they are ribosomal genes (encoding proteins or RNAs that form ribosomes). Cluster 02 might represent a group of cells that are very busy making new proteins using all these ribosomes. If we expect our data to include a cell type that has lots of ribosomes, then this could be a sign that they've formed their own cluster, so we'll be happy with this result (this is actually the case here, as we'll see in the next section). 
+> > However, if we don't expect to see differences in ribosomal content between cells, then we might suspect that we've ended up with a cluster based on ribosomal RNA content rather than on cell type. In this case, we might want to go back to the QC steps. We could score the cells for `percent.ribo` in the same way we did for `percent.mt`. We could then filter out cells with unusually high proportions of ribosomal genes or regress out the variation associated with this characteristic during the scaling step. Just as when we're filtering cells by mitochondrial RNA proportions, we would need to think carefully about this - we wouldn't want to eliminate a cell type just because it has higher proportions of ribosomal genes, which is what we could end up doing if we tried it with this particular dataset. +> > +> {: .solution} +{: .question} + +# Identify Cell Types + +Now that we've arranged our data into clusters, we can start to identify the cell types forming each cluster. We can do this by looking for clusters that express genes that we know a certain type of PBMC should express. We can also check that the clusters don't express genes that we wouldn't expect to see if we know any negative markers for a particular cell type. + +We can take two different approaches when identifying our cells: + +1. **Unsupervised:** + We could start with our list of marker genes and then see if they are known to be expressed in specific cell types. In this case, we are starting with what the data tells us - the list of genes produced by our DE tests. +2. **Supervised:** + We could start with a list of known marker genes for each cell type we expect to see in our data, then look to see which clusters are expressing these genes. With this approach, we are starting with a list of genes selected from previous research and then searching for them in our data - we're choosing or 'supervising' the genes we use. 
+ +If we're taking the unsupervised approach, then we might want to limit our markers to the ones that showed the biggest differences in expression. We can do this using some of the extra parameters provided by `FindAllMarkers`. Let's refine the parameters to find the top 10 positive markers for each cluster that showed a log fold change of at least 1 (meaning their expression was at least twice as high). + +>Find the Top 10 Positive Markers for Each Cluster +> +> 1. {% tool [Seurat Find Clusters](toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Identify marker genes with 'FindAllMarkers'` +> - *"Minimum log-fold difference to test"*: `1.0` +> - *"Select test to run"*: `wilcox` +> - *"Limit output to top N markers per cluster"*: `Yes` +> - *"Number to show"*: `10` +> - In *"Advanced Options"*: +> - *"Only return positive markers"*: `Yes` +> +> 2. Rename the second output (the csv file) as `DE Markers` +> +> {% snippet faqs/galaxy/datasets_rename.md %} +> +{: .hands_on} + +We could now search online or in a database such as [PanglaoDB](https://panglaodb.se/) or the [Human Protein Atlas](https://www.proteinatlas.org/) to find out which cell types express the markers we've identified for each cluster. If we look at the top 10 markers for cluster 3, we see that the first gene in the list is CD79A. + +> +> 1. Which type of cells express CD79A? +> > +> > 1. A quick search online or in one of the databases should tell you that CD79A is usually expressed by B cells, which are a type of PBMC, so we would expect to find some in this dataset. This suggests that cluster 3 is made up of B cells. 
+> > +> {: .solution} +{: .question} + +If we were to continue with this unsupervised approach, we would look at more of the top markers for cluster 3 to check that they are expressed by the same cell type as CD79A. We would then repeat the process for the rest of our clusters. + +The unsupervised approach might be the only option if we don't know exactly what we're looking for - we might be investigating rare cell types, working with samples from an understudied species, or looking at the changes that happen in a particular disease. However, in many cases, we can use what we already know to begin to understand our dataset. We can look for genes that previous research has identified as markers for the cell types we expect to see. We won't always get all the answers we need from a supervised approach like this, but it can be a quicker way to identify known cell types. + +A supervised approach should work well for the current dataset because PBMCs have been very well characterised. We know which cell types should be present in a PBMC sample and which genes each of these cell types should be expressing. + +
+ +To begin, we'll need a list of these canonical markers for PBMCs. Let's use the one provided in the [original Seurat version](https://satijalab.org/seurat/articles/pbmc3k_tutorial) of this tutorial: + +## Known Markers for PBMCs + +> | Cell Type | Marker Genes | +> | --------------|---------------| +> | Naive CD4+ T | IL7R, CCR7 | +> | CD14+ Mono | CD14, LYZ | +> | Memory CD4+ T | IL7R, S100A4 | +> | B | MS4A1 | +> | CD8+ T | CD8A | +> | FCGR3A+ Mono | FCGR3A, MS4A7 | +> | NK | GNLY, NKG7 | +> | DC | FCER1A, CST3 | +> | Platelet | PPBP | +{: .matrix} + +
+ +
+ +To begin, we'll need a list of these canonical markers for PBMCs. Let's use the ones provided in the original Seurat [clustering](https://satijalab.org/seurat/articles/pbmc3k_tutorial) and [SCTransform](https://satijalab.org/seurat/articles/sctransform_vignette.html) tutorials: + +> | Cell Type | Marker Genes | +> | ---------------------|---------------------| +> | CD14+ Mono | CD14, LYZ | +> | B | MS4A1 | +> | FCGR3A+ Mono | FCGR3A, MS4A7 | +> | NK | GNLY, NKG7 | +> | DC | FCER1A, CST3 | +> | Platelet | PPBP | +> | Naive CD4+ T | IL7R, CCR7 | +> | Memory CD4+ T | IL7R, S100A4 | +> | IFN-activated CD4+ T | IL7R, ISG15, IL32 | +> | Naive CD8+ T | CD8A, CCR7 | +> | Memory CD8+ T | CD8A, CCL5 | +> | Effector CD8+ T | CD8A, GZMK | +{: .matrix} + +> +> If you've already done the other version of this tutorial, using the separate preprocessing tools, you might notice this list includes some additional markers for T cells. SCTransform can reveal more of the biological variation in our dataset - remember that it produced 12 clusters, rather than the nine we would find using the separate preprocessing tools. We can use these extra markers to identify those additional clusters. +{: .comment} + +
+ +The suggested marker gene for B cells isn't our top marker, CD79A, but MS4A1. If we look back at our table of the top 10 markers, we can see that this was actually the second gene in the list, so if we'd continued with the unsupervised approach we'd have ended up using the same gene to help identify our B cells. Both of these genes are actually very good markers for B cells, so some lists of B cell markers might suggest using CD79A rather than (or in addition to) MS4A1. If we plot the expression of both genes across our clusters, we should be able to see where our B cells are. + +>Make Violin Plots of B Cell Markers +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Violin Plot with 'VlnPlot'` +> - *"Features to plot"*: `CD79A,MS4A1` +> +{: .hands_on} + +
+ +![Two violin plots showing expression of CD79A and MS4A1 mainly in cluster 3](../../images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin.png "Violin plots showing expression of CD79A and MS4A1 by cluster") + +
+ +
+ +![Two violin plots showing expression of CD79A and MS4A1 mainly in cluster 3](../../images/scrna-seurat-pbmc3k/seurat_MS4A1_CD79A_Violin_SCT.png "Violin plots showing expression of CD79A and MS4A1 by cluster") + +
+ +Both CD79A and MS4A1 are mainly expressed in cluster 3, with very little expression in any of the other clusters. Since most cells in cluster 3 are expressing these two well-known markers of B cells and our DE test showed they were expressed more by this cluster than in the rest of the data, we can confidently say that cluster 3 represents our B cell population. + +Sometimes the results aren't quite so clear as markers might be expressed across multiple clusters of the same or different cell types. We might need to use multiple markers to differentiate between these groups - as with T cells in this dataset. + +>Make Violin Plots of T Cell Markers +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Violin Plot with 'VlnPlot'` +> - *"Features to plot"*: `IL7R,CCR7,S100A4,CD8A``IL7R,CCR7,S100A4,CD8A,GZMK,CCL5,IL32,ISG15` +> - In *"Plot Formatting Options"*: +> - *"Number of columns to display"*: `4` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `3100` +> +{: .hands_on} + +
+ +![The first violin plot shows IL7R expressed in clusters 0, 2 and 4 (although there is some lower level expression in cluster 7). Second plot shows high CCR7 expression in cluster 0 with some expression in cluster 2. Plot 3 shows S100A4 expression across most clusters, including at high levels in cluster 2. Fourth plot shows CD8A expression in cluster 4.](../../images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers.png "Violin plots showing expression of IL7R, CCR7, S100A4, and CD8A by cluster") + +We can see that IL7R (also known as CD4+) is mainly expressed in clusters 0, 2, and 4, with some expression in cluster 7. If we were using this marker alone, we could only identify these as clusters of T cells - and in the case of cluster 7, we would be making a mistake as once we plot markers of other cell types, we'll see that these aren't T cells at all! + +If we look at which other markers are being expressed (or not expressed) by the T cell clusters, we can identify different types of T cells. Expression of CCR7 is associated with naive T cells, while S100A4 is associated with memory T cells, so you should be able to identify clusters of naive and memory CD4+ T cells in these plots. We can also see that some of our cells are expressing CD8A as well as CD4, so these are CD8+ T cells. We'll label these clusters below after looking at our other cell type markers. + +
+ +
+ +![The first violin plot shows IL7R expressed in clusters 0, 2, 4, 8, and 10. Plot 2 shows CCR7 expression in clusters 2, 8, and 10. Plot 3 shows S100A4 expression across most clusters, including at high levels in cluster 0. Plot 4 shows expression of CD8A in clusters 4, 7, and 8. Plot 5 shows GZMK expression in cluster 4 with some lower level expression in cluster 7. Plot 6 shows CCL5 expression in clusters 4, 5, 7, and 11. Plot 7 shows IL32 expression across most clusters. Plot 8 shows ISG15 expression at low levels in most clusters, with higher expression in cluster 10.](../../images/scrna-seurat-pbmc3k/seurat_violin_T_cell_markers_SCT.png "Violin plots showing expression of IL7R, CCR7, S100A4, CD8A, GZMK, CCL5, IL32, and ISG15 by cluster") + +We can see that IL7R (also known as CD4+) is mainly expressed in clusters 0, 2, 4, 8, and 10. With this marker alone, we could only identify these as clusters of T cells, but if we look at which other markers are being expressed (or not expressed) by these T cell clusters, we can identify different types of T cells. To start with, we can see that two of these clusters, 4 and 8, are also expressing CD8A, which identifies them as CD8+ T cells. The expression of CD8A also helps us to identify one more cluster of T cells that we wouldn't have spotted using CD4 alone - cluster 7. + +We can use the rest of our T cell markers to identify different subtypes of CD4+ and CD8+ T cells. For our CD4+ cells, expression of CCR7 is associated with naive T cells, while S100A4 is associated with memory T cells. We also have some CD4+ T cells expressing ISG15 and IL32, which marks them as IFN activated T cells. For the CD8+ clusters, we can again use CCR7 to pick out the naive T cells, but we will use CCL5 to identify the memory T cells. We can also spot a third group of CD8+ T cells that are expressing GZMK, which identifies them as effector T cells. + +You should be able to identify clusters of all six of these T cell subtypes in your plots. 
Using SCTransform for preprocessing has allowed us to break down our T cells into more clusters than if we used the separate preprocessing tools. It looks like these include additional subtypes of CD8+ T cells, as we have three clusters expressing CD8A alongside different T cell subtype markers, when we would only have one cluster of CD8+ cells if we used the separate preprocessing steps. We'll label all of these clusters later, after looking at the markers for other cell types. + +
+ +> +> How do you know which markers to use to identify different cell types? +> Unfortunately, there isn't always an easy answer - it depends on how well defined the cell types you're interested in are. A quick search online or a look through the literature should give you an idea of which markers other people have used to identify the cell types you expect to see in your sample. You could look for genes that have been identified by previous single cell studies or ones that are used to identify or isolate a specific cell type. For well studied cell types like PBMCs, it should be easy to find lists of canonical markers that are often used. +{: .comment} + +To continue with the supervised approach, we can check the expression of the chosen markers in our clusters to see if they match the known expression patterns of specific cell types. We could just look for these genes in our list of the top positive markers, but there are also several options for visualising marker gene expression. We'll try a few of them here to give you an idea of the types of plots you can use in the future. + +> Colour the UMAP Plot by Canonical Marker Expression +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Visualize expression with 'FeaturePlot'` +> - *"Features to plot"*: `IL7R,CCR7,CD14,LYZ,S100A4,MS4A1,CD8A,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP``CD14,LYZ,MS4A1,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP,IL7R,CCR7,S100A4,CD8A,GZMK,CCL5,IL32,ISG15` +> - *"Name of reduction to use"*: `umap` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `4100` +> - *"Height of plot in pixels"*: `4100` +> +{: .hands_on} + +
+ +![14 UMAP Plots coloured by expression of different genes. Plot 1 shows IL7R expression in clusters 0, 2 and part of cluster 4. Plot 2 shows CCR7 expression in cluster 0. Plot 3 shows CD14 expression mainly in cluster 1. Plot 4 shows LYZ expression in clusters 1, 5 and 7. Plot 5 shows high S100A4 expression in clusters 1, 5, and 7 with medium expression in clusters 2, 4 and 6. Plot 6 shows MS4A1 expression in cluster 3. Plot 7 shows CD8A expression in cluster 4. Plot 8 shows FCGR3A expression in clusters 5 and 6. Plot 9 shows MS4A7 expression in cluster 5. Plot 10 shows GNLY expression in cluster 6. Plot 11 shows NKG7 expression in clusters 4 and 6. Plot 12 shows FCER1A expression in cluster 7. Plot 13 shows CST3 expression in clusters 1, 5 and 7. Plot 14 shows PPBP expression in cluster 8.](../../images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers.png "UMAP plots showing expression of canonical markers for PBMCs") + +
+ +
+ +![18 UMAP Plots coloured by expression of different genes. Plot 1 shows CD14 expression mainly in cluster 1. Plot 2 shows LYZ expression in clusters 1, 6, and 9. Plot 3 shows MS4A1 expression in cluster 3. Plot 4 shows FCGR3A expression in clusters 5 and 6. Plot 5 shows MS4A7 expression in cluster 6. Plot 6 shows GNLY expression in cluster 5. Plot 7 shows NKG7 expression in clusters 5 and 7. Plot 8 shows FCER1A expression in cluster 9. Plot 9 shows CST3 expression in clusters 1, 6, 9, and 11. Plot 10 shows PPBP expression in cluster 11. Plot 11 shows IL7R expression in clusters 0, 2, 4, 8, 10 and part of cluster 7. Plot 12 shows CCR7 expression in clusters 2, 8, and 10. Plot 13 shows high S100A4 expression in clusters 1 and 6 with medium expression in clusters 0, 4, 5, 7 and 9. Plot 14 shows CD8A expression in clusters 4, 7, and 8. Plot 15 shows GZMK expression in cluster 4. Plot 16 shows CCL5 expression in clusters 4, 5, and 7. Plot 17 shows IL32 expression in clusters 0, 2, 4, 5, 6, 7, and 11. Plot 18 shows high ISG15 expression in cluster 10 with some expression in clusters 1 and 6](../../images/scrna-seurat-pbmc3k/seurat_FeaturePlot_CellTypeMarkers_SCT.png "UMAP plots showing expression of canonical markers for PBMCs") + +
+ +We have produced a series of UMAP plots, each coloured according to the expression level of a different marker gene. We can look back at the UMAP plots we created earlier showing the cluster numbers to see which areas correspond to specific clusters. + +> +> 1. Are the markers clearly associated with one or more clusters? +> 2. Are the canonical markers only expressed in these clusters? +> 3. How similar are the clusters in terms of their expression of these cell type markers? Are more similar cell types closer together on the UMAP plot? +> > +> > 1. Each cell type marker is clearly expressed at higher levels in certain parts of the plots - we don't have the same problem as we did when we plotted the top genes for PCs 1-3 and found that MALAT1 was expressed at quite high levels across the entire plot! When we compare the expression plots to the clusters we previously plotted, we can see that they match up quite well with each other. Most of the markers are expressed more in certain clusters, but there isn't always a clear boundary, especially for clusters that are close together in the plot. Some markers are associated with a single cluster, such as MS4A1 and PPBP, while others are expressed across multiple clusters. +> > 2. Although marker expression mainly occurs in one or more clusters, there is still some expression by cells in other parts of the plot. It could be that these genes are sometimes expressed by other cell types, but these could also be cells of the same type that have simply ended up further away in the plot. It can sometimes be hard to tell which cluster these cells have been assigned to from these plots, especially when you consider that some points could be hidden behind others. Cells of the same type might have been assigned to different clusters, but there could also be a few cells that have been plotted at a distance from where the main part of their assigned cluster appears on this plot. 
If we look back at the UMAP coloured by cluster, we can spot a couple of differently coloured cells at the tip of cluster 7 and by cluster 8.If we look back at the UMAP coloured by cluster, we can spot a couple of differently coloured cells mixed into cluster 3 and at the tip of cluster 10. +> > 3. We can distinguish between all of our clusters on the basis of which known markers they are shown to express. Each cluster expresses a unique combination of these genes, although the markers aren't only expressed in the clusters they're mainly associated with. Some of the clusters are more similar to each other, particularly those that are close together on the plot, forming part of the same larger group of cells. For example, clusters 1, 5 and 7 are grouped together on the plot and all express a lot of LYZ, S100A4, and CST3.For example, clusters 1 and 6 are grouped together on the plot and both express a lot of S100A4 and CST3. Since the UMAP was created based on the PCA, which was in turn based on similarities in expression of the highly variable genes, it makes sense that clusters that are plotted closer together are more similar in expression to each other. However, we should always remember that we're only looking at a 2D plot of the first two UMAP dimensions, so we shouldn't read too much into what we see! The plot can't tell us everything about the relationships between cells and clusters. +> {: .solution} +{: .question} + +Another option for visualising marker gene expression across clusters is the Violin Plot, which can sometimes make it easier to distinguish between clusters. Each cluster is plotted separately, so we don't need to work out which cluster a cell belongs to, which can be tricky when we have similar clusters close together and often overlapping on the UMAP plot. + +> Use Violin Plots to Compare Expression of Canonical Markers by Cluster +> +> 1. 
{% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Violin Plot with 'VlnPlot'` +> - *"Features to plot"*: `IL7R,CCR7,CD14,LYZ,S100A4,MS4A1,CD8A,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP``CD14,LYZ,MS4A1,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP,IL7R,CCR7,S100A4,CD8A,GZMK,CCL5,IL32,ISG15` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `4100` +> - *"Height of plot in pixels"*: `4100` +> +{: .hands_on} + +
+ +![14 Violin plots showing expression of different genes. Plot 1 shows IL7R expression in clusters 0, 2, and 4, with some expression in cluster 7. Plot 2 shows CCR7 expression in cluster 0 and to a lesser extent in cluster 2. Plot 3 shows CD14 expression in cluster 1. Plot 4 shows LYZ expression across all clusters, with the highest expression in clusters 1 and 7. Plot 5 shows S100A4 expression across all clusters, with the highest expression in clusters 1 and 5. Plot 6 shows MS4A1 expression in cluster 3. Plot 7 shows CD8A expression in cluster 4. Plot 8 shows FCGR3A expression in clusters 5 and 6. Plot 9 shows MS4A7 expression in cluster 5 and to a lesser extent in cluster 1. Plot 10 shows GNLY expression in clusters 4 and 6. Plot 11 shows NKG7 expression in clusters 4 and 6, with some expression at lower levels in clusters 5 and 7. Plot 12 shows FCER1A expression in cluster 7. Plot 13 shows CST3 expression in clusters 1, 5, and 7 with some expression in cluster 8. Plot 14 shows PPBP expression in cluster 8.](../../images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers.png "Violin plots showing expression of canonical marker genes by cluster") + +
+ +
+ +![18 Violin plots showing expression of different genes. Plot 1 shows CD14 expression in cluster 1. Plot 2 shows LYZ expression across all clusters, with high expression in cluster 1 and 9. Plot 3 shows MS4A1 expression in cluster 3. Plot 4 shows FCGR3A expression in clusters 5, 6, and 7. Plot 5 shows MS4A7 expression in cluster 6 with some low expression in cluster 1. Plot 6 shows high GNLY expression in cluster 5 as well as expression at various levels in cluster 7. Plot 7 shows NKG7 expression in clusters 4, 5, and 7. Plot 8 shows FCER1A expression in cluster 9. Plot 9 shows CST3 expression in clusters 1, 6, 9, and 11. Plot 10 shows PPBP expression in cluster 11. Plot 11 shows IL7R expression in clusters 0, 2, 4, 8, and 10. Plot 12 shows CCR7 expression in clusters 2, 8, and 10. Plot 13 shows S100A4 expression across all clusters, with the highest expression in clusters 1, 6, and 9. Plot 14 shows CD8A expression in cluster 4, 7, and 8. Plot 15 shows GZMK expression in cluster 4 with some expression in cluster 7. Plot 16 shows CCL5 expression in cluster 4, 5, 7, and 11. Plot 17 shows IL32 expression across clusters 0, 2, 4, 5, 7, 8, and 10 with some expression in cluster 11. Plot 18 shows ISG15 expression across most clusters, with the highest expression in cluster 10.](../../images/scrna-seurat-pbmc3k/seurat_Violin_CellTypeMarkers_SCT.png "Violin plots showing expression of canonical marker genes by cluster") + +
+ +> +> 1. Are the markers clearly associated with one or more clusters? +> 2. Are the canonical markers only expressed in these clusters - and are they expressed at the same level by all the cells in these clusters? +> 3. How similar are the clusters to each other in terms of their marker gene expression - are there any clusters that seem particularly alike? +> > +> > 1. The violin plot can make the associations between clusters and markers look clearer, since each cluster is plotted separately and labelled by number. Some of the markers are clearly expressed mainly in one or a few clusters, while others like LYZ and S100A4 are expressed across most of the clusters. +> > 2. None of the markers are unique to a specific cluster - there are always a few dots (or cells) in the other clusters that are expressing these genes. We can also see that the expression level varies within each cluster as there are cells at different positions along the vertical axis. As well as looking at the expression level in individual cells (dots) we can compare the overall expression patterns by looking at the coloured violin shapes, which show us where most of the cells in each cluster are plotted. For example, we can see there was more variation in the expression level of NKG7 in cluster 4 (where the violin shape is longer) than there was in cluster 65 (where there is a short violin shape as all the cells showed high expression of this gene). +> > 3. Each cluster has a unique pattern of expression of the canonical markers. Some clusters are very distinct as they express markers that are rarely seen in other clusters. Cluster 3 is the only cluster to express MS4A1 and it doesn't express much of the other markers. Other clusters seem to be more similar to each other, with multiple clusters of T cells all expressing IL7R, although the expression levels of other genes vary between these cells allowing us to classify them as different subtypes. 
This makes sense because cluster 3 is far away from all the other clusters in the UMAP while the different T cell clusters are close to each other because of their similarities. +> {: .solution} +{: .question} + +Based on our table of marker genes and these plots, we know which clusters were expressing the canonical markers for each cell type or subtype. We can use this information to annotate our clusters by cell type. + +
+ +> +> 1. Can you identify the cell types for each cluster? +> > +> > 1. Based on the expression of the known PBMC markers, we can assign the following cell types to our clusters: +> > +> > > | Cell Type | Marker Genes | Clusters | +> > > | ---------------------|-------------------|-----------------| +> > > | Naive CD4+ T | IL7R, CCR7 | 0 | +> > > | CD14+ Mono | CD14, LYZ | 1 | +> > > | Memory CD4+ T | IL7R, S100A4 | 2 | +> > > | B | MS4A1 | 3 | +> > > | CD8+ T | CD8A | 4 | +> > > | FCGR3A+ Mono | FCGR3A, MS4A7 | 5 | +> > > | NK | GNLY, NKG7 | 6 | +> > > | DC | FCER1A, CST3 | 7 | +> > > | Platelet | PPBP | 8 | +> > {: .matrix} +> {: .solution} +{: .question} + +
+ +
+ +> +> 1. Can you identify the cell types for each cluster? +> > +> > 1. Based on the expression of the known PBMC markers, we can assign the following cell types to our clusters: +> > +> > > | Cell Type | Marker Genes | Clusters | +> > > | ---------------------|-------------------|----------------| +> > > | Memory CD4+ T | IL7R, S100A4 | 0 | +> > > | CD14+ Mono | CD14, LYZ | 1 | +> > > | Naive CD4+ T | IL7R, CCR7 | 2 | +> > > | B | MS4A1 | 3 | +> > > | Effector CD8+ T | CD8A, GZMK | 4 | +> > > | NK | GNLY, NKG7 | 5 | +> > > | FCGR3A+ Mono | FCGR3A, MS4A7 | 6 | +> > > | Memory CD8+ T | CD8A, CCL5 | 7 | +> > > | Naive CD8+ T | CD8A, CCR7 | 8 | +> > > | DC | FCER1A, CST3 | 9 | +> > > | IFN-activated CD4+ T | IL7R, ISG15, IL32 | 10 | +> > > | Platelet | PPBP | 11 | +> > {: .matrix} +> {: .solution} +{: .question} + +
+ +We can now rename our clusters using these cell names, while keeping a copy of the original idents (the cluster numbers) in case we want to use them again. It will be easier to interpret our plots and any downstream analyses we run if we can use cell names rather than cluster numbers! + +> Rename Clusters with Cell Types +> +> 1. {% tool [Seurat Data Management](toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `UMAP Results` (output of **Seurat Run Dimensional Reduction** {% icon tool %}) +> - *"Method used"*: `Manipulate Seurat Object` +> - *"Manipulation to perform"*: `Rename idents` +> - *"Rename all idents"*: `Yes` +> - *"New names"*: `CD4 Naive T, CD14 Mono, CD4 Memory T, B, CD8 T,FCGR3A Mono, NK, DC, Platelet` (if you used the separate preprocessing steps) or `CD4 Memory T, CD14 Mono, CD4 Naive T, B, CD8 Effector T, NK, FCGR3A Mono, CD8 Memory T, CD8 Naive T,DC, CD4 IFN-activated T,Platelet` (if you used SCTransform) +> - *"Save copy of old idents first"*: `Yes` +> +> 2. Rename the output as `Annotated Clusters` +> +{: .hands_on} + +Now we can plot our UMAP again, this time showing the names of our clusters. + +> Revisualise the UMAP with Cell Type Annotations +> +> 1. {% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Annotated Clusters` (output of **Seurat Data Management** {% icon tool %}) +> - *"Method used"*: `Visualize Dimensional Reduction with 'DimPlot'` +> - *"Name of reduction to use"*: `umap` +> +{: .hands_on} + +
+ +![Plot showing three big groups of cells divided into 9 clusters and coloured by cell type. The smallest of these three main groups only contains cells coloured as B cells. The other two groups are made up of cells from different cell types.](../../images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes.png "UMAP coloured by cell type") + +
+ +
+ +![Plot showing three big groups of cells divided into 12 clusters and coloured by cell type. The smallest of these three main groups only contains cells coloured as B cells. The other two groups are made up of cells from different cell types.](../../images/scrna-seurat-pbmc3k/seurat_UMAP_DimPlot_CellTypes_SCT.png "UMAP coloured by cell type") + +
+ +## Canonical Markers vs Marker Genes + +We've now annotated our clusters by cell type using the supervised approach, but let's try making one more type of plot that we could have used during this process. We can make heatmaps of the canonical markers for each cell type and of the top markers for each cluster to see how expression of these genes varies within and between our clusters. + +In order to create a heatmap, we need to prepare a tabular file with a list of the genes we want to plot. For the canonical markers, you could create this table outside of Galaxy and then upload it or follow the first step below to create the file in Galaxy. + +> Create Heatmaps to Compare Expression by Cluster - Canonical Markers +> +> 1. Use the Upload Data - Paste/Fetch data option to create a table of genes to plot. Select the input type as **tabular** and enter a list of genes, one on each row. You can type these in or copy and paste the following list - don't copy the empty header row if you do this! +> +> {% snippet faqs/galaxy/datasets_create_new_file.md format="tabular" %} +> +> +> | | +> |--------| +> | IL7R | +> | CCR7 | +> | CD14 | +> | LYZ | +> | S100A4 | +> | MS4A1 | +> | CD8A | +> | FCGR3A | +> | MS4A7 | +> | GNLY | +> | NKG7 | +> | FCER1A | +> | CST3 | +> | PPBP | +> +> +> +> +> +> | | +> |--------| +> | IL7R | +> | GZMK | +> | CCL5 | +> | CCR7 | +> | CD14 | +> | IL32 | +> | ISG15 | +> | LYZ | +> | S100A4 | +> | MS4A1 | +> | CD8A | +> | FCGR3A | +> | MS4A7 | +> | GNLY | +> | NKG7 | +> | FCER1A | +> | CST3 | +> | PPBP | +> +> +> +> 2. Rename the file as `Canonical Markers` when it has finished uploading and make sure the datatype is **tabular** +> +> {% snippet faqs/galaxy/datasets_change_datatype.md %} +> +> 3. 
{% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Annotated Clusters` (output of **Seurat Data Management** {% icon tool %}) +> - *"Method used"*: `Visualize expression with 'DoHeatmap'` +> - {% icon param-file %} *"List of features to plot"*: `Canonical Markers` (Input dataset) +> - In *"Plot Formatting Options"*: +> - *"Size of text above colour bar"*: `3.5` +> - *"Angle of text above colour bar"*: `60` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `4100` +> - *"Height of plot in pixels"*: `4100` +> +{: .hands_on} + +
+ +![Heatmap showing expression of IL7R mainly in CD4NaiveT, CD4MemoryT, and CD8T cells, expression of CCR7 mainly in CD4NaiveT, CD4MemoryT, and B cells, expression of CD14 in CD14Mono cells, expression of LYZ in CD14Mono, FCGR3AMono, and DC cells, expression of S100A4 in CD14Mono and FCGR3AMono cells, expression of MS4A1 in B cells, expression of CD8A in CD8T cells, expression of FCGR3A in FCGR3AMono and NK cells, expression of MS4A7 in FCGR3AMono and some CD14Mono cells, expression of GNLY in NK cells and to a lesser extent in CD8T cells, expression of NKG7 in CD8T and NK cells, expression of FCER1A in DC cells, expression of CST3 in CD14Mono, FCGR3AMono and DC cells, and expression of PPBP in Platelets.](../../images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers.png "Heatmap showing expression of known PBMC markers by cluster") + +
+ +
+ +![Heatmap showing expression of IL7R mainly in CD4MemoryT, CD4NaiveT, and CD8EffectorT cells, expression of GZMK in CD8EffectorT cells and some CD8MemoryT cells, expression of CCL5 in CD8EffectorT, CD8MemoryT and most NK cells, expression of CCR7 mainly in CD4NaiveT and CD8NaiveT cells, expression of CD14 in CD14Mono cells, expression of IL32 in CD4MemoryT, CD8EffectorT, and CD8MemoryT cells as well as some NK cells, expression of ISG15 in CD4IFN-activatedT cells as well as some CD14Mono and FCGR3AMono cells, expression of LYZ in CD14Mono and DC cells, expression of S100A4 mainly in CD14Mono and FCGR3AMono cells but also in some CD4MemoryT cells, expression of MS4A1 in B cells, expression of CD8A in CD8EffectorT, CD8MemoryT, and CD8NaiveT cells, expression of FCGR3A in FCGR3AMono and NK cells as well as some CD8MemoryT cells, expression of MS4A7 in FCGR3AMono cells with some expression by CD14Mono cells, expression of GNLY in NK cells, expression of NKG7 in NK and CD8MemoryT cells as well as some CD8EffectorT cells, expression of FCER1A in DC, expression of CST3 in CD14Mono, FCGR3AMono and DC cells, and expression of PPBP in Platelets.](../../images/scrna-seurat-pbmc3k/seurat_DoHeatmap_CellType_markers_SCT.png "Heatmap showing expression of known PBMC markers by cluster") + +
+ +Rather than looking at the canonical markers, we might want to make a heatmap of the top markers from our DE analysis. +In order to do this, we'll need to turn the output from `FindAllMarkers` into a tabular file type. We can then cut out the column with the list of gene names, getting rid of the column header, and use this as an input for the plot. + +> Create Heatmaps to Compare Expression by Cluster - Markers from DE +> +> 1. Click on the {% icon galaxy-pencil %} pencil icon of the file we renamed as `DE Markers` (this was the CSV output from `FindAllMarkers`) then select {% icon galaxy-chart-select-data %} Datatypes in the central panel. Choose the second option, `Convert to Datatype` and make sure `tabular (using 'Convert CSV to tabular')` is selected in the drop down menu before pressing the `Create Dataset` button. This will create a new, tabular version of the dataset at the top of your history - make sure that this is the version you use in the next step. +> +> 2. {% tool [Table Compute](toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0) %} with the following parameters: +> - *"Input Single or Multiple Tables"*: `Single Table` +> - {% icon param-file %} *"Table"*: `DE Markers` (**tabular** output of **Convert CSV to tabular** {% icon tool %}) +> - *"Input data has"*: +> - `Select` Column names on the first row +> - `Unselect` Row names on the first column +> - *"Type of table operation"*: `Drop, keep or duplicate rows and columns` +> - *"List of columns to select"*: `1` +> - *"Output formatting options"*: +> - `Unselect` Output column headers +> - `Unselect` Output row headers +> +> 3. Rename this file as `Input DE Markers` - if you take a look at it using the {% icon galaxy-eye %}, you should see the column of gene names with no header line +> +> 4. 
{% tool [Seurat Visualize](toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0) %} with the following parameters: +> - {% icon param-file %} *"Input file with the Seurat object"*: `Annotated Clusters` (output of **Seurat Data Management** {% icon tool %}) +> - *"Method used"*: `Visualize expression with 'DoHeatmap'` +> - {% icon param-file %} *"List of features to plot"*: `Input DE Markers` (output of **Table Compute** {% icon tool %}) +> - In *"Plot Formatting Options"*: +> - *"Size of text above colour bar"*: `3.5` +> - *"Angle of text above colour bar"*: `60` +> - *"Change size of plot"*: `Yes` +> - *"Width of plot in pixels"*: `4100` +> - *"Height of plot in pixels"*: `4100` +> +{: .hands_on} + +
+ +![Heatmap showing blocks of higher expression for the top 10 markers in the clusters they are markers for. Most cells in each cluster express the markers for that cluster but the patterns are stronger for some genes than others. Some cells outside the clusters also express the markers. For example the CD8T cells show some expression of the top markers for CD4NaiveT and CD4MemoryT cells, although expression of their own top markers is much higher.](../../images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers.png "Heatmap showing expression of the top 10 markers for each cluster") + +
+ +
+ +![Heatmap showing blocks of higher expression for the top 10 markers in the clusters they are markers for. Most cells in each cluster express the markers for that cluster but the patterns are stronger for some genes than others. Some cells outside the clusters also express the markers. For example, the different T cell subtypes often show high expression of the top markers for other T cell groups.](../../images/scrna-seurat-pbmc3k/seurat_DoHeatmap_TopPositiveMarkers_SCT.png "Heatmap showing expression of the top 10 markers for each cluster") + +
+ +Looking at these plots, we can see that there are clear associations between certain genes and clusters. In both cases, we can also see that there is variation in the expression level of these genes within the associated clusters, with some cells expressing little or none of the gene. We can also see cells outside of the associated clusters that are expressing the genes. This is true for both the known cell type markers and the markers we identified through our DE analysis - even when a statistical test has identified a significant association with one cluster, there are still differences between cells. + +Comparing the two plots also shows us why the supervised approach can be faster - we had fewer genes to consider and the patterns can also be a bit clearer. If we know enough about the cells we're trying to identify, then we can make our lives easier by taking the supervised approach. However, it is always a good idea to try multiple approaches when we can, as we'll have more confidence in our results if they both end up giving the same answers! + +> +> 1. Which plot type was best for annotating clusters? +> 2. Are you happy with this clustering - even for cluster 0 (or cluster 2 if you used SCTransform), the CD4NaiveT cells, which had a lot of ribosomal genes in its top markers list? +> > +> > 1. The best type of plot for identifying cell types can depend on your data as well as your own personal preferences. You might find one plot easier to interpret than another. It can also be helpful to create different types of plots as some patterns may be clearer on one type while others are clearer on another. It's also good to be able to confirm your interpretation on multiple plots! 
+> > UMAP plots are often used to provide a quick and memorable overview of the data, but it can be tricky to match the expression patterns to the clusters, especially given the limitations of these plots - some cells can be hidden, adjacent clusters can blend into each other, and we can't rely on the 2D plot to accurately represent all the relationships between cells and clusters. +> > Violin plots present each cluster separately, so they can make it easier to differentiate between clusters. Since every cluster is given the same amount of space on the plot, no matter how many cells it contains, violin plots can also make it easier to see what's going on with smaller clusters. We can also get a clearer idea of how much variation there is within and between clusters, although some of the points may still be hidden behind others. +> > Heatmaps can give us the best overview of the variation between cells as each cell is given its own little section on the plot. We can see how consistently the markers are expressed within the cluster and how common it is for cells outside the cluster to express the same genes. We can also see the overall patterns as blocks of cells with similar expression profiles, including the clusters that share similar patterns. However, heatmaps can be less useful if we want to focus on individual cells or genes, as it can be hard to pick out details. +> > 2. We should be happy with the results of our clustering as they match up with what we already know about PBMCs. We have been able to annotate each cluster as a different cell type based on a supervised approach - and we could do the same using an unsupervised approach. The decisions we made along the way, such as the number of PCs we used and how many nearest neighbors we looked for have worked well. We even identified some subtypes of T cells, although we were able to separate out more of these when we used SCTransform. 
+> > Even though many of the top markers for cluster 0 (or cluster 2 if you used SCTransform) were from ribosomal genes, we were still able to identify it as a specific cell type. It represents our population of Naive CD4+ T cells. If we search online to learn a bit more about this cell type, we'll quickly find that it is known to have lots of ribosomes, so in this case, we can be confident that the high expression of ribosomal genes in these cells is due to real biological differences between cell types, rather than a problem with our data. If we weren't able to assign a cell type to this cluster, for example because it expressed a mix of markers for different types, then we would come to a different conclusion! +> > If we couldn't see strong associations between our clusters and the different cell types that we expect to see in the dataset, then this would be very suspicious - did something go wrong with our experiment or analysis? We would need to go back and try to identify the problem to see if we can fix it. If the problem isn't too bad, we might just need to change some of the clustering parameters to get clusters that make biological sense - maybe we would need to use more PCs, look for more/fewer nearest neighbors, or simply change the resolution. If the problem is more serious, we might need to recheck the quality of our data or make bigger changes to the analysis. +> {: .solution} +{: .question} + +# Conclusion +{% icon congratulations %} Well done, you've successfully used Seurat to prepare and cluster single cell data. You might want to check your results against the example histories for the [separate preprocessing steps](https://usegalaxy.eu/u/marisa_jl/h/clustering-3k-pbmcs-with-seurat---separate-preprocessing---answer-key) or [SCTransform route](https://usegalaxy.eu/u/marisa_jl/h/clustering-3k-pbmcs-with-seurat---sctransform---answer-key). 
You can also take a look at the whole workflow for [the separate steps](https://usegalaxy.eu/u/marisa_jl/w/copy-of-cluster-3k-pbmcs-with-seurat---workflow) or [SCTransform version](https://usegalaxy.eu/u/marisa_jl/w/cluster-3k-pbmcs-with-seurat---workflow---sctransform-version). + +In this tutorial, we've learned about the steps involved in clustering single cell data and how to identify different cell types. We followed a typical clustering workflow: + +1. Preprocessing with + - selection and filtering of cells based on quality metrics + - normalisation and scaling + - selection of features (highly variable genes) +2. Dimensional reduction + - using PCA and then UMAP +3. Clustering of the cells by + - computation of a neighborhood graph + - clustering the neighborhood graph into communities +4. Identification of marker genes for the clusters +5. Annotation of the clusters with cell types + +Now that you know how to perform clustering with Seurat, you might want to try the alternative Scanpy pipeline by following the [Clustering 3K PBMCs with Scanpy]({% link topics/single-cell/tutorials/scrna-scanpy-pbmc3k/tutorial.md %}) tutorial. If you would prefer to stay with Seurat, then you could go back and try using the alternative preprocessing steps described above or try using it to analyse a slightly trickier dataset by following the 'Filter, plot and explore single cell RNA-seq data with Seurat' tutorial [using Galaxy buttons]({% link topics/single-cell/tutorials/scrna-case_FilterPlotandExplore_SeuratTools/tutorial.html %}) or in an [R notebook on Galaxy]({% link topics/single-cell/tutorials/scrna-case_FilterPlotandExploreRStudio/tutorial.html %}). You could also try using Seurat to analyse your own data! + +This tutorial is part of the https://singlecell.usegalaxy.eu portal ({% cite tekman2020single %}). 
diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow-tests.yml b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow-tests.yml new file mode 100644 index 00000000000000..b884a964bbdfef --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow-tests.yml @@ -0,0 +1,31 @@ +- doc: Test the Clustering 3k PBMCs with Seurat tutorial with separate preprocessing steps + job: + matrix.mtx: + class: File + location: https://zenodo.org/records/3581213/files/matrix.mtx + filetype: mtx + genes.tsv: + class: File + location: https://zenodo.org/records/3581213/files/genes.tsv + filetype: tsv + barcodes.tsv: + class: File + location: https://zenodo.org/records/3581213/files/barcodes.tsv + filetype: tsv + Canonical Markers: + class: File + location: https://zenodo.org/records/14013475/files/Canonical_Markers.tabular + filetype: tabular + outputs: + Input 3k PBMC: + location: https://zenodo.org/records/14013475/files/Input_3k_PBMC.rds + compare: diff + Preprocessed Data: + location: https://zenodo.org/records/14013475/files/Preprocessed_Data.rds + compare: diff + Annotated Clusters: + location: https://zenodo.org/records/14013475/files/Annotated_Clusters.rds + compare: diff + Input DE Markers: + location: https://zenodo.org/records/14013475/files/Input_DE_Markers.tabular + compare: diff diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow.ga b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow.ga new file mode 100644 index 00000000000000..b3a05b9f3f164b --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow.ga @@ -0,0 +1,1850 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This is the workflow for the Clustering 3K PBMCs with Seurat tutorial if you are using the separate preprocessing tools (NormalizeData, FindVariableFeatures, ScaleData).", + "comments": [ 
+ { + "color": "none", + "data": { + "line": [ + [ + 0, + 0 + ] + ], + "thickness": 5 + }, + "id": 0, + "position": [ + 1519.7, + 1235 + ], + "size": [ + 0, + 0 + ], + "type": "freehand" + }, + { + "color": "green", + "data": { + "line": [ + [ + 0, + 0 + ] + ], + "thickness": 5 + }, + "id": 1, + "position": [ + 3072, + 1480.2 + ], + "size": [ + 0, + 0 + ], + "type": "freehand" + }, + { + "color": "none", + "data": { + "line": [ + [ + 0, + 0 + ] + ], + "thickness": 5 + }, + "id": 2, + "position": [ + 1519.7, + 1235 + ], + "size": [ + 0, + 0 + ], + "type": "freehand" + }, + { + "color": "red", + "data": { + "text": "INPUTS" + }, + "id": 3, + "position": [ + 0, + 440 + ], + "size": [ + 340, + 440 + ], + "type": "markdown" + }, + { + "color": "green", + "data": { + "text": "CREATE SEURAT OBJECT & QUALITY CONTROL" + }, + "id": 4, + "position": [ + 400, + 220 + ], + "size": [ + 1370, + 1190 + ], + "type": "markdown" + }, + { + "color": "orange", + "data": { + "text": "PREPROCESSING" + }, + "id": 5, + "position": [ + 1810, + 880 + ], + "size": [ + 1160, + 600 + ], + "type": "markdown" + }, + { + "color": "blue", + "data": { + "text": "DIMENSIONAL REDUCTION & VISUALISATION" + }, + "id": 6, + "position": [ + 3080, + 0 + ], + "size": [ + 790, + 1540 + ], + "type": "markdown" + }, + { + "color": "pink", + "data": { + "text": "CLUSTERING" + }, + "id": 7, + "position": [ + 3890, + 1480 + ], + "size": [ + 540, + 240 + ], + "type": "markdown" + }, + { + "color": "yellow", + "data": { + "text": "UMAP & VISUALISATION" + }, + "id": 8, + "position": [ + 4440, + 1090 + ], + "size": [ + 530, + 780 + ], + "type": "markdown" + }, + { + "color": "turquoise", + "data": { + "text": "FIND MARKERS & ANNOTATE CELL TYPES" + }, + "id": 9, + "position": [ + 5010, + 730 + ], + "size": [ + 1450, + 1270 + ], + "type": "markdown" + } + ], + "creator": [ + { + "class": "Person", + "identifier": "0000-0001-6979-6930", + "name": "Marisa Loach" + } + ], + "format-version": "0.1", + "license": 
"CC-BY-4.0", + "name": "Clustering 3k PBMCs with Seurat - Workflow", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Single cell expression data in Matrix Market (mtx) format. This is the table that shows the counts of each RNA in each cell.", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Single cell expression data in Matrix Market (mtx) format. This is the table that shows the counts of each RNA in each cell.", + "name": "matrix.mtx" + } + ], + "label": "matrix.mtx", + "name": "matrix.mtx", + "outputs": [], + "position": { + "left": 60, + "top": 500 + }, + "tool_id": null, + "tool_state": "{\"name\": \"matrix.mtx\"}", + "tool_version": null, + "type": "data_input", + "uuid": "718fab00-a27a-4e7c-8c57-ce12edede577", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "Table of gene names for the features in the matrix.", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Table of gene names for the features in the matrix.", + "name": "genes.tsv" + } + ], + "label": "genes.tsv", + "name": "genes.tsv", + "outputs": [], + "position": { + "left": 60, + "top": 630 + }, + "tool_id": null, + "tool_state": "{\"name\": \"genes.tsv\"}", + "tool_version": null, + "type": "data_input", + "uuid": "e7aa2256-020f-4394-827b-f138a489ad30", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "Table of barcodes for the cells in the matrix.", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Table of barcodes for the cells in the matrix.", + "name": "barcodes.tsv" + } + ], + "label": "barcodes.tsv", + "name": "barcodes.tsv", + 
"outputs": [], + "position": { + "left": 60, + "top": 770 + }, + "tool_id": null, + "tool_state": "{\"name\": \"barcodes.tsv\"}", + "tool_version": null, + "type": "data_input", + "uuid": "f5d6d896-f8bd-4ac9-a804-ffa24a8fcba0", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "Paste in a list of canonical markers for PBMCs - this is given in the Clustering 3K PBMCs with Seurat tutorial.", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "Paste in a list of canonical markers for PBMCs - this is given in the Clustering 3K PBMCs with Seurat tutorial.", + "name": "Canonical Markers" + } + ], + "label": "Canonical Markers", + "name": "Canonical Markers", + "outputs": [], + "position": { + "left": 5790, + "top": 1480 + }, + "tool_id": null, + "tool_state": "{\"name\": \"Canonical Markers\"}", + "tool_version": null, + "type": "data_input", + "uuid": "2c0e8bbf-df2c-4255-ba4a-d162062c0e6e", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "method|input_type|cell_barcodes": { + "id": 2, + "output_name": "output" + }, + "method|input_type|gene_names": { + "id": 1, + "output_name": "output" + }, + "method|input_type|matrix": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": "Input 3k PBMC", + "name": "Seurat Create", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 440, + "top": 550 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d0c26c9430f2", + "name": "seurat_create", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, 
\"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"CreateSeuratObject\", \"__current_case__\": 0, \"input_type\": {\"input_type\": \"mtx\", \"__current_case__\": 0, \"matrix\": {\"__class__\": \"ConnectedValue\"}, \"citeseq_boolean\": false, \"gene_names\": {\"__class__\": \"ConnectedValue\"}, \"cell_barcodes\": {\"__class__\": \"ConnectedValue\"}, \"gene_column\": \"2\", \"cell_column\": \"1\", \"unique_features\": true, \"strip_suffix\": false}, \"meta_data\": null, \"assay\": \"RNA\", \"min_cells\": \"3\", \"min_features\": \"200\", \"names_field\": null, \"names_delim\": \"\", \"percent_mt\": {\"percent_mt\": \"false\", \"__current_case__\": 0}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "95063641-a668-4ee6-a615-5824df102d8c", + "when": null, + "workflow_outputs": [ + {"output_name": "rds_out", "label": "Input 3k PBMC"} + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "errors": null, + "id": 5, + "input_connections": { + "method|seurat_rds": { + "id": 4, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Create", + "name": "method" + } + ], + "label": "Mitochondrial Annotations", + "name": "Seurat Create", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 760, + "top": 760 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d0c26c9430f2", + "name": "seurat_create", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"Add_QC_Metrics\", \"__current_case__\": 1, 
\"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"match\": {\"match\": \"pattern\", \"__current_case__\": 0, \"pattern\": \"^MT-\"}, \"col_name\": \"percent.mt\", \"assay\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "7fbe6bf7-e4b6-4efd-b865-8094e8623836", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1070, + "top": 350 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"nFeature_RNA,nCount_RNA,percent.mt\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": \"3\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": 
"697ee6d8-9158-4ccf-9d6d-b2828d779f3f", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 7, + "input_connections": { + "seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1070, + "top": 540 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeatureScatter\", \"__current_case__\": 1, \"feature1\": \"nCount_RNA\", \"feature2\": \"percent.mt\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"smooth\": false, \"plot_cor\": true, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"log\": false, \"jitter\": false}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"group_by\": \"\", \"split_by\": \"\", \"span\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "52607358-08a3-4b68-8639-584b3d7fa13a", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 8, + "input_connections": { + "seurat_rds": { + "id": 5, + "output_name": "rds_out" + 
} + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1070, + "top": 760 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeatureScatter\", \"__current_case__\": 1, \"feature1\": \"nCount_RNA\", \"feature2\": \"nFeature_RNA\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"smooth\": false, \"plot_cor\": true, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"log\": false, \"jitter\": false}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"group_by\": \"\", \"split_by\": \"\", \"span\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "4efcdbd6-4dc0-41e1-bb8b-7da87c73345e", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "errors": null, + "id": 9, + "input_connections": { + "method|seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Create", + "name": "method" + } + ], + "label": "Filtered Dataset", + "name": "Seurat Create", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 1070, + "top": 1010 + }, + 
"post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d0c26c9430f2", + "name": "seurat_create", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FilterCells\", \"__current_case__\": 2, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"minimum_nFeature_RNA\": \"200\", \"maximum_nFeature_RNA\": \"2500\", \"minimum_nCount_RNA\": null, \"maximum_nCount_RNA\": null, \"minimum_percent_mt\": null, \"maximum_percent_mt\": \"5.0\", \"other\": {\"other\": \"false\", \"__current_case__\": 0}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "632612d2-a3ee-4b9c-8c4a-8f3d19fec3af", + "when": null, + "workflow_outputs": [] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 10, + "input_connections": { + "seurat_rds": { + "id": 9, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1450, + "top": 620 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"nFeature_RNA,nCount_RNA,percent.mt\", \"idents\": \"\", \"sort\": 
false, \"plot\": {\"ncol\": \"3\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "ed85898a-deee-49a4-8f8a-19db52bc4fd6", + "when": null, + "workflow_outputs": [] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 11, + "input_connections": { + "seurat_rds": { + "id": 9, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1450, + "top": 790 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeatureScatter\", \"__current_case__\": 1, \"feature1\": \"nCount_RNA\", \"feature2\": \"percent.mt\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"smooth\": false, \"plot_cor\": true, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"log\": false, \"jitter\": false}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"group_by\": 
\"\", \"split_by\": \"\", \"span\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "5fd64e5a-a7d6-411c-9ac0-249f221e8313", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 12, + "input_connections": { + "seurat_rds": { + "id": 9, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1440, + "top": 1010 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeatureScatter\", \"__current_case__\": 1, \"feature1\": \"nCount_RNA\", \"feature2\": \"nFeature_RNA\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"smooth\": false, \"plot_cor\": true, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"log\": false, \"jitter\": false}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"group_by\": \"\", \"split_by\": \"\", \"span\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": 
"f0d7632c-5db9-4fb3-b324-7eab234be5f8", + "when": null, + "workflow_outputs": [] + }, + "13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "errors": null, + "id": 13, + "input_connections": { + "seurat_rds": { + "id": 9, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Preprocessing", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 1830, + "top": 1220 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "c3170652bd98", + "name": "seurat_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"NormalizeData\", \"__current_case__\": 0, \"assay\": \"\", \"normalization_method\": {\"normalization_method\": \"LogNormalize\", \"__current_case__\": 0}, \"scale_factor\": \"10000\", \"block_size\": null}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "69f1381a-1777-403d-9e55-8d87484bcea1", + "when": null, + "workflow_outputs": [] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "errors": null, + "id": 14, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Preprocessing", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + }, + { + "name": "variable_tabular", + "type": "txt" + } + ], + "position": { + "left": 2130, + "top": 1210 + }, + "post_job_actions": {}, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "c3170652bd98", + "name": "seurat_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindVariableFeatures\", \"__current_case__\": 1, \"assay\": \"\", \"selection_method\": {\"selection_method\": \"vst\", \"__current_case__\": 0, \"loess_span\": \"0.3\", \"clip_max\": null, \"nfeatures\": \"2000\"}, \"num_bin\": \"20\", \"binning_method\": \"equal_width\", \"output_topN\": {\"output_topN\": \"true\", \"__current_case__\": 0, \"topN\": \"10\"}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "b0dc31f5-4fd7-443e-9860-3e9b5821678f", + "when": null, + "workflow_outputs": [] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "errors": null, + "id": 15, + "input_connections": { + "seurat_rds": { + "id": 14, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": "Preprocessed Data", + "name": "Seurat Preprocessing", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 2400, + "top": 1240 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "c3170652bd98", + "name": "seurat_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": 
\"ScaleData\", \"__current_case__\": 2, \"assay\": \"\", \"regress\": {\"regress\": \"false\", \"__current_case__\": 1}, \"scale_features\": {\"scale_features\": \"all_genes\", \"__current_case__\": 1}, \"split_by\": \"\", \"do_scale\": true, \"do_center\": true, \"scale_max\": \"10.0\", \"block_size\": \"1000\", \"min_cells_to_block\": \"3000\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "65ec07fc-f84b-4d82-9c82-6278f47b6150", + "when": null, + "workflow_outputs": [ + {"output_name": "rds_out", "label": "Preprocessed Data"} + ] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 16, + "input_connections": { + "seurat_rds": { + "id": 15, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2670, + "top": 1040 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VariableFeaturePlot\", \"__current_case__\": 3, \"assay\": \"\", \"label_topN\": {\"label_topN\": \"true\", \"__current_case__\": 0, \"topN\": \"10\"}, \"plot\": {\"cols_1\": \"black\", \"cols_2\": \"red\", \"pt_size\": null, \"log\": false, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, 
\"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "b6203931-009d-4180-99a0-aab83b1a491f", + "when": null, + "workflow_outputs": [] + }, + "17": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "errors": null, + "id": 17, + "input_connections": { + "seurat_rds": { + "id": 15, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Run Dimensional Reduction", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + }, + { + "name": "top_pcs", + "type": "txt" + } + ], + "position": { + "left": 3230, + "top": 1280 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e49429e8d6dd", + "name": "seurat_reduce_dimension", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"RunPCA\", \"__current_case__\": 0, \"assay\": \"\", \"npcs\": \"50\", \"rev_pca\": false, \"weight_by_var\": true, \"seed_use\": \"42\", \"approx\": true, \"features\": null, \"reduction_name\": \"pca\", \"reduction_key\": \"PC_\", \"print_pcs\": {\"print_pcs\": \"true\", \"__current_case__\": 1, \"dims\": \"5\", \"nfeatures\": \"5\"}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "4067139f-81dc-4e26-b0b0-ea84e2ccea73", + "when": null, + "workflow_outputs": [] + }, + "18": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 18, + "input_connections": { + "seurat_rds": { + "id": 17, + 
"output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3580, + "top": 69.11470214107226 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VizDimLoadings\", \"__current_case__\": 4, \"dims\": \"3\", \"nfeatures\": \"30\", \"reduction\": \"pca\", \"projected\": false, \"balanced\": false, \"plot\": {\"col\": \"blue\", \"ncol\": \"3\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3000\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "70cc100d-c796-4102-8d27-cdedbc875a7e", + "when": null, + "workflow_outputs": [] + }, + "19": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 19, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3590, + "top": 230 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": 
\"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimPlot\", \"__current_case__\": 5, \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"pca\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"order\": \"\", \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"alpha\": \"1\", \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"highlight\": {\"highlight\": \"false\", \"__current_case__\": 1}, \"na_value\": \"lightgrey\"}, \"adv\": {\"cells\": null, \"group_by\": \"\", \"split_by\": \"\", \"shape_by\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "6b1053b0-7dc9-4461-8bbd-59e890701c32", + "when": null, + "workflow_outputs": [] + }, + "20": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 20, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3590, + "top": 450 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": 
\"CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP\", \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"pca\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": \"3\", \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "29e6790d-519f-47de-9081-d1e131ccaf0f", + "when": null, + "workflow_outputs": [] + }, + "21": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 21, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3590, + "top": 660 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP\", \"dims_1\": \"2\", 
\"dims_2\": \"3\", \"reduction\": \"pca\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": \"3\", \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "62528f28-6eb1-4c84-810f-65328c04466d", + "when": null, + "workflow_outputs": [] + }, + "22": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 22, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3590, + "top": 880 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimHeatmap\", \"__current_case__\": 6, \"dims\": \"1\", \"nfeatures\": \"30\", \"reduction\": \"pca\", \"plot\": {\"disp_min\": \"-2.5\", \"disp_max\": null, \"ncol\": null, 
\"raster\": true}, \"adv\": {\"cells\": \"500\", \"slot\": \"scale.data\", \"assays\": \"\", \"projected\": false, \"balanced\": true}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "27b3000e-57db-4a47-94c8-90cc1c42b024", + "when": null, + "workflow_outputs": [] + }, + "23": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 23, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3590, + "top": 1050 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimHeatmap\", \"__current_case__\": 6, \"dims\": \"1:15\", \"nfeatures\": \"30\", \"reduction\": \"pca\", \"plot\": {\"disp_min\": \"-2.5\", \"disp_max\": null, \"ncol\": null, \"raster\": true}, \"adv\": {\"cells\": \"500\", \"slot\": \"scale.data\", \"assays\": \"\", \"projected\": false, \"balanced\": true}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4400\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": 
"c4aad18c-ce5f-4afc-9a1e-97c090d560c3", + "when": null, + "workflow_outputs": [] + }, + "24": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 24, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3600, + "top": 1210 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"ElbowPlot\", \"__current_case__\": 7, \"ndims\": \"30\", \"reduction\": \"pca\"}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "49ed4762-c22b-474f-85b2-2818b886b4f4", + "when": null, + "workflow_outputs": [] + }, + "25": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "errors": null, + "id": 25, + "input_connections": { + "seurat_rds": { + "id": 17, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Find Clusters", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 3920, + "top": 1520 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": 
"94f1b9c7286f", + "name": "seurat_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindNeighbors\", \"__current_case__\": 0, \"reduction\": \"pca\", \"dims\": \"10\", \"k_param\": \"20\", \"nn_method\": {\"nn_method\": \"annoy\", \"__current_case__\": 1, \"annoy_metric\": \"euclidean\"}, \"adv\": {\"n_trees\": \"50\", \"l2_norm\": false, \"compute_snn\": {\"compute_snn\": \"TRUE\", \"__current_case__\": 1, \"prune_snn\": null, \"distance_matrix\": false}}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "d77467f9-360e-46de-a998-0e75c21e5414", + "when": null, + "workflow_outputs": [] + }, + "26": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "errors": null, + "id": 26, + "input_connections": { + "seurat_rds": { + "id": 25, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Find Clusters", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 4170, + "top": 1520 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "94f1b9c7286f", + "name": "seurat_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindClusters\", \"__current_case__\": 2, \"modularity_fxn\": \"1\", \"resolution\": \"0.5\", \"algorithm\": {\"algorithm\": \"1\", \"__current_case__\": 1}, \"n_start\": 
\"10\", \"n_iter\": \"10\", \"random_seed\": \"0\", \"group_singletons\": true, \"graph_name\": \"\", \"cluster_name\": \"\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "c9569499-2cdf-41ab-b39f-cf3acc5009ee", + "when": null, + "workflow_outputs": [] + }, + "27": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "errors": null, + "id": 27, + "input_connections": { + "seurat_rds": { + "id": 26, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Run Dimensional Reduction", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 4460, + "top": 1530 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e49429e8d6dd", + "name": "seurat_reduce_dimension", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"RunUMAP\", \"__current_case__\": 2, \"reduction\": \"pca\", \"assay\": \"\", \"umap_method\": {\"umap_method\": \"uwot\", \"__current_case__\": 0}, \"n_neighbors\": \"30\", \"n_components\": \"2\", \"metric\": \"cosine\", \"run_umap_on\": {\"run_umap_on\": \"dims\", \"__current_case__\": 0, \"dims\": \"10\"}, \"adv\": {\"reduction_model\": \"\", \"n_epochs\": null, \"learning_rate\": \"1\", \"min_dist\": \"0.3\", \"spread\": \"1\", \"set_op_mix_ratio\": \"1.0\", \"local_connectivity\": \"1\", \"repulsion_strength\": \"1\", \"negative_sample_rate\": \"5\", \"a\": \"\", \"b\": \"\", \"uwot_sgd\": false, \"seed_use\": \"42\", \"angular_rp_forest\": false, 
\"reduction_name\": \"umap\", \"reduction_key\": \"UMAP_\"}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "e7312792-c546-41c1-a869-f92ad15e5a14", + "when": null, + "workflow_outputs": [] + }, + "28": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 28, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4690, + "top": 1140 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimPlot\", \"__current_case__\": 5, \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"order\": \"\", \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"alpha\": \"1\", \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"highlight\": {\"highlight\": \"false\", \"__current_case__\": 1}, \"na_value\": \"lightgrey\"}, \"adv\": {\"cells\": null, \"group_by\": \"\", \"split_by\": \"\", \"shape_by\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": 
"tool", + "uuid": "f90608a3-4776-4e56-864e-2abbcec3a436", + "when": null, + "workflow_outputs": [] + }, + "29": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 29, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4700, + "top": 1370 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP\", \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": \"3\", \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "212ead79-a572-4afa-9a10-cbddfd2d1d5b", + 
"when": null, + "workflow_outputs": [] + }, + "30": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 30, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4690, + "top": 1680 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"CST3,CD79A,HLA-DQA1, MALAT1,NKG7,PPBP\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": \"3\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "98e9f5a5-daf4-4351-8bb6-f147e69b99e5", + "when": null, + "workflow_outputs": [] + }, + "31": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "errors": null, + "id": 31, + "input_connections": { + "seurat_rds": { + "id": 27, + 
"output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Find Clusters", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + }, + { + "name": "markers_tabular", + "type": "csv" + } + ], + "position": { + "left": 5050, + "top": 1750 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "94f1b9c7286f", + "name": "seurat_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindAllMarkers\", \"__current_case__\": 3, \"features\": null, \"logfc_threshold\": \"1.0\", \"test_use\": {\"test_use\": \"wilcox\", \"__current_case__\": 0, \"slot\": \"data\"}, \"set_top_markers\": {\"set_top_markers\": \"true\", \"__current_case__\": 0, \"topN\": \"10\"}, \"adv\": {\"base\": \"2\", \"assay\": \"\", \"fc_name\": \"\", \"min_pct\": \"0.01\", \"min_diff_pct\": null, \"only_pos\": true, \"max_cells_per_ident\": null, \"random_seed\": \"1\", \"min_cells_group\": \"3\", \"densify\": false}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "8f85e3ff-14ac-4af3-9837-6f5548300959", + "when": null, + "workflow_outputs": [] + }, + "32": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 32, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 5410, + "top": 1070 + }, + "post_job_actions": {}, + "tool_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"CD79A,MS4A1\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": null, \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "ad372ba7-6efd-49d1-8b7f-6ffebd4f244e", + "when": null, + "workflow_outputs": [] + }, + "33": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 33, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 5410, + "top": 1240 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": 
\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"IL7R,CCR7,S100A4,CD8A\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": \"4\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "65ced1b0-d05b-4d24-93ad-9fff43d3b144", + "when": null, + "workflow_outputs": [] + }, + "34": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 34, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 5400, + "top": 1390 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"IL7R,CCR7,CD14,LYZ,S100A4,MS4A1,CD8A,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP\", \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", 
\"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "06e3a055-84f2-4c07-8ca5-094b58abdfa2", + "when": null, + "workflow_outputs": [] + }, + "35": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 35, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 5400, + "top": 1600 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"IL7R,CCR7,CD14,LYZ,S100A4,MS4A1,CD8A,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": null, \"cols\": \"\", \"pt_size\": 
null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "67c224e7-e0d5-4a62-ab76-09fe5ce6535a", + "when": null, + "workflow_outputs": [] + }, + "36": { + "annotation": "This is the final annotated Seurat Object with each cluster labelled by cell type.", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0", + "errors": null, + "id": 36, + "input_connections": { + "seurat_rds": { + "id": 27, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": "Annotated Clusters", + "name": "Seurat Data Management", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 5720, + "top": 1260 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "50c5abeb08ba", + "name": "seurat_data", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"Manipulate\", \"__current_case__\": 1, \"manipulate\": {\"manipulate\": \"Rename_Idents\", \"__current_case__\": 1, \"rename\": {\"rename_all\": \"true\", \"__current_case__\": 0, \"new_names\": \"CD4 Naive T, CD14 Mono, CD4 Memory T, B, CD8 T,FCGR3A Mono, NK, DC, Platelet\"}, \"stash_idents\": {\"stash_idents\": \"true\", \"__current_case__\": 1, 
\"old_ident\": \"old.ident\"}}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "611b1791-1efe-4bdd-ad70-3b072be28866", + "when": null, + "workflow_outputs": [ + {"output_name": "rds_out", "label": "Annotated Clusters"} + ] + }, + "37": { + "annotation": "", + "content_id": "csv_to_tabular", + "errors": null, + "id": 37, + "input_connections": { + "csv": { + "id": 31, + "output_name": "markers_tabular" + } + }, + "inputs": [], + "label": null, + "name": "Convert CSV to tabular", + "outputs": [ + { + "name": "tabular", + "type": "tabular" + } + ], + "position": { + "left": 5600, + "top": 1780 + }, + "post_job_actions": {}, + "tool_id": "csv_to_tabular", + "tool_state": "{\"__input_ext\": \"input\", \"__target_datatype__\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"csv\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "034d7fec-8df2-4f43-ab0e-b67b8a6fde68", + "when": null, + "workflow_outputs": [] + }, + "38": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 38, + "input_connections": { + "seurat_rds": { + "id": 36, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 6180, + "top": 1090 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": 
\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimPlot\", \"__current_case__\": 5, \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"order\": \"\", \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"alpha\": \"1\", \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"highlight\": {\"highlight\": \"false\", \"__current_case__\": 1}, \"na_value\": \"lightgrey\"}, \"adv\": {\"cells\": null, \"group_by\": \"\", \"split_by\": \"\", \"shape_by\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "97c5a7c8-e33b-4fec-a13f-6075acee66c3", + "when": null, + "workflow_outputs": [] + }, + "39": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 39, + "input_connections": { + "method|features": { + "id": 3, + "output_name": "output" + }, + "seurat_rds": { + "id": 36, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Visualize", + "name": "method" + } + ], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 6170, + "top": 1330 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", 
\"method\": {\"method\": \"DoHeatmap\", \"__current_case__\": 9, \"features\": {\"__class__\": \"ConnectedValue\"}, \"cells\": null, \"plot\": {\"group_bar\": true, \"group_colors\": \"\", \"disp_min\": \"-2.5\", \"disp_max\": null, \"label\": true, \"size\": \"3.5\", \"hjust\": \"0.0\", \"vjust\": \"0.0\", \"angle\": \"60\", \"raster\": true, \"draw_lines\": true, \"lines_width\": null, \"group_bar_height\": \"0.02\"}, \"adv\": {\"group_by\": \"ident\", \"slot\": \"scale.data\", \"assay\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "9a21f792-77fb-4703-8fa8-d6f8a0aa2084", + "when": null, + "workflow_outputs": [] + }, + "40": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", + "errors": null, + "id": 40, + "input_connections": { + "singtabop|input": { + "id": 37, + "output_name": "tabular" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Table Compute", + "name": "singtabop" + } + ], + "label": "Input DE Markers", + "name": "Table Compute", + "outputs": [ + { + "name": "table", + "type": "tabular" + } + ], + "position": { + "left": 5870, + "top": 1710 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3bf5661c0280", + "name": "table_compute", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"out_opts\": [\"ignore_nas\"], \"precision\": \"6\", \"singtabop\": {\"use_type\": \"single\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, 
\"col_row_names\": [\"has_col_names\"], \"adv\": {\"header\": null, \"nrows\": null, \"skipfooter\": null, \"skip_blank_lines\": true}, \"user\": {\"mode\": \"select\", \"__current_case__\": 1, \"select_cols_wanted\": \"1\", \"select_rows_wanted\": null, \"select_keepdupe\": [\"select_cols_keepdupe\", \"select_rows_keepdupe\"]}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.2.4+galaxy0", + "type": "tool", + "uuid": "7f443c50-5cca-47e8-b651-03c0c61e6ec5", + "when": null, + "workflow_outputs": [ + {"output_name": "table", "label": "Input DE Markers"} + ] + }, + "41": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 41, + "input_connections": { + "method|features": { + "id": 40, + "output_name": "table" + }, + "seurat_rds": { + "id": 36, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Visualize", + "name": "method" + } + ], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 6180, + "top": 1640 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DoHeatmap\", \"__current_case__\": 9, \"features\": {\"__class__\": \"ConnectedValue\"}, \"cells\": null, \"plot\": {\"group_bar\": true, \"group_colors\": \"\", \"disp_min\": \"-2.5\", \"disp_max\": null, \"label\": true, \"size\": \"3.5\", \"hjust\": \"0.0\", \"vjust\": \"0.0\", \"angle\": \"60\", \"raster\": true, \"draw_lines\": true, \"lines_width\": null, 
\"group_bar_height\": \"0.02\"}, \"adv\": {\"group_by\": \"ident\", \"slot\": \"scale.data\", \"assay\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "a6a6a8d6-b4ba-4b05-a6e6-1529f2947b7d", + "when": null, + "workflow_outputs": [] + } + }, + "tags": [ + "name:singlecell", + "name:seurat" + ], + "uuid": "0ac15d31-860d-4adb-9758-2220ad70289d", + "version": 6 +} diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow_SCT-tests.yml b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow_SCT-tests.yml new file mode 100644 index 00000000000000..ff4ecc0f371533 --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow_SCT-tests.yml @@ -0,0 +1,32 @@ +- doc: Test the Clustering 3k PBMCs with Seurat tutorial with SCTransform + job: + Matrix: + matrix.mtx: + class: File + location: https://zenodo.org/records/3581213/files/matrix.mtx + filetype: mtx + genes.tsv: + class: File + location: https://zenodo.org/records/3581213/files/genes.tsv + filetype: tsv + barcodes.tsv: + class: File + location: https://zenodo.org/records/3581213/files/barcodes.tsv + filetype: tsv + Canonical Markers SCT: + class: File + location: https://zenodo.org/records/14013638/files/Pasted_Entry.tabular + filetype: tabular + outputs: + Input 3k PBMC: + location: https://zenodo.org/records/14013638/files/Input_3k_PBMC.rds + compare: diff + Preprocessed Data: + location: https://zenodo.org/records/14013638/files/Galaxy11-Preprocessed_Data.rds + compare: diff + Annotated Clusters: + location: https://zenodo.org/records/14013638/files/Annotated_Clusters.rds + compare: diff + Input DE Markers: + location: 
https://zenodo.org/records/14013638/files/Input_DE_Markers.tabular + compare: diff diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow_SCT.ga b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow_SCT.ga new file mode 100644 index 00000000000000..46d33c59b0c778 --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/Seurat_PBMC_Workflow_SCT.ga @@ -0,0 +1,1667 @@ +{ + "a_galaxy_workflow": "true", + "annotation": "This is the workflow for the Clustering 3K PBMCs with Seurat tutorial if you use SCTransform for preprocessing.", + "comments": [ + { + "color": "green", + "data": { + "text": "CREATE SEURATOBJECT & QUALITY CHECK" + }, + "id": 0, + "position": [ + 320, + 720 + ], + "size": [ + 1020, + 1010 + ], + "type": "markdown" + }, + { + "color": "red", + "data": { + "text": "INPUTS" + }, + "id": 1, + "position": [ + 0, + 1120 + ], + "size": [ + 280, + 350 + ], + "type": "markdown" + }, + { + "color": "blue", + "data": { + "text": "PREPROCESSING" + }, + "id": 2, + "position": [ + 1350, + 1140 + ], + "size": [ + 540, + 490 + ], + "type": "markdown" + }, + { + "color": "orange", + "data": { + "text": "DIMENSIONAL REDUCTION & VISUALISATION" + }, + "id": 3, + "position": [ + 1930, + 0 + ], + "size": [ + 680, + 1750 + ], + "type": "markdown" + }, + { + "color": "pink", + "data": { + "text": "CLUSTERING" + }, + "id": 4, + "position": [ + 2690, + 1500 + ], + "size": [ + 550, + 290 + ], + "type": "markdown" + }, + { + "color": "lime", + "data": { + "text": "UMAP & VISUALISATION" + }, + "id": 5, + "position": [ + 3290, + 880 + ], + "size": [ + 570, + 950 + ], + "type": "markdown" + }, + { + "color": "yellow", + "data": { + "text": "FIND MARKERS & ANNOTATE CELL TYPES" + }, + "id": 6, + "position": [ + 4090, + 120 + ], + "size": [ + 2100, + 2040 + ], + "type": "markdown" + } + ], + "creator": [ + { + "class": "Person", + "identifier": "0000-0001-6979-6930", + "name": "Marisa Loach" + } + ],
+ "format-version": "0.1", + "license": "CC-BY-4.0", + "name": "Cluster 3k PBMCs with Seurat - Workflow - SCTransform Version", + "report": { + "markdown": "\n# Workflow Execution Report\n\n## Workflow Inputs\n```galaxy\ninvocation_inputs()\n```\n\n## Workflow Outputs\n```galaxy\ninvocation_outputs()\n```\n\n## Workflow\n```galaxy\nworkflow_display()\n```\n" + }, + "steps": { + "0": { + "annotation": "Single cell expression data in Matrix Market (mtx) format. This is the table that shows the counts of each RNA in each cell.", + "content_id": null, + "errors": null, + "id": 0, + "input_connections": {}, + "inputs": [ + { + "description": "Single cell expression data in Matrix Market (mtx) format. This is the table that shows the counts of each RNA in each cell.", + "name": "matrix.mtx" + } + ], + "label": "matrix.mtx", + "name": "matrix.mtx", + "outputs": [], + "position": { + "left": 40, + "top": 1180 + }, + "tool_id": null, + "tool_state": "{\"name\": \"matrix.mtx\"}", + "tool_version": null, + "type": "data_input", + "uuid": "2b5746f6-1fc9-4631-87a6-141bbcae6651", + "when": null, + "workflow_outputs": [] + }, + "1": { + "annotation": "Table of gene names for the features in the matrix.", + "content_id": null, + "errors": null, + "id": 1, + "input_connections": {}, + "inputs": [ + { + "description": "Table of gene names for the features in the matrix.", + "name": "genes.tsv" + } + ], + "label": "genes.tsv", + "name": "genes.tsv", + "outputs": [], + "position": { + "left": 40, + "top": 1260 + }, + "tool_id": null, + "tool_state": "{\"name\": \"genes.tsv\"}", + "tool_version": null, + "type": "data_input", + "uuid": "3b8bd964-8601-4617-a6af-0828f921510a", + "when": null, + "workflow_outputs": [] + }, + "2": { + "annotation": "Table of barcodes for the cells in the matrix.", + "content_id": null, + "errors": null, + "id": 2, + "input_connections": {}, + "inputs": [ + { + "description": "Table of barcodes for the cells in the matrix.", + "name": "barcodes.tsv" + } + 
], + "label": "barcodes.tsv", + "name": "barcodes.tsv", + "outputs": [], + "position": { + "left": 40, + "top": 1340 + }, + "tool_id": null, + "tool_state": "{\"name\": \"barcodes.tsv\"}", + "tool_version": null, + "type": "data_input", + "uuid": "0c7c757f-097a-40e1-a549-5e2b792a2a5d", + "when": null, + "workflow_outputs": [] + }, + "3": { + "annotation": "Paste in a list of canonical markers for PBMCs - this is given in the Clustering 3K PBMCs with Seurat tutorial.", + "content_id": null, + "errors": null, + "id": 3, + "input_connections": {}, + "inputs": [ + { + "description": "Paste in a list of canonical markers for PBMCs - this is given in the Clustering 3K PBMCs with Seurat tutorial.", + "name": "Canonical Markers SCT" + } + ], + "label": "Canonical Markers SCT", + "name": "Canonical Markers SCT", + "outputs": [], + "position": { + "left": 5550, + "top": 1690 + }, + "tool_id": null, + "tool_state": "{\"name\": \"Canonical Markers SCT\"}", + "tool_version": null, + "type": "data_input", + "uuid": "74704209-04fc-47fe-a002-a4f1aeee36ce", + "when": null, + "workflow_outputs": [] + }, + "4": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "errors": null, + "id": 4, + "input_connections": { + "method|input_type|cell_barcodes": { + "id": 2, + "output_name": "output" + }, + "method|input_type|gene_names": { + "id": 1, + "output_name": "output" + }, + "method|input_type|matrix": { + "id": 0, + "output_name": "output" + } + }, + "inputs": [], + "label": "Input 3k PBMCs", + "name": "Seurat Create", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 370, + "top": 1150 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d0c26c9430f2", + "name": "seurat_create", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": 
"{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"CreateSeuratObject\", \"__current_case__\": 0, \"input_type\": {\"input_type\": \"mtx\", \"__current_case__\": 0, \"matrix\": {\"__class__\": \"ConnectedValue\"}, \"citeseq_boolean\": false, \"gene_names\": {\"__class__\": \"ConnectedValue\"}, \"cell_barcodes\": {\"__class__\": \"ConnectedValue\"}, \"gene_column\": \"2\", \"cell_column\": \"1\", \"unique_features\": true, \"strip_suffix\": false}, \"meta_data\": null, \"assay\": \"RNA\", \"min_cells\": \"3\", \"min_features\": \"200\", \"names_field\": null, \"names_delim\": \"\", \"percent_mt\": {\"percent_mt\": \"false\", \"__current_case__\": 0}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "fcaabd49-a903-4e86-9764-fb8d280e3ceb", + "when": null, + "workflow_outputs": [ + {"output_name": "rds_out", "label": "Input 3k PBMC"} + ] + }, + "5": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "errors": null, + "id": 5, + "input_connections": { + "method|seurat_rds": { + "id": 4, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Create", + "name": "method" + } + ], + "label": "Mitochondrial Annotations", + "name": "Seurat Create", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 630, + "top": 1340 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d0c26c9430f2", + "name": "seurat_create", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": 
\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"Add_QC_Metrics\", \"__current_case__\": 1, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"match\": {\"match\": \"pattern\", \"__current_case__\": 0, \"pattern\": \"^MT-\"}, \"col_name\": \"percent.mt\", \"assay\": \"\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "2678868b-db68-46e6-9f99-d6f4608e426b", + "when": null, + "workflow_outputs": [] + }, + "6": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 6, + "input_connections": { + "seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 980, + "top": 770 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"nFeature_RNA,nCount_RNA,percent.mt\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": \"3\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, 
\"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "46b1c3fd-4e1b-4ce6-b092-b22b7a35cf6c", + "when": null, + "workflow_outputs": [] + }, + "7": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 7, + "input_connections": { + "seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 980, + "top": 930 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeatureScatter\", \"__current_case__\": 1, \"feature1\": \"nCount_RNA\", \"feature2\": \"percent.mt\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"smooth\": false, \"plot_cor\": true, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"log\": false, \"jitter\": false}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"group_by\": \"\", \"split_by\": \"\", \"span\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "7b2070ae-49dd-4dce-acbd-1e1755d30b54", + "when": null, + "workflow_outputs": [] + }, + "8": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 8, + "input_connections": { + "seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 980, + "top": 1140 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeatureScatter\", \"__current_case__\": 1, \"feature1\": \"nCount_RNA\", \"feature2\": \"nFeature_RNA\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"smooth\": false, \"plot_cor\": true, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"log\": false, \"jitter\": false}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"group_by\": \"\", \"split_by\": \"\", \"span\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "b65822db-be1b-4af5-b58c-1302080479ed", + "when": null, + "workflow_outputs": [] + }, + "9": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "errors": null, + "id": 9, + "input_connections": { + "method|seurat_rds": { + "id": 5, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Create", + "name": "method" + } + ], + 
"label": "Filtered Dataset", + "name": "Seurat Create", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 1070, + "top": 1420 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_create/seurat_create/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "d0c26c9430f2", + "name": "seurat_create", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FilterCells\", \"__current_case__\": 2, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"minimum_nFeature_RNA\": \"200\", \"maximum_nFeature_RNA\": \"2500\", \"minimum_nCount_RNA\": null, \"maximum_nCount_RNA\": null, \"minimum_percent_mt\": null, \"maximum_percent_mt\": \"5.0\", \"other\": {\"other\": \"false\", \"__current_case__\": 0}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "e69bad32-53ec-4f77-86c8-25ff656ef151", + "when": null, + "workflow_outputs": [] + }, + "10": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "errors": null, + "id": 10, + "input_connections": { + "seurat_rds": { + "id": 9, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": "Preprocessed Data", + "name": "Seurat Preprocessing", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + }, + { + "name": "variable_tabular", + "type": "txt" + } + ], + "position": { + "left": 1370, + "top": 1400 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_preprocessing/seurat_preprocessing/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "c3170652bd98", + "name": "seurat_preprocessing", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": 
"{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"SCTransform\", \"__current_case__\": 3, \"assay\": \"RNA\", \"new_assay_name\": \"SCT\", \"residual_features\": {\"residual_features_options\": \"NULL\", \"__current_case__\": 0, \"variable_features\": {\"variable_features\": \"set_number\", \"__current_case__\": 0, \"variable_features_n\": \"3000\"}}, \"output_topN\": {\"output_topN\": \"true\", \"__current_case__\": 0, \"topN\": \"10\"}, \"vars_to_regress\": \"percent.mt\", \"do_scale\": false, \"do_center\": true, \"min_clip_range\": null, \"max_clip_range\": null, \"adv\": {\"do_correct_umi\": true, \"ncells\": \"5000\", \"seed_use\": \"1448145\", \"vst_flavor\": \"v2\", \"conserve_memory\": {\"conserve_memory\": \"FALSE\", \"__current_case__\": 0, \"return_only_var_genes\": true}}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "16d5622f-45fb-4988-92b7-3ae7652143a8", + "when": null, + "workflow_outputs": [ + {"output_name": "rds_out", "label": "Preprocessed Data"} + ] + }, + "11": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0", + "errors": null, + "id": 11, + "input_connections": { + "seurat_rds": { + "id": 10, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Data Management", + "outputs": [ + { + "name": "inspect_general", + "type": "txt" + } + ], + "position": { + "left": 1650, + "top": 1160 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "50c5abeb08ba", + "name": "seurat_data", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": 
{\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"Inspect\", \"__current_case__\": 0, \"inspect\": {\"inspect\": \"General\", \"__current_case__\": 0}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "16c4d23d-d422-48c0-ab1f-80dc15a405bb", + "when": null, + "workflow_outputs": [] + }, + "12": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 12, + "input_connections": { + "seurat_rds": { + "id": 10, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 1650, + "top": 1320 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VariableFeaturePlot\", \"__current_case__\": 3, \"assay\": \"\", \"label_topN\": {\"label_topN\": \"true\", \"__current_case__\": 0, \"topN\": \"10\"}, \"plot\": {\"cols_1\": \"black\", \"cols_2\": \"red\", \"pt_size\": null, \"log\": false, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "de012846-fa8b-4afb-82f3-59a8ebf18085", + "when": null, + "workflow_outputs": [] + }, + 
"13": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "errors": null, + "id": 13, + "input_connections": { + "seurat_rds": { + "id": 10, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Run Dimensional Reduction", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + }, + { + "name": "top_pcs", + "type": "txt" + } + ], + "position": { + "left": 1950, + "top": 1470 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e49429e8d6dd", + "name": "seurat_reduce_dimension", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"RunPCA\", \"__current_case__\": 0, \"assay\": \"\", \"npcs\": \"50\", \"rev_pca\": false, \"weight_by_var\": true, \"seed_use\": \"42\", \"approx\": true, \"features\": null, \"reduction_name\": \"pca\", \"reduction_key\": \"PC_\", \"print_pcs\": {\"print_pcs\": \"true\", \"__current_case__\": 1, \"dims\": \"5\", \"nfeatures\": \"5\"}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "e6cefaf5-fcfc-4aba-aa87-69b2f13c70f7", + "when": null, + "workflow_outputs": [] + }, + "14": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 14, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2240, + "top": 30 
+ }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VizDimLoadings\", \"__current_case__\": 4, \"dims\": \"3\", \"nfeatures\": \"30\", \"reduction\": \"pca\", \"projected\": false, \"balanced\": false, \"plot\": {\"col\": \"blue\", \"ncol\": \"3\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3000\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "3d04eb35-d477-4006-92a1-3921c6ec9a74", + "when": null, + "workflow_outputs": [] + }, + "15": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 15, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2250, + "top": 200 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimPlot\", \"__current_case__\": 5, \"dims_1\": \"1\", \"dims_2\": \"2\", 
\"reduction\": \"pca\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"order\": \"\", \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"alpha\": \"1\", \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"highlight\": {\"highlight\": \"false\", \"__current_case__\": 1}, \"na_value\": \"lightgrey\"}, \"adv\": {\"cells\": null, \"group_by\": \"\", \"split_by\": \"\", \"shape_by\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "9cd00c6d-04a3-4c3f-b82f-00e8f89f2b29", + "when": null, + "workflow_outputs": [] + }, + "16": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 16, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2250, + "top": 430 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74\", \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"pca\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": 
\"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": \"3\", \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "bbf50287-0fc4-42df-bbe6-c60a7e13513a", + "when": null, + "workflow_outputs": [] + }, + "17": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 17, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2250, + "top": 660 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74\", \"dims_1\": \"2\", \"dims_2\": \"3\", \"reduction\": \"pca\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, 
\"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": \"3\", \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "649fd303-f759-4c86-8e62-0c5d9ceb678d", + "when": null, + "workflow_outputs": [] + }, + "18": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 18, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2250, + "top": 880 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimHeatmap\", \"__current_case__\": 6, \"dims\": \"1\", \"nfeatures\": \"30\", \"reduction\": \"pca\", \"plot\": {\"disp_min\": \"-2.5\", \"disp_max\": null, \"ncol\": null, \"raster\": true}, \"adv\": {\"cells\": \"500\", \"slot\": \"scale.data\", \"assays\": \"\", \"projected\": false, \"balanced\": true}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, 
\"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "67d9e826-4665-4574-880d-98d9d8b30116", + "when": null, + "workflow_outputs": [] + }, + "19": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 19, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2240, + "top": 1050 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimHeatmap\", \"__current_case__\": 6, \"dims\": \"1:15\", \"nfeatures\": \"30\", \"reduction\": \"pca\", \"plot\": {\"disp_min\": \"-2.5\", \"disp_max\": null, \"ncol\": null, \"raster\": true}, \"adv\": {\"cells\": \"500\", \"slot\": \"scale.data\", \"assays\": \"\", \"projected\": false, \"balanced\": true}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4400\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "2a169635-d6c3-45df-a53c-d72c0da315f8", + "when": null, + "workflow_outputs": [] + }, + "20": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 20, + "input_connections": 
{ + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 2240, + "top": 1220 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"ElbowPlot\", \"__current_case__\": 7, \"ndims\": \"50\", \"reduction\": \"pca\"}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "e39c2836-98bb-4967-a297-4b35369ab466", + "when": null, + "workflow_outputs": [] + }, + "21": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "errors": null, + "id": 21, + "input_connections": { + "seurat_rds": { + "id": 13, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Find Clusters", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 2730, + "top": 1580 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "94f1b9c7286f", + "name": "seurat_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": 
\"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindNeighbors\", \"__current_case__\": 0, \"reduction\": \"pca\", \"dims\": \"30\", \"k_param\": \"20\", \"nn_method\": {\"nn_method\": \"annoy\", \"__current_case__\": 1, \"annoy_metric\": \"euclidean\"}, \"adv\": {\"n_trees\": \"50\", \"l2_norm\": false, \"compute_snn\": {\"compute_snn\": \"TRUE\", \"__current_case__\": 1, \"prune_snn\": null, \"distance_matrix\": false}}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "68e52588-ea3f-4dc3-ba8b-b851de5ff691", + "when": null, + "workflow_outputs": [] + }, + "22": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "errors": null, + "id": 22, + "input_connections": { + "seurat_rds": { + "id": 21, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Find Clusters", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 3000, + "top": 1580 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "94f1b9c7286f", + "name": "seurat_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindClusters\", \"__current_case__\": 2, \"modularity_fxn\": \"1\", \"resolution\": \"0.8\", \"algorithm\": {\"algorithm\": \"1\", \"__current_case__\": 1}, \"n_start\": \"10\", \"n_iter\": \"10\", \"random_seed\": \"0\", \"group_singletons\": true, \"graph_name\": \"\", \"cluster_name\": \"\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, 
\"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "0c735f36-3fe7-477b-9be0-fb0e1028b249", + "when": null, + "workflow_outputs": [] + }, + "23": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "errors": null, + "id": 23, + "input_connections": { + "seurat_rds": { + "id": 22, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Run Dimensional Reduction", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 3350, + "top": 1600 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_reduce_dimension/seurat_reduce_dimension/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "e49429e8d6dd", + "name": "seurat_reduce_dimension", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"RunUMAP\", \"__current_case__\": 2, \"reduction\": \"pca\", \"assay\": \"\", \"umap_method\": {\"umap_method\": \"uwot\", \"__current_case__\": 0}, \"n_neighbors\": \"30\", \"n_components\": \"2\", \"metric\": \"cosine\", \"run_umap_on\": {\"run_umap_on\": \"dims\", \"__current_case__\": 0, \"dims\": \"30\"}, \"adv\": {\"reduction_model\": \"\", \"n_epochs\": null, \"learning_rate\": \"1\", \"min_dist\": \"0.3\", \"spread\": \"1\", \"set_op_mix_ratio\": \"1.0\", \"local_connectivity\": \"1\", \"repulsion_strength\": \"1\", \"negative_sample_rate\": \"5\", \"a\": \"\", \"b\": \"\", \"uwot_sgd\": false, \"seed_use\": \"42\", \"angular_rp_forest\": false, \"reduction_name\": \"umap\", \"reduction_key\": \"UMAP_\"}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + 
"type": "tool", + "uuid": "4177c098-ee9a-4877-a57e-571bc608577d", + "when": null, + "workflow_outputs": [] + }, + "24": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 24, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3600, + "top": 910 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimPlot\", \"__current_case__\": 5, \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"cols\": \"\", \"pt_size\": null, \"order\": \"\", \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"alpha\": \"1\", \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"highlight\": {\"highlight\": \"false\", \"__current_case__\": 1}, \"na_value\": \"lightgrey\"}, \"adv\": {\"cells\": null, \"group_by\": \"\", \"split_by\": \"\", \"shape_by\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "f908ea4a-5d77-4b5e-847a-5a0599df0d15", + "when": null, + "workflow_outputs": [] + }, + "25": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 25, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3600, + "top": 1140 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74\", \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": {\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": \"3\", \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "ef665c3a-083d-49a3-9274-086a2214f3b2", + "when": null, + "workflow_outputs": [] + }, + "26": { + "annotation": "", + "content_id": 
"toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 26, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 3600, + "top": 1370 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"MALAT1,NKG7,S100A8,FTL,HLA-DRA,CD74\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": \"3\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "4df0c807-56c1-4142-8b3e-9dfda52af5ba", + "when": null, + "workflow_outputs": [] + }, + "27": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "errors": null, + "id": 27, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Find 
Clusters", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + }, + { + "name": "markers_tabular", + "type": "csv" + } + ], + "position": { + "left": 4150, + "top": 1650 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_clustering/seurat_clustering/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "94f1b9c7286f", + "name": "seurat_clustering", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FindAllMarkers\", \"__current_case__\": 3, \"features\": null, \"logfc_threshold\": \"1.0\", \"test_use\": {\"test_use\": \"wilcox\", \"__current_case__\": 0, \"slot\": \"data\"}, \"set_top_markers\": {\"set_top_markers\": \"true\", \"__current_case__\": 0, \"topN\": \"10\"}, \"adv\": {\"base\": \"2\", \"assay\": \"\", \"fc_name\": \"\", \"min_pct\": \"0.01\", \"min_diff_pct\": null, \"only_pos\": true, \"max_cells_per_ident\": null, \"random_seed\": \"1\", \"min_cells_group\": \"3\", \"densify\": false}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "7d54ab6f-614e-44dd-bc0e-6c57136df569", + "when": null, + "workflow_outputs": [] + }, + "28": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 28, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4530, + "top": 270 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + 
"changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"CD79A,MS4A1\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": null, \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "56e5ecb5-841e-4700-9639-d3b233cf618f", + "when": null, + "workflow_outputs": [] + }, + "29": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 29, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4530, + "top": 470 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", 
\"__current_case__\": 0, \"features\": \"IL7R,CCR7,S100A4,CD8A,GZMK,CCL5,IL32,ISG15\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": \"4\", \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"3100\", \"height\": \"2100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "0265672b-f89c-47d0-a081-6003fa648c0a", + "when": null, + "workflow_outputs": [] + }, + "30": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 30, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4530, + "top": 670 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"FeaturePlot\", \"__current_case__\": 8, \"features\": \"CD14,LYZ,MS4A1,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP,IL7R,CCR7,S100A4,CD8A,GZMK,CCL5,IL32,ISG15\", \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"keep_scale\": \"feature\", \"blend\": 
{\"blend\": \"FALSE\", \"__current_case__\": 1, \"by_col\": false}, \"cols_2\": \"lightgrey\", \"cols_3\": \"blue\", \"pt_size\": null, \"alpha\": \"1\", \"order\": false, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"coord_fixed\": false, \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}}, \"adv\": {\"cells\": null, \"slot\": \"data\", \"split_by\": \"\", \"shape_by\": \"\", \"min_cutoff\": null, \"max_cutoff\": null}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "903b771d-8f96-4119-b063-66b08c7925de", + "when": null, + "workflow_outputs": [] + }, + "31": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 32, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4530, + "top": 1160 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"CD14,LYZ,MS4A1,FCGR3A,MS4A7,GNLY,NKG7,FCER1A,CST3,PPBP,IL7R,CCR7,S100A4,CD8A,GZMK,CCL5,IL32,ISG15\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": null, \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", 
\"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "eb141e18-0657-4f2b-9b15-092dcccdfff9", + "when": null, + "workflow_outputs": [] + }, + "32": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 33, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4530, + "top": 1350 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"VlnPlot\", \"__current_case__\": 0, \"features\": \"GZMK,CCL5,CCR7,S100A4,CCR7,IL32,ISG15\", \"idents\": \"\", \"sort\": false, \"plot\": {\"ncol\": null, \"cols\": \"\", \"pt_size\": null, \"alpha\": \"1\", \"adjust\": \"1\", \"y_max\": null, \"same_y_lims\": false, \"log\": false, \"fill_by\": \"feature\", \"flip\": false, \"add_noise\": true}, \"adv\": {\"assay\": \"\", \"layer\": \"\", \"group_by\": \"\", \"split_by\": \"\", \"split_plot\": false, \"stack\": 
false}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "2bc51f26-2cce-46a4-beb7-bfd534e832f8", + "when": null, + "workflow_outputs": [] + }, + "33": { + "annotation": "This is the final annotated Seurat Object with each cluster labelled by cell type.", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0", + "errors": null, + "id": 34, + "input_connections": { + "seurat_rds": { + "id": 23, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": "Annotated Clusters", + "name": "Seurat Data Management", + "outputs": [ + { + "name": "rds_out", + "type": "rds" + } + ], + "position": { + "left": 4610, + "top": 1940 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_data/seurat_data/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "50c5abeb08ba", + "name": "seurat_data", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"Manipulate\", \"__current_case__\": 1, \"manipulate\": {\"manipulate\": \"Rename_Idents\", \"__current_case__\": 1, \"rename\": {\"rename_all\": \"true\", \"__current_case__\": 0, \"new_names\": \"CD4 Memory T, CD14 Mono, CD4 Naive T, B, CD8 Effector T, NK, FCGR3A Mono, CD8 Memory T, CD8 Naive T,DC, CD4 IFN-activated T,Platelet\"}, \"stash_idents\": {\"stash_idents\": \"true\", \"__current_case__\": 1, \"old_ident\": \"old.ident\"}}}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": 
"a9c5f565-9de8-4e36-9596-f8af9d21f619", + "when": null, + "workflow_outputs": [ + {"output_name": "rds_out", "label": "Annotated Clusters"} + ] + }, + "34": { + "annotation": "", + "content_id": "csv_to_tabular", + "errors": null, + "id": 35, + "input_connections": { + "csv": { + "id": 27, + "output_name": "markers_tabular" + } + }, + "inputs": [], + "label": null, + "name": "Convert CSV to tabular", + "outputs": [ + { + "name": "tabular", + "type": "tabular" + } + ], + "position": { + "left": 5040, + "top": 1440 + }, + "post_job_actions": {}, + "tool_id": "csv_to_tabular", + "tool_state": "{\"__input_ext\": \"input\", \"__target_datatype__\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"csv\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.0.0", + "type": "tool", + "uuid": "cb86cade-b903-4b7d-8d69-2abd5e3bc787", + "when": null, + "workflow_outputs": [] + }, + "35": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 36, + "input_connections": { + "seurat_rds": { + "id": 34, + "output_name": "rds_out" + } + }, + "inputs": [], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 4820, + "top": 1730 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DimPlot\", \"__current_case__\": 5, \"dims_1\": \"1\", \"dims_2\": \"2\", \"reduction\": \"umap\", \"plot\": {\"cols\": \"\", \"pt_size\": null, 
\"order\": \"\", \"shuffle\": {\"shuffle\": \"FALSE\", \"__current_case__\": 1}, \"alpha\": \"1\", \"ncol\": null, \"raster\": {\"raster\": \"NULL\", \"__current_case__\": 0}, \"label\": {\"label\": \"FALSE\", \"__current_case__\": 1}, \"highlight\": {\"highlight\": \"false\", \"__current_case__\": 1}, \"na_value\": \"lightgrey\"}, \"adv\": {\"cells\": null, \"group_by\": \"\", \"split_by\": \"\", \"shape_by\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"false\", \"__current_case__\": 0}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "388f9645-894b-4668-9999-2c7223d1b268", + "when": null, + "workflow_outputs": [] + }, + "36": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 37, + "input_connections": { + "method|features": { + "id": 3, + "output_name": "output" + }, + "seurat_rds": { + "id": 34, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Visualize", + "name": "method" + } + ], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 5860, + "top": 1630 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DoHeatmap\", \"__current_case__\": 9, \"features\": {\"__class__\": \"ConnectedValue\"}, \"cells\": null, \"plot\": {\"group_bar\": true, \"group_colors\": \"\", \"disp_min\": \"-2.5\", \"disp_max\": 
null, \"label\": true, \"size\": \"2.5\", \"hjust\": \"0.0\", \"vjust\": \"0.0\", \"angle\": \"60\", \"raster\": true, \"draw_lines\": true, \"lines_width\": null, \"group_bar_height\": \"0.02\"}, \"adv\": {\"group_by\": \"ident\", \"slot\": \"scale.data\", \"assay\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": \"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "e5051943-ee0e-4d1c-bd1f-cd225bfb2cea", + "when": null, + "workflow_outputs": [] + }, + "37": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", + "errors": null, + "id": 38, + "input_connections": { + "singtabop|input": { + "id": 35, + "output_name": "tabular" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Table Compute", + "name": "singtabop" + } + ], + "label": "Input DE Markers", + "name": "Table Compute", + "outputs": [ + { + "name": "table", + "type": "tabular" + } + ], + "position": { + "left": 5420, + "top": 1430 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/table_compute/table_compute/1.2.4+galaxy0", + "tool_shed_repository": { + "changeset_revision": "3bf5661c0280", + "name": "table_compute", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"tabular\", \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"out_opts\": [\"ignore_nas\"], \"precision\": \"6\", \"singtabop\": {\"use_type\": \"single\", \"__current_case__\": 0, \"input\": {\"__class__\": \"ConnectedValue\"}, \"col_row_names\": [\"has_col_names\"], \"adv\": {\"header\": null, \"nrows\": null, \"skipfooter\": null, \"skip_blank_lines\": true}, \"user\": {\"mode\": \"select\", \"__current_case__\": 1, \"select_cols_wanted\": \"1\", 
\"select_rows_wanted\": null, \"select_keepdupe\": [\"select_cols_keepdupe\", \"select_rows_keepdupe\"]}}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "1.2.4+galaxy0", + "type": "tool", + "uuid": "40f4fbec-a7cd-48a2-a458-0407f1fc0bf7", + "when": null, + "workflow_outputs": [ + {"output_name": "table", "label": "Input DE Markers"} + ] + }, + "38": { + "annotation": "", + "content_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "errors": null, + "id": 39, + "input_connections": { + "method|features": { + "id": 38, + "output_name": "table" + }, + "seurat_rds": { + "id": 34, + "output_name": "rds_out" + } + }, + "inputs": [ + { + "description": "runtime parameter for tool Seurat Visualize", + "name": "method" + } + ], + "label": null, + "name": "Seurat Visualize", + "outputs": [ + { + "name": "plot_out_png", + "type": "png" + } + ], + "position": { + "left": 5860, + "top": 1360 + }, + "post_job_actions": {}, + "tool_id": "toolshed.g2.bx.psu.edu/repos/iuc/seurat_plot/seurat_plot/5.0+galaxy0", + "tool_shed_repository": { + "changeset_revision": "2622dd85416f", + "name": "seurat_plot", + "owner": "iuc", + "tool_shed": "toolshed.g2.bx.psu.edu" + }, + "tool_state": "{\"__input_ext\": \"input\", \"advanced_common\": {\"show_log\": false}, \"chromInfo\": \"/opt/galaxy/tool-data/shared/ucsc/chrom/?.len\", \"method\": {\"method\": \"DoHeatmap\", \"__current_case__\": 9, \"features\": {\"__class__\": \"ConnectedValue\"}, \"cells\": null, \"plot\": {\"group_bar\": true, \"group_colors\": \"\", \"disp_min\": \"-2.5\", \"disp_max\": null, \"label\": true, \"size\": \"2.5\", \"hjust\": \"0.0\", \"vjust\": \"0.0\", \"angle\": \"60\", \"raster\": true, \"draw_lines\": true, \"lines_width\": null, \"group_bar_height\": \"0.02\"}, \"adv\": {\"group_by\": \"ident\", \"slot\": \"scale.data\", \"assay\": \"\"}}, \"plot_format\": \"png\", \"resize\": {\"resize\": \"true\", \"__current_case__\": 1, \"width\": \"4100\", \"height\": 
\"4100\"}, \"seurat_rds\": {\"__class__\": \"ConnectedValue\"}, \"__page__\": null, \"__rerun_remap_job_id__\": null}", + "tool_version": "5.0+galaxy0", + "type": "tool", + "uuid": "82ae853a-4611-45d8-a3f5-3c260b671ac4", + "when": null, + "workflow_outputs": [] + } + }, + "tags": [ + "singlecell", + "seurat" + ], + "uuid": "e2275592-addd-4c72-afa6-0df1caeafbab", + "version": 6 +} diff --git a/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/index.md b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/index.md new file mode 100644 index 00000000000000..e092e0ae66ddd4 --- /dev/null +++ b/topics/single-cell/tutorials/scrna-seurat-pbmc3k/workflows/index.md @@ -0,0 +1,3 @@ +--- +layout: workflow-list +---