-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalysis.R
93 lines (77 loc) · 3.16 KB
/
analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#-----------------------------------------------------------------------------------
# Compare RNA-seq data of dc-hiOL samples with public data from Jäkel et al. 2019
# doi: 10.1038/s41586-019-0903-2, GEO accession number: GSE118257
#
# Christian Thomas
#
# 2020-12-08
#------------------------------------------------------------------------------------
# Load libraries
library(dplyr)
library(pheatmap)
library(ggplot2)
library(RColorBrewer)
library(preprocessCore)
library(DESeq2)
library(openxlsx)
# Load dc-hiOL data
hiOL_anno <- read.csv("./data/samples.txt",sep = ",")
hiOL<-read.table("./data/hiOL_salmon.counts")
# Load sn-RNAseq data from Jäkel et al. 2019 (GSE118257)
geoanno<-read.table("./data/GSE118257_anno.txt", header=TRUE, stringsAsFactors = FALSE)
geoexpr<-read.table("./data/GSE118257_expr.txt.gz", header=TRUE, row.names=1, stringsAsFactors = FALSE)
# Get intersection of gene names from both datasets
hiOL<-hiOL[intersect(rownames(hiOL),rownames(geoexpr)),]
geo<-geoexpr[intersect(rownames(hiOL),rownames(geoexpr)),]
colnames(geo)<-geoanno$Celltypes
# Calculate average expression values for each sn-RNAseq cluster (CellType)
geomean<-as.data.frame(t(geo))
geomean$Celltypes<-geoanno$Celltypes
geomean<-as.data.frame(geomean %>% group_by(Celltypes) %>% summarise_all(mean))
# Remove Endothelial cells, Macrophage and Vasc_smooth_muscle
geomean<-geomean[-c(4,5,7,22),]
# Tidy data
rownames(geomean)<-geomean[,1]
geomean<-geomean[,-1]
# Normalize sn-RNAseq count data
geomean<-t(geomean)
geomean<-round(geomean*100,0)
geomean_norm<-data.frame(normalize.quantiles(as.matrix(geomean)))
colnames(geomean_norm)<-colnames(geomean)
rownames(geomean_norm)<-rownames(geomean)
# Combine both datasets
comb<-cbind(hiOL, geomean_norm)
# Normalize combined data
comb_norm<-data.frame(normalize.quantiles(as.matrix(comb)))
comb_norm<-round(comb_norm,0)
colnames(comb_norm)<-colnames(comb)
colnames(comb_norm)[14]<-"Microglia"
rownames(comb_norm)<-rownames(comb)
# Import count data in DEseq2 and apply a variance stabilizing transformation (VST)
coldata<-data.frame(batch=c(rep("hiOL",9),rep("snRNA",18)))
rownames(coldata)<-colnames(comb_norm)
dds <- DESeqDataSetFromMatrix(comb_norm, coldata, ~batch)
vsd <- vst(dds, blind = FALSE)
mat <- assay(vsd)
# Import marker genes
genesofinterest<-read.xlsx("./data/marker_genes.xlsx")
mat<-mat[match(genesofinterest$Gene,rownames(mat)),]
colnames(mat)<-c(paste0("dc-hiOL",seq(1,3,1)),
rep("Fib",3),rep("pOL",3),
colnames(mat[,10:length(mat[1,])]))
# Row annotation
row_annotation<-data.frame(Celltype=genesofinterest$Celltype)
rownames(row_annotation)<-rownames(mat)
celltype_cols<-brewer.pal(6,"Dark2")
names(celltype_cols)<-unique(genesofinterest$Celltype)
anno_cols<-list(Celltype=celltype_cols)
pheatmap(mat,
color=colorRampPalette(c("darkblue","blue","white","yellow"))(255),
clustering_method = "complete",
clustering_distance_cols = "euclidean",
annotation_row = row_annotation,
annotation_colors = anno_cols,
cluster_rows = FALSE,
gaps_row = c(7,21,26,30,32)
)