-
Notifications
You must be signed in to change notification settings - Fork 0
/
ch06_QA8_session.R
123 lines (75 loc) · 3.09 KB
/
ch06_QA8_session.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
##############
## Q and A ##
#############
library(tidyverse)
library(ggplot2)
# Plotting a volcano plot-------------------------------------------------------
## Import the table used throughout this session: RNAseq results that feed
## the volcano plot and the filter-based highlighting below
rna <- read_csv(file = "./data/res_tbl.csv")
## Inspect the import: column structure first, then the leading rows
rna %>% str()
rna %>% head()
# To plot RNAseq differential-expression results, a volcano plot is very common
## Minimal scatter: log2FoldChange on x, -log10(padj) on y.
## Rows whose padj is NA are removed before plotting.
ggplot(
  data = filter(rna, !is.na(padj)),
  mapping = aes(x = log2FoldChange, y = -log10(padj))
) +
  geom_point(size = 0.5)
## Same scatter, now with guide lines and a title:
## a horizontal line at padj = 0.05 (drawn at -log10(0.05)) and vertical
## lines at log2FoldChange = -1 and 1.
ggplot(
  data = filter(rna, !is.na(padj)),
  mapping = aes(x = log2FoldChange, y = -log10(padj))
) +
  geom_point(size = 0.5) +
  geom_hline(yintercept = -log10(0.05)) +
  geom_vline(xintercept = c(-1, 1)) +
  ggtitle("Selected cells T96 vs T48") +
  theme_bw()
## Volcano plot with highlighted points.
## A point is flagged when padj < 0.05 and |log2FoldChange| > 1; flagged points
## are drawn in firebrick3, all others in gray.
rna %>%
  filter(!is.na(padj)) %>%
  ggplot(aes(
    x = log2FoldChange, y = -log10(padj),
    color = padj < 0.05 & abs(log2FoldChange) > 1
  )) +
  # Name the colours by logical level: unnamed values are matched by position,
  # so a data set containing only TRUE would otherwise be painted gray
  scale_colour_manual(values = c("FALSE" = "gray", "TRUE" = "firebrick3")) +
  geom_point(size = 0.5) +
  geom_hline(yintercept = -log10(0.05)) +
  geom_vline(xintercept = 1) +
  geom_vline(xintercept = -1) +
  labs(title = "Selected cells T96 vs T48") +
  theme_bw()
## Volcano plot with highlighted AND labelled points.
## The same condition (padj < 0.05 and |log2FoldChange| > 1) drives both the
## colour and the label, so every labelled point is also a highlighted one.
rna %>%
  filter(!is.na(padj)) %>%
  ggplot(aes(
    x = log2FoldChange, y = -log10(padj),
    color = padj < 0.05 & abs(log2FoldChange) > 1,
    # Points that are not flagged get an empty label
    label = ifelse(
      padj < 0.05 & abs(log2FoldChange) > 1,
      as.character(gene),
      ""
    )
  )) +
  # Name the colours by logical level: unnamed values are matched by position,
  # so a data set containing only TRUE would otherwise be painted gray
  scale_colour_manual(values = c("FALSE" = "gray", "TRUE" = "firebrick3")) +
  geom_point(size = 0.5) +
  geom_hline(yintercept = -log10(0.05)) +
  geom_vline(xintercept = 1) +
  geom_vline(xintercept = -1) +
  # geom_text_repel() comes from ggrepel, which this script never attaches;
  # the namespace-qualified call works without a library(ggrepel) line
  ggrepel::geom_text_repel(max.overlaps = 10) +
  labs(title = "Selected cells T96 vs T48") +
  theme_bw()
## Side note: base R graphics work very well for quick quality-control plots,
# where fancy colours and titles are not needed
hist(x = rna$padj)
plot(x = rna$log2FoldChange, y = -log10(rna$padj))
# Horizontal line at -log10(0.05), vertical lines at 1 and -1
abline(h = -log10(0.05), v = c(1, -1))
# Temperature distribution per month. The built-in airquality data set has no
# `Measurer` column, so Month is used as the grouping variable.
boxplot(Temp ~ Month, data = airquality)
# Close the current graphics device (`dev.null()` is not an R function)
dev.off()
# High-level data structures----------------------------------------------------
# Besides lists, data frames, vectors, etc., you will encounter other types of
# objects while using R, especially when using R for omics analyses
# Some of these objects include:
# Rectangular feature x sample data –
# SummarizedExperiment::SummarizedExperiment() (RNAseq count matrix, microarray, …)
#
# Genomic coordinates – GenomicRanges::GRanges() (1-based, closed interval)
# DNA / RNA / AA sequences – Biostrings::*StringSet()
# Multi-omics data – MultiAssayExperiment::MultiAssayExperiment()
# Single cell data – SingleCellExperiment::SingleCellExperiment()
# Quantitative proteomics data – QFeatures::QFeatures()
# Tidyverse functions cannot be used directly on these objects; subsetting is
# done with square brackets
# Install the Bioconductor packages only when they are not already available,
# so that re-running the script does not call BiocManager::install() each time
if (!requireNamespace("SummarizedExperiment", quietly = TRUE)) {
  BiocManager::install("SummarizedExperiment")
}
if (!requireNamespace("airway", quietly = TRUE)) {
  BiocManager::install("airway")
}
# Read the airway table shipped with the course material
airway_df <- read_csv("./data/ch04_airway.csv")