-
Notifications
You must be signed in to change notification settings - Fork 0
/
Wild_v_cultivated_code_for_publish_22022021.R
704 lines (550 loc) · 49.5 KB
/
Wild_v_cultivated_code_for_publish_22022021.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
#Thomas R. Davies 2021
#The following file is an investigation of M. domestica and M. serverseii
#the data analyzed was gathered by the Myles lab in 2016 and 2017 under the file
#names 20200306_supp_table_1 and 20191017_supp_table_2
#The object of this investigation is to investigate differences and similarities in
#wild and domestic apples.
install.packages("tidyverse")
library(tidyverse)
install.packages("openxlsx")
library("openxlsx")
#######################################################################
#DATA CURATION# #!#
#######################################################################
#setting working directory
setwd("~/Desktop/DALMSc/Biostats/abc_project")
#Read in the raw data. This is the only table used in this analysis.
abc_data = read.csv("Watts_et_al_2021_supp_data.csv")
#This supplementary table is from Watts et al 2021 *LINK*.
# reduce table to only useful phenotypes
# make a vector of the column numbers that we wish to keep, this will exclude phenotypes from other years that is not usable in the current analysis
c_to_k = c(1,2,5,6,7,9,21,22,35:39,41,44,46)
abc_data = abc_data[,c_to_k]
#this analysis will only look at dessert and wild apples
#trim data to contain only dessert and wild apples.
index = which(abc_data$use == 'dessert' | abc_data$use == 'wild')
abc_data=abc_data[index,]
#removing NA data from the 'species' , and rewriting the index to analyze only dessert and wild apples with defined species attributes
abc_data=abc_data[!is.na(abc_data[,'species']),]
#This table now only includes dessert and wild apples
# the following loop will check each row and sum the "NA" from that row
#Then it will put it into "na_sum" for each apple sample
#The result is a value with 1 row of 879 values and each entry represents the number of missing data points for that sample
na_sum = c()
for (i in 1:nrow(abc_data)) {
na_sum[i] = sum(is.na(abc_data[i,7:16]))
}
#lets remove the samples that are missing all phenotype data
table(na_sum)
#0 1 2 3 4 5 6 7 8 9 10
#220 205 141 131 5 1 1 45 99 25 6
# 6 samples are missing all phenotype data and must be removed
index_keep = !(na_sum == 10)
abc_data = abc_data[index_keep,]
#get a count of the number of each species in the data
sum(abc_data$species == "domestica", na.rm = T)
#[1] 797. There are 797 dessert
sum(abc_data$species == "sieversii", na.rm = T)
#[1] 76. there are 76 sieversii in the dataset
total_wild = sum(abc_data$species == "sieversii", na.rm = T)
total_dessert = sum(abc_data$use == 'dessert', na.rm = T)
#how many phenotypes?
# there are 10 phenotypes
n_pheno = 10
#The first 6 columns are meta data
### EXAMINE THE DATA IN DETAIL ###
#produce figures that describes the amount of missing data
#this will be missing data by sample
#this loop records how much data is missing for each apple
na_sum = c()
for (i in 1:nrow(abc_data)) {
na_sum[i] = sum(is.na(abc_data[i,7:16]))
}
#this will calculate what proportion of data is missing from each sample, and then plot the number of samples missing what proportion of data
missing_percent_by_sample = (na_sum/n_pheno)*100
barplot(table(missing_percent_by_sample))
#This is a base plot
#Missing data by phenotype is important
#Make a figure to show the missing data by phenotype
index_wild = which(abc_data$use == 'wild')
index_dess = which(abc_data$use == 'dessert')
na_prop_pheno_wild = c()
na_prop_pheno_dess = c()
for (i in 7:16) {#only loop through columns that have phenotype data (not metadata)
na_prop_pheno_wild[i] = (sum(is.na(abc_data[index_wild,i]))/length(index_wild))*100
na_prop_pheno_dess[i] = (sum(is.na(abc_data[index_dess,i]))/length(index_dess))*100
}
na_prop_pheno_dess
na_prop_pheno_wild
#this is the proportion of missing data for each pheno, but the first 6 entries are NAs. Need to remove the NAs.
na_prop_pheno_dess = na_prop_pheno_dess[7:16]
na_prop_pheno_dess
na_prop_pheno_wild = na_prop_pheno_wild[7:16]
na_prop_pheno_wild
#NAs removed
#Make dataframe with the prop_pheno missing that will be used to make a barplot in ggplot
pheno_dat = c(rbind(na_prop_pheno_dess, na_prop_pheno_wild))
group_names = rep(c("dessert", "wild"), 10)
pheno_names = colnames(abc_data)[7:16]
pheno_names_alt = c('Flowering Date', 'Precocity', 'Soluble Solids', 'Acidity', 'Harvest Date', 'Firmness', 'Weight', 'Change in acidity during storage', 'Change in firmness during storage', 'Phenolic Content')
pheno_names_alt = c(rbind(pheno_names_alt, pheno_names_alt))
dataframe2plot = data.frame(pheno_dat, group_names, pheno_names_alt)
#the following is transformations of the columns into factors so they can have levels.
dataframe2plot$pheno_names_alt = factor(pheno_names_alt, levels = rev(c('Precocity', 'Flowering Date', 'Harvest Date', 'Firmness', 'Weight', 'Acidity', 'Soluble Solids', 'Phenolic Content', 'Change in acidity during storage', 'Change in firmness during storage')))
dataframe2plot$group_names = factor(group_names, levels = c("wild","dessert"))
dataframe2plot[order(dataframe2plot$pheno_names_alt, dataframe2plot$group_names),]
#plot
percent_missing_data_chart = ggplot(dataframe2plot, aes(fill=group_names, y=pheno_names_alt, x= pheno_dat))+
geom_bar(position="dodge", stat="identity") +
labs(title= "% Data Missing by Phenotype", x='% Data Missing',y= 'Phenotype', fill = "Apple Type") +
scale_fill_manual(values = c('#FFDB6D','#00AFBB'))+theme_bw() +guides(fill= guide_legend(reverse= TRUE)) + scale_x_continuous( limits = c(0,100))+theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 10,))
percent_missing_data_chart
###Writing above plot to file ###
pdf(file= "supp_1_plot.pdf", width = 7, height = 4)
ggplot(dataframe2plot, aes(fill=group_names, y=pheno_names_alt, x= pheno_dat))+
geom_bar(position="dodge", stat="identity") +
labs(title= "% Data Missing by Phenotype", x='% Data Missing',y= 'Phenotype', fill = "Apple Type") +
scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() +guides(fill= guide_legend(reverse= TRUE)) + scale_x_continuous( limits = c(0,100))+theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 10,))
dev.off()
#After looking at this plot, I decided to keep all of the 10 phenotypes because, even with low sample sizes I feel that the analyses may be informative.
#Create a new data frame with the column headers named like in the figure above and sort it as it will be plotted for all following figures. Then write this table to file - it is the final phenotype file that will read in for the remainder of the analyses in this script.
#I will make a dara frame that has the useful metadata and the ten phenotypes
#DATA SORTING
abc_data = read.csv("Watts_et_al_2021_supp_data.csv")
index = which(abc_data$use == 'dessert' | abc_data$use == 'wild')
abc_data=abc_data[index,]
#removing NA data from the 'species' , and rewriting the index so i can analyse only dessert and wild apples with defined species attributes
abc_data=abc_data[!is.na(abc_data[,'species']),]
index_wild = which(abc_data$use == 'wild')
index_dess = which(abc_data$use == 'dessert')
#Modifying the labels on Wild and Dessert so that they are capitalized for graph labelling.
abc_data$use[index_wild] = "Wild"
abc_data$use[index_dess] = "Dessert"
c_to_k = c(1,2,5:9,21,22,35:39,41,44,46 )
abc_data = abc_data[,c_to_k]
#Theses are the columns I need, now I need to rename the col names
#Ive added 'use' to the pheno_names_alt
pheno_names_alt = c('Plant ID','Apple ID', 'Species','Use','Country','World','Release Year', 'Flowering Date', 'Precocity', 'Soluble Solids', 'Acidity', 'Harvest Date', 'Firmness', 'Weight', 'Change in acidity during storage', 'Change in firmness during storage', 'Phenolic Content')
for (i in 1:ncol(abc_data)){
names(abc_data)[i] = pheno_names_alt[i]
}
abc_data
# This has renamed them. Now they need to be reoredered.
# I am putting USE at the right-most column of meta-data for ease of trimming 'abc_data' later on
#Ive added 'Use' to pheno_names_order
pheno_names_order = c('Plant ID','Apple ID', 'Species','Country','World','Release Year','Use','Precocity', 'Flowering Date', 'Harvest Date', 'Firmness', 'Weight', 'Acidity', 'Soluble Solids', 'Phenolic Content', 'Change in acidity during storage', 'Change in firmness during storage')
abc_data = abc_data[,pheno_names_order]
######ADD NEEDED META DATA TO THIS FILE
#Okay. Now this table is ordered and named properly.
#writing to a final file.
write.csv(abc_data,file = '~/Desktop/DALMSc/Biostats/abc_project/final_phenotype_table.csv',row.names = FALSE)
#File written
#######################################################################
#PCA# #!#
#######################################################################
#In this section, I will analyse the data using a principal components analysis in order to determine if there are differences between wild and dessert apples.
setwd("~/Desktop/DALMSc/Biostats/abc_project")
abc_data = read.csv('~/Desktop/DALMSc/Biostats/abc_project/final_phenotype_table.csv')
#remove metadata for PCA
pca_data = abc_data[8:17]
#create indices
index_wild = which(abc_data$Use == 'Wild')
index_dess = which(abc_data$Use == 'Dessert')
layout(matrix(1:10, 2, 5, byrow = T))
#this will visualize each phenotype as a histogram
for (i in 1:ncol(pca_data)) {
hist(pca_data[,i], xlab = colnames(pca_data)[i])
}
#this data will need to be scaled in order to appropriately interpret it in the PCA.
#Scaling the data
pca_data_scaled = scale(pca_data)
#visualizing scaled data as histograms
layout(matrix(1:10, 2, 5, byrow = T))
for (i in 1:ncol(pca_data_scaled)) {
hist(pca_data_scaled[,i], xlab = colnames(pca_data[i]))
}
dev.off()
#Set NA values to 0
#removal of NAs by setting them to 0, which is now the mean of each phenotype
pca_data_scaled_noNAs = apply(pca_data_scaled, 2, function (x) {ifelse(is.na(x), 0, x)})
#running the PCA
pca1 = prcomp(pca_data_scaled_noNAs)
scores = pca1$x
#reporting the proportion variance explained by each PC
prop_var_tab = summary(pca1)
prop_var_tab
loadings = pca1$rotation
#Plotting the PCA
###
install.packages(ggplot2)
library(ggfortify)
###
#preliminary plot.
plot(pca1$x[,1],pca1$x[,3])
### PCA PLOTTING ###
#Plot PC1 and PC2
pdf(file= "PCA1_PC2_plot.pdf", width = 7, height = 4)
pc1pc2_plot = autoplot(pca1, data = abc_data, colour = 'Use')+xlab('PC1 (22.3%)')+ylab('PC2 (13.7%)')+scale_colour_manual(name = "Apple Type", labels = c('Cultivated','Wild'),values = c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.9, 0.9),legend.title.align = 0.5,legend.background = element_rect(size = 0.5,linetype = 'solid', colour = 'black'),legend.title = element_text(size = 10,face = "bold"), legend.text = element_text(size = 10),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))
pc1pc2_plot
dev.off()
#PLOT PC1 AND PC3
pc1pc3_plot= autoplot(pca1, x = 1, y = 3, data = abc_data, colour = 'Use')+xlab('PC1 (22.3%)')+ylab('PC3 (13.4%)')+scale_colour_manual(name = "Apple Type", labels = c('Cultivated','Wild'),values = c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.9, 0.9),legend.title.align = 0.5,legend.background = element_rect(size = 0.5,linetype = 'solid', colour = 'black'),legend.title = element_text(size = 10,face = "bold"), legend.text = element_text(size = 10),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))
pc1pc3_plot
#PLOT PC1 and PC4
pc1pc4_plot = autoplot(pca1, x = 1, y = 4, data = abc_data, colour = 'Use')+
labs(title= "Principal Components Analysis of Wild and Dessert Apples")+
scale_colour_manual(values = c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.9, 0.9),legend.title.align = 0.5,legend.background = element_rect(size = 0.5,linetype = 'solid', colour = 'black'),legend.title = element_text(size = 9,face = "bold"), legend.text = element_text(size = 8),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))
pc1pc4_plot
#PLOT PC2 and PC3
pc2pc3_plot = autoplot(pca1, x = 2, y = 3, data = abc_data, colour = 'Use')+
labs(title= "Principal Components Analysis of Wild and Dessert Apples")+
scale_colour_manual(values = c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.9, 0.9),legend.title.align = 0.5,legend.background = element_rect(size = 0.5,linetype = 'solid', colour = 'black'),legend.title = element_text(size = 9, face = "bold"), legend.text = element_text(size = 8),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))
pc2pc3_plot
#significance testing
pc1_scores = pca1$x[,1]
pc2_scores = pca1$x[,2]
pc3_scores = pca1$x[,3]
pc4_scores = pca1$x[,4]
pc1_sig = wilcox.test(pc1_scores[index_dess],pc1_scores[index_wild], exact = F)
pc1_sig
# W = 53893
#exact p value
pc1_sig$p.value
pc2_sig = wilcox.test(pc2_scores[index_dess],pc2_scores[index_wild], exact = F)
pc2_sig
# W = 13066
#exact p value
pc2_sig$p.value
pc3_sig =wilcox.test(pc3_scores[index_dess],pc3_scores[index_wild], exact = F)
pc3_sig
# W = 39203
#exact p value
pc3_sig$p.value
#PC1, PC2 and PC3 are all significant. PC4 is not.
wilcox.test(pc4_scores[index_dess],pc4_scores[index_wild], exact = F)
wilcox.test(pca1$x[index_dess,5],pca1$x[index_wild,5], exact = F)
wilcox.test(pca1$x[index_dess,6],pca1$x[index_wild,6], exact = F)
wilcox.test(pca1$x[index_dess,7],pca1$x[index_wild,7], exact = F)
wilcox.test(pca1$x[index_dess,8],pca1$x[index_wild,8], exact = F)
### CREATE VIOLIN PLOTS FOR PC SCORES ###
# Make a dataframe with PC scores and use.
pca_scores_df = data.frame(abc_data$Use, pc1_scores,pc2_scores, pc3_scores )
names(pca_scores_df)[1] = "Use"
names(pca_scores_df)[2] = "PC1 Score"
names(pca_scores_df)[3] = "PC2 Score"
names(pca_scores_df)[4] = "PC3 Score"
# PCA 1 scores
#pdf(file= "PCA1_scores_plot.pdf", width = 7, height = 4)
pc1scores = ggplot(pca_scores_df, aes(x= Use, y= pca_scores_df[,2], fill = Use))+ geom_violin(colour = 'black', alpha = 0.75,show.legend = FALSE )+ geom_boxplot(width=0.1, fill = "white") +theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(colour = "black", size=10))+labs(x = " ",y= names((pca_scores_df)[2]))+scale_fill_manual(values=c('#00AFBB','#FFDB6D'))+geom_signif(comparisons= list(c('Dessert','Wild')))+ scale_x_discrete(labels=c("Dessert" = "Cultivated", "Wild" = "Wild"))
print(pc1scores)
#dev.off()
# PCA 2 scores
#pdf(file= "PCA2_scores_plot.pdf", width = 7, height = 4)
pc2scores = ggplot(pca_scores_df, aes(x= Use, y= pca_scores_df[,3], fill = Use))+ geom_violin(colour = 'black', alpha = 0.75,show.legend = FALSE )+ geom_boxplot(width=0.1, fill = "white") +theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(colour = "black", size=10))+labs(x = " ", y= names((pca_scores_df)[3]))+scale_fill_manual(values=c('#00AFBB','#FFDB6D'))+geom_signif(comparisons= list(c('Dessert','Wild')))+ scale_x_discrete(labels=c("Dessert" = "Cultivated", "Wild" = "Wild"))
print(pc2scores)
#dev.off()
#PCA 3 scores
#pdf(file= "PCA3_scores_plot.pdf", width = 7, height = 4)
pc3scores = ggplot(pca_scores_df, aes(x= Use, y= pca_scores_df[,4], fill = Use))+ geom_violin(colour = 'black', alpha = 0.75,show.legend = FALSE )+ geom_boxplot(width=0.1, fill = "white") +theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(colour = "black", size=10))+labs(x = " ",y= names((pca_scores_df)[4]))+scale_fill_manual(values=c('#00AFBB','#FFDB6D'))+ geom_signif(comparisons= list(c('Dessert','Wild'))) + scale_x_discrete(labels=c("Dessert" = "Cultivated", "Wild" = "Wild"))
print(pc3scores)
#dev.off()
#Use ggarange to make the figure
#Layout should be as follows:
#PC1 v 2 - PC1 v 3
#PC1, PC2, PC3
library("cowplot")
pdf(file= "figure_1_PCA.pdf", width = 11, height = 8.5)
figure_1_PCA_experiment = ggdraw() +draw_plot(pc1scores, x = 0, y = 0, width = 0.33, height = 0.5) +draw_plot(pc2scores, x = 0.33, y = 0, width = 0.33, height = 0.5) + draw_plot(pc3scores, x = 0.66, y = 0, width = 0.33, height = 0.5) + draw_plot(pc1pc2_plot, x = 0, y = 0.5, width = 0.5, height= 0.5) + draw_plot(pc1pc3_plot, x = 0.5,y=0.5, width=0.5,height = 0.5)
figure_1_PCA_experiment
dev.off()
#######################################################################
#DENSITY DISTRIBUTIONS FOR EACH PHENOTYPE# #!#
#######################################################################
#In this section I will investigate the differences between dessert and wild apples for each of the phenotypes
setwd("~/Desktop/DALMSc/Biostats/abc_project")
abc_data = read.csv('~/Desktop/DALMSc/Biostats/abc_project/final_phenotype_table.csv')
install.packages(ggplot2)
library(ggfortify)
install.packages('devtools')
library(devtools)
install.packages('ggpubr')
library(ggpubr)
#create indicies
index_wild = which(abc_data$Use == 'Wild')
index_dess = which(abc_data$Use == 'Dessert')
#trim out uneeded meta data
abc_data = abc_data[7:17]
layout(matrix(1:10, 2, 5, byrow = T))
#The following loop will print out each phenotype comparison between wild and dessert apples USING DENSITY PLOTS
#pdf(file= "density_plots.pdf")
#par(mfrow = c(5, 2))
for (i in 2:ncol(abc_data)){
gg=ggplot(abc_data, aes(x= abc_data[,i], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x=names((abc_data)[i]),y= 'Density')
print(gg)
}
#dev.off()
#####
#the next section is purely for PDF printing purposes, for the DENSITY PLOTS
#####
#PRECOCITY
#pdf(file= "Precocity_densityplot.pdf", width = 7, height = 4)
d1=ggplot(abc_data, aes(x= abc_data[,2], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x= ' Precocity (years)',y= 'Density')
print(d1)
#dev.off()
#FLOWERING DATE
#pdf(file= "floweringdate_densityplot.pdf", width = 7, height = 4)
d2=ggplot(abc_data, aes(x= abc_data[,3], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='Flowering date (julian days)',y= 'Density')
print(d2)
#dev.off()
#HARVEST DATE
#pdf(file= "Harvestdate_densityplot.pdf", width = 7, height = 4)
d3=ggplot(abc_data, aes(x= abc_data[,4], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='Harvest date (julian days)',y= 'Density')
print(d3)
#dev.off()
#FIRMNESS
#pdf(file= "Firmness_densityplot.pdf", width = 7, height = 4)
d4=ggplot(abc_data, aes(x= abc_data[,5], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='Firmness (kg/cm2)',y= 'Density')
print(d4)
#dev.off()
#WEIGHT
#pdf(file= "Weight_densityplot.pdf", width = 7, height = 4)
d5=ggplot(abc_data, aes(x= abc_data[,6], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='Weight (g)',y= 'Density')
print(d5)
#dev.off()
#ACIDITY
#pdf(file= "Acidity_densityplot.pdf", width = 7, height = 4)
d6=ggplot(abc_data, aes(x= abc_data[,7], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='Acidity (malic acid mg/ml)',y= 'Density')
print(d6)
#dev.off()
#SOLUBLE SOLIDS
#pdf(file= "solublesolids_densityplot.pdf", width = 7, height = 4)
d7=ggplot(abc_data, aes(x= abc_data[,8], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='Soluble solids (Brix)',y= 'Density')
print(d7)
#dev.off()
#PHENOLIC CONTENT
#pdf(file= "Phenoliccontent_densityplot.pdf", width = 7, height = 4)
d8=ggplot(abc_data, aes(x= abc_data[,9], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x= 'Phenolic content (umol GAE/g)',y= 'Density')
print(d8)
#dev.off()
#CHANGE IN ACIDITY
#pdf(file= "aciditycahnge_densityplot.pdf", width = 7, height = 4)
d9=ggplot(abc_data, aes(x= abc_data[,10], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='% change in acidity during storage',y= 'Density')
print(d9)
#dev.off()
#CHANGE IN FIRMNESS
#pdf(file= "Changeinfirmness_densityplot.pdf", width = 7, height = 4)
d10=ggplot(abc_data, aes(x= abc_data[,11], fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+scale_colour_manual(labels = c('Cultivated','Wild'),values=c('#00AFBB','#FFDB6D'))+theme_bw() + theme(legend.position = c(0.85, 0.85),legend.title = element_blank(),panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( x='% change in firmness during storage',y= 'Density')
print(d10)
#dev.off()
library(ggpubr)
density_figure = ggarrange(d1,d2,d3,d4,d5,d6,d7,d8,d9,d10,ncol = 2, nrow = 5)
pdf(file= "density_figure.pdf", width = 8.5, height = 11)
density_figure
dev.off()
test_df = data.frame(1,2,3,4,5)
#WILCOXON TESTS
#loop for wilcox tests
for (i in 2:ncol(abc_data)){
c = abc_data[,i]
print(names(abc_data)[i])
wt = (wilcox.test(c[index_dess],c[index_wild]))
print(wt)
print(wt$p.value)
if ((wt$p.value) < 0.05){print('SIGNIFICANT')}
else {print('NOT SINGNIFICANT')}
}
wilcox.test(abc_data)
#Significant results:
#Precocity, flowering date, harvest date, weight, acidity, phenolic content
#Exact P values are printed out in the above loop.
# Note: these tests do not adjust the p value, so the p values in the publication must be adjusted (divided by the number of observations) post-hoc.
# to retrive exact P-values:
test_prec = wilcox.test(abc_data$Precocity[index_wild],abc_data$Precocity[index_dess])
test_prec$p.value
test_flow = wilcox.test(abc_data$Flowering.Date[index_wild],abc_data$Flowering.Date[index_dess])
test_flow$p.value
test_harv = wilcox.test(abc_data$Harvest.Date[index_wild],abc_data$Harvest.Date[index_dess])
test_harv$p.value
test_weight = wilcox.test(abc_data$Weight[index_wild],abc_data$Weight[index_dess])
test_weight$p.value
test_acid = wilcox.test(abc_data$Acidity[index_wild],abc_data$Acidity[index_dess])
test_acid$p.value
test_phenol = wilcox.test(abc_data$Phenolic.Content[index_wild],abc_data$Phenolic.Content[index_dess])
test_phenol$p.value
test_xyz = wilcox.test(abc_data$Change.in.firmness.during.storage[index_wild],abc_data$Change.in.firmness.during.storage[index_dess])
test_xyz$p.value
#Next, I would like to calculate the % difference in the medians for each of the significant phenotypes:
#Precocity, flowering date, harvest date, weight, acidity, phenolic content #I need to take the median of the wild, subtract median of the cultivated and then divide by the median of the wild.
# trim data
sig_columns = c(1,2,3,4,6,7,9)
abc_data_percent_change = abc_data[,sig_columns]
#Precocity
precocity_wild_median = median(as.vector(abc_data$Precocity[index_wild]), na.rm = TRUE)
precocity_cult_median = median(as.vector(abc_data$Precocity[index_dess]), na.rm = TRUE)
precocity_change = (precocity_wild_median - precocity_cult_median)/precocity_wild_median
precocity_change
#Wild apples are 21% higher in precocity
index_wild = which(abc_data_percent_change$Use == "Wild")
index_dess = which(abc_data_percent_change$Use == "Dessert")
#the following is a loop to compare all of the phenotypes and present the change in a vector
change_vector = c()
for (i in 2:ncol(abc_data_percent_change)){
wild_median = median(as.vector(abc_data_percent_change[,i][index_wild]), na.rm = TRUE)
cult_median = median(as.vector(abc_data_percent_change[,i][index_dess]), na.rm = TRUE)
change = (wild_median - cult_median)/wild_median
change = change*100
print(change)
change_vector[i-1]= change
}
change_vector
# Cultivated apples are 21% less precocious than wild
# Cultivated apples flower 2% later - 3 days than wild (Flowering season from 143 - 161 days = 18 days) 3/18 = 16.6% later
# Cultivated apples are harvested 5.9% later - 15 days than wild (Harvest season from 224 - 290 days = 66 days) 15/66 = 22.7%
# Cultivated apples are 262% heavier than wild
# Cultivated apples are 43% less acidic than wild
# Cultivated apples are 68% lower in phenolic content than wild
# for flowering date and harvest date, # of days may be more understandable
#Harvest date
wild_median_harv_date = median(as.vector(abc_data_percent_change$Harvest.Date[index_wild]), na.rm = TRUE)
cult_median_harv_date = median(as.vector(abc_data_percent_change$Harvest.Date[index_dess]),na.rm = TRUE)
harv_date_difference = wild_median_harv_date - cult_median_harv_date
harv_date_difference
#Flowering date
wild_median_flow_date = median(as.vector(abc_data_percent_change$Flowering.Date[index_wild]), na.rm = TRUE)
cult_median_flow_date = median(as.vector(abc_data_percent_change$Flowering.Date[index_dess]),na.rm = TRUE)
flow_date_difference = wild_median_flow_date - cult_median_flow_date
flow_date_difference
#Wild apples have a harvest date that is 15 days earlier than cultivated
#Wild apples have a flowering date that is 3 days earlier than cultivated
#######################################################################
#PHENOTYPIC DIFFERENCES AND RELEASE YEAR ANALYSIS# #!#
#######################################################################
# I would like to make 10, 2-panel figures (all in one) that shows the phenotypic differences (or lack thereof) as well as a time line for release year of dessert apples side by side. It will have the violin plot on the left and the year of release plotted against phenotypic value on the right #In this next section I will plot the phenotypes against the year of release
setwd("~/Desktop/DALMSc/Biostats/abc_project")
abc_data = read.csv('~/Desktop/DALMSc/Biostats/abc_project/final_phenotype_table.csv')
install.packages("ggpubr")
library("ggpubr")
index_wild = which(abc_data$Use == 'Wild')
index_dess = which(abc_data$Use == 'Dessert')
#Make a loop to graph them all
#remove meta data, keep release year
c_to_k = c(6,8:17)
abc_data_time = abc_data[c_to_k]
for (i in 2:ncol(abc_data_time)){
gg=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,i] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= names(abc_data_time)[i])+stat_cor(method="pearson",digits=3)
print(gg)
}
# From prior knowledge from Watts, I want to change the scale on TPC graph to take a closer look at the trend
ggplot(abc_data_time, aes(x= Release.Year, y= Phenolic.Content ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= 'Phenolic Content') + ylim(0,15)+ xlim (1800,2018)+stat_cor(method="pearson",digits=3)
#PEARSONS CORRELATION TEST
test = cor.test(abc_data_time$Release.Year, abc_data_time$Phenolic.Content, method = c("pearson"))
test$estimate
#PRINTING OUT SCATTER PLOTS VISUALIZED ABOVE
#PRECOCITY
s1=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,2] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= names(abc_data_time)[2])+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s1)
#FLOWERING DATE
s2=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,3] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= 'Flowering Date')+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s2)
# I scaled the X axis, however I am not sure if the datapoint that includes one apple from 1700 is considered still.
#HARVEST DATE
s3=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,4] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= 'Harvest Date')+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s3)
#FIRMNESS
s4=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,5] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= names(abc_data_time)[5])+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s4)
#WEIGHT
s5=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,6] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= names(abc_data_time)[6])+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s5)
#ACIDITY
s6=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,7] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= names(abc_data_time)[7])+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s6)
#SOLUBLE SOLIDS
s7=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,8] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= 'Soluble Solids')+stat_cor(method="pearson",digits=3)+xlim(1800,2015)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s7)
#PHENOLIC CONTENT
s8=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,9] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+labs( x='Release Year',y= 'Phenolic Content')+stat_cor(method="pearson",digits=3)+geom_smooth(method = "lm", colour = "black", se = FALSE)+xlim(1800,2015)+scale_y_continuous(limits = c(0.3,19.3))
print(s8)
##
#Use +xlim(1800,2015) to scale the x axis if needed
##
#CHANGE IN ACIDITY DURING STORAGE
s9=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,10] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+xlim(1800,2015)+labs( x='Release Year',y= 'Change in acidity during storage')+stat_cor(method="pearson",digits=3)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s9)
#CHANGE IN FIRMNESS DURING STORAGE
s10=ggplot(abc_data_time, aes(x= Release.Year, y= abc_data_time[,11] ))+ geom_point(colour ='#00AFBB')+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2), plot.title = element_text(size = 9))+xlim(1800,2015)+labs( x='Release Year',y= 'Change in firmness during storage')+stat_cor(method="pearson",digits=3)+geom_smooth(method = "lm", colour = "black", se = FALSE)
print(s10)
# in order to get the two types of plots to be visually comparable, I will need to make them have the same scales and I would like to make the desity plots have a smaller x axis
#To do this I think I will need to do all of the density plots individually to make them match
#included below are the plots for only the traits with significant trends
#START WITH PRECOCITY
#the following section is purely for printing purposes.
abc_data_wild = abc_data[index_wild,]
g1=ggplot(abc_data_wild, aes(y=abc_data_wild$Precocity, fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(values=c('#FFDB6D'))+scale_colour_manual(values=c('#FFDB6D'))+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( y='Precocity',x= 'Density') + scale_y_continuous(limits = c(0,4))
print(g1)
#VIOLIN VARIATION
g1=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Precocity))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(0,4))
print(g1)
#FLOWERING DATE
g2=ggplot(abc_data_wild, aes(y=abc_data_wild$Flowering.Date, fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(values=c('#FFDB6D'))+scale_colour_manual(values=c('#FFDB6D'))+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( y='Flowering Date',x= 'Density') + scale_y_continuous(limits = c(143,161.5))
print(g2)
#VIOLIN VARIATION
#pdf(file= "flowering_date_violing.pdf", width = 3, height = 4)
g2=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Flowering.Date))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(143,161.5))
print(g2)
#dev.off()
#HARVEST DATE
g3=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Harvest.Date))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(225,290))
print(g3)
#FIRMNESS
g4=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Firmness))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(2.5,14))
print(g4)
#WEIGHT
g5=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Weight))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(20,460))
print(g5)
#ACIDITY
g6=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Acidity))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(2,26))
print(g6)
# SOLUBLE SOLIDS
g7=ggplot(abc_data_wild, aes(y=abc_data_wild$Soluble.Solids, fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(values=c('#FFDB6D'))+scale_colour_manual(values=c('#FFDB6D'))+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( y='Soluble Solids',x= 'Density') + scale_y_continuous(limits = c(7,16.5))
print(g7)
#VIOLIN VARIATION
#pdf(file= "soluble_solids_violin.pdf", width = 3, height = 4)
g7=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Soluble.Solids))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(7,16.5))
print(g7)
#dev.off()
#PHENOLIC CONTENT
g8=ggplot(abc_data_wild, aes(y=abc_data_wild$Phenolic.Content, fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(values=c('#FFDB6D'))+scale_colour_manual(values=c('#FFDB6D'))+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( y='Phenolic Content',x= 'Density') + scale_y_continuous(limits = c(0.3,19.3))
print(g8)
#VIOLIN VARIATION
#pdf(file= "phenolic_content_violin.pdf", width = 3, height = 4)
g8=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Phenolic.Content))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + ylim(0.3,19.3)
print(g8)
#dev.off()
#CHANGE IN ACIDITY DURING STORAGE
g9=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Change.in.acidity.during.storage))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples',y= element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(-77,29))
print(g9)
#CHANGE IN FIRMNESS DURING STORAGE
g10=ggplot(abc_data_wild, aes(y=abc_data_wild$Change.in.firmness.during.storage, fill = Use, colour= Use))+ geom_density(alpha = 0.4)+scale_fill_manual(values=c('#FFDB6D'))+scale_colour_manual(values=c('#FFDB6D'))+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2))+labs( y='Change in firmness during storage',x= 'Density') + scale_y_continuous(limits = c(-68,15))
print(g10)
#VIOLIN VARIATION
#pdf(file= "change_in_firmness_violin.pdf", width = 3, height = 4)
g10=ggplot(abc_data_wild, aes(x = Use, y= abc_data_wild$Change.in.firmness.during.storage))+ geom_violin(colour = 'black', fill ='#FFDB68', alpha = 0.75 )+geom_boxplot(width = 0.1) + labs( x='Wild Apples', y = element_blank(), fill = "Apple Type")+theme_bw() + theme(panel.border = element_blank(), axis.text=element_text(colour = "black", size=10), axis.title=element_text(size = 10, face = "bold"), panel.grid.major = element_blank(), panel.grid.minor = element_blank(), axis.line = element_line(colour = "black", size = 0.2)) + scale_y_continuous(limits = c(-68,15))
print(g10)
#dev.off()
#Using ggarrange to make the figure
#For this figure I will just be looking at the significant scatter plots paired with thier violin plots. This will be:
#Flowering Date -- s2,g2
#Soluble Solids -- s7,g3
#Phenolic Content -- s8,g4
#Change in firmness -- s10,g5
pdf(file= "figure_3_timeline.pdf", width = 7, height = 3)
figure_3_experiment = ggdraw() +draw_plot(s8,x=0,y=0,width = 0.66,height = 1) +draw_plot(g8,x=0.66,y=0,width = 0.33,height = 1)
figure_3_experiment
dev.off()
pdf(file= "Supp_figure_2.pdf", width = 8.5, height = 11)
ggarrange(s1,g1,s2,g2,s3,g3,s4,g4,s5,g5,ncol = 2, nrow = 5, common.legend = TRUE)
dev.off()
pdf(file= "Supp_figure_3.pdf", width = 8.5, height = 11)
ggarrange(s6,g6,s7,g7,s8,g8,s9,g9,s10,g10, ncol = 2, nrow = 5, common.legend = TRUE)
dev.off()