-
Notifications
You must be signed in to change notification settings - Fork 206
/
demo2_4.R
108 lines (96 loc) · 3.75 KB
/
demo2_4.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#' ---
#' title: "Bayesian data analysis demo 2.4"
#' author: "Aki Vehtari, Markus Paasiniemi"
#' date: "`r format(Sys.Date())`"
#' output:
#' html_document:
#' theme: readable
#' code_download: true
#' ---
#' ## Probability of a girl birth given placenta previa (BDA3 p. 37).
#'
#' Calculate the posterior distribution on a discrete grid of points by
#' multiplying the likelihood and a non-conjugate prior at each point,
#' and normalizing over the points. Simulate draws from the resulting
#' non-standard posterior distribution using inverse cdf using the
#' discrete grid.
#'
#' ggplot2 and gridExtra are used for plotting, tidyr for manipulating data frames
#+ setup, message=FALSE, error=FALSE, warning=FALSE
library(ggplot2)
theme_set(theme_minimal())
library(gridExtra)
library(tidyr)
library(dplyr)
#' ### Evaluating posterior with non-conjugate prior in grid
#'
#' Posterior with observations (437,543) and uniform prior (Beta(1,1))
a <- 437
b <- 543
#' Evaluate densities at evenly spaced points between 0.1 and 1
df1 <- data.frame(theta = seq(0.1, 1, 0.001))
df1$con <- dbeta(df1$theta, a+1, b+1)
#' Compute the density of non-conjugate prior in discrete points, i.e. in a grid
#' this non-conjugate prior is the same as in figure 2.4 in the book
pp <- rep(1, nrow(df1))
pi <- sapply(c(0.388, 0.488, 0.588), function(pi) which(df1$theta == pi))
pm <- 11
pp[pi[1]:pi[2]] <- seq(1, pm, length.out = length(pi[1]:pi[2]))
pp[pi[3]:pi[2]] <- seq(1, pm, length.out = length(pi[3]:pi[2]))
#' normalize the prior
df1$nc_p <- pp / sum(pp)
#' compute the un-normalized non-conjugate posterior in a grid
po <- dbinom(a, a+b, df1$theta) * pp
#' normalize the posterior
df1$nc_po <- po / sum(po)
#' Plot posterior with uniform prior, non-conjugate
#' prior and the corresponding non-conjugate posterior
# pivot the data frame into key-value pairs
# and change variable names for plotting
df2 <- df1 %>%
pivot_longer(cols = -theta, names_to = "grp", values_to = "p") %>%
mutate(grp = factor(grp, labels=c('Posterior with uniform prior',
'Non-conjugate prior',
'Non-conjugate posterior')))
## levels(df2$grp) <-
ggplot(data = df2) +
geom_line(aes(theta, p)) +
facet_wrap(~grp, ncol = 1, scales = 'free_y') +
coord_cartesian(xlim = c(0.35,0.6)) +
scale_y_continuous(breaks=NULL) +
labs(x = '', y = '')
#' ### Inverse cdf sampling
#'
#' compute the cumulative density in a grid
df1$cs_po <- cumsum(df1$nc_po)
#' Sample from uniform distribution U(0,1)
# set.seed(seed) is used to set seed for the randon number generator
set.seed(2601)
# runif(k) returns k uniform random numbers from interval [0,1]
r <- runif(10000)
#' Inverse-cdf sampling
# function to find the value smallest value theta at which the cumulative
# sum of the posterior densities is greater than r.
invcdf <- function(r, df) df$theta[sum(df$cs_po < r) + 1]
# sapply function for each sample r. The returned values s are now
# random draws from the distribution.
s <- sapply(r, invcdf, df1)
#' Create three plots: p1 is the posterior, p2 is the cdf of the posterior
#' and p3 is the histogram of posterior draws (drawn using inv-cdf)
p1 <- ggplot(data = df1) +
geom_line(aes(theta, nc_po)) +
coord_cartesian(xlim = c(0.35, 0.6)) +
labs(title = 'Non-conjugate posterior', x = '', y = '') +
scale_y_continuous(breaks = NULL)
p2 <- ggplot(data = df1) +
geom_line(aes(theta, cs_po)) +
coord_cartesian(xlim = c(0.35, 0.6)) +
labs(title = 'Posterior-cdf', x = '', y = '') +
scale_y_continuous(breaks = NULL)
p3 <- ggplot() +
geom_histogram(aes(s), binwidth = 0.003) +
coord_cartesian(xlim = c(0.35, 0.6)) +
labs(title = 'Histogram of posterior draws', x = '', y = '') +
scale_y_continuous(breaks = NULL)
# combine the plots
grid.arrange(p1, p2, p3)