diff --git a/DESCRIPTION b/DESCRIPTION
index 67daad2..a962934 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -2,14 +2,14 @@ Package: walker
 Type: Package
 Title: Bayesian Generalized Linear Models with Time-Varying Coefficients
 Version: 0.4.1
-Date: 2020-05-19
+Date: 2020-08-10
 Description: Bayesian generalized linear models with time-varying coefficients.
     Gaussian, Poisson, and binomial observations are supported. The Markov chain
     Monte Carlo computations are done using Hamiltonian Monte Carlo provided by
     Stan, using a state space representation of the model in order to marginalise
     over the coefficients for efficient sampling. For non-Gaussian models, the
     package uses the importance sampling type estimators based on
-    approximate marginal MCMC as in Vihola, Helske, Franks (2017, ).
+    approximate marginal MCMC as in Vihola, Helske, Franks (2020, ).
 Authors@R:
     person(given = "Jouni",
            family = "Helske",
diff --git a/R/walker.R b/R/walker.R
index 8776f9a..bd5e7ed 100644
--- a/R/walker.R
+++ b/R/walker.R
@@ -93,7 +93,31 @@
 #' pred <- predict(fit, newdata)
 #' plot_predict(pred)
 #' }
+#' \dontrun{
+#' # example on scalability
+#' set.seed(1)
+#' n <- 2^12
+#' beta1 <- cumsum(c(0.5, rnorm(n - 1, 0, sd = 0.05)))
+#' beta2 <- cumsum(c(-1, rnorm(n - 1, 0, sd = 0.15)))
+#' x1 <- rnorm(n, mean = 2)
+#' x2 <- cos(1:n)
+#' rw <- cumsum(rnorm(n, 0, 0.5))
+#' signal <- rw + beta1 * x1 + beta2 * x2
+#' y <- rnorm(n, signal, 0.5)
 #'
+#' d <- data.frame(y, x1, x2)
+#'
+#' n <- 2^(6:12)
+#' times <- numeric(length(n))
+#' for(i in seq_along(n)) {
+#'   times[i] <- sum(get_elapsed_time(
+#'     walker(y ~ 0 + rw1(~ x1 + x2,
+#'       beta = c(0, 10), sigma = c(0, 10)),
+#'       sigma_y = c(0, 10), data = d[1:n[i],],
+#'       chains = 1, seed = 1, refresh = 0)$stanfit))
+#' }
+#' plot(log2(n), log2(times))
+#'}
 walker <- function(formula, data, sigma_y_prior, beta, init, chains,
                    return_x_reg = FALSE, gamma_y = NULL, ...) {
diff --git a/inst/CITATION b/inst/CITATION
index 6eec837..84ef21a 100644
--- a/inst/CITATION
+++ b/inst/CITATION
@@ -1,11 +1,10 @@
 c(
   bibentry(
-    bibtype = "Manual",
-    title = "{walker}: Bayesian Generalized Linear Models with Time-Varying Coefficients",
+    bibtype = "Article",
     author = "Jouni Helske",
-    year = sub("-.*", "", meta$Date),
-    note = sprintf("R package version %s", meta$Version),
-    url = "http://github.com/helske/walker",
+    title = "Efficient Bayesian generalized linear models with time-varying coefficients: The walker package in {R}",
+    journal = "Submitted",
+    year = "2020",
     key = "walker"
   ),
   bibentry(
@@ -15,7 +14,17 @@ c(
     journal = "Ar{X}iv e-prints",
     eprint = "1609.02541",
     primaryClass = "stat.CO",
-    year = "2020",
+    year = "2017",
     key = "ISMCMC"
+  ),
+  bibentry(
+    bibtype = "Manual",
+    title = "{walker}: Bayesian Generalized Linear Models with Time-Varying Coefficients",
+    author = "Jouni Helske",
+    year = sub("-.*", "", meta$Date),
+    note = sprintf("R package version %s", meta$Version),
+    url = "http://github.com/helske/walker",
+    key = "package"
   )
+
 )
diff --git a/vignettes/walker.Rmd b/vignettes/walker.Rmd
index 3c20b40..f241154 100644
--- a/vignettes/walker.Rmd
+++ b/vignettes/walker.Rmd
@@ -156,7 +156,7 @@ With naive implementation we get smaller effective sample sizes and much higher
 
 In this vignette we illustrated the benefits of marginalisation in the context of time-varying regression models. 
 The underlying idea is not new; this approach is typical in classic Metropolis-type algorithms for linear-Gaussian state space models, where the marginal likelihood $p(y | \theta)$ (with $\theta$ denoting the hyperparameters, i.e. not the latent states such as the $\beta$'s in the current context) is used in the computation of the acceptance probability. Here we rely on the readily available Hamiltonian Monte Carlo based `Stan` software, thus allowing us to enjoy the benefits of the diverse tools of the `Stan` community.
 
-The motivation behind the `walker` was to test the efficiency of importance sampling type weighting method of @vihola-helske-franks in a dynamic generalized linear model setting within the HMC context. Although some of our other preliminary results have suggested that naive combination of the HMC with IS weighting can provide rather limited computational gains, in case of dynamic GLMs so called global approximation technique [@vihola-helske-franks] provides fast and robust alternative to standard HMC approach of `Stan`.
+The motivation behind the `walker` package was to test the efficiency of the importance sampling type weighting method of @vihola-helske-franks in a dynamic generalized linear model setting within the HMC context. Although some of our other preliminary results have suggested that a naive combination of HMC with IS weighting can provide rather limited computational gains, in the case of GLMs with time-varying coefficients the so-called global approximation technique [@vihola-helske-franks] provides a fast and robust alternative to the standard HMC approach of `Stan`.
 
 ## Acknowledgements
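
For the linear-Gaussian case referred to in the vignette hunk above, the marginalised likelihood $p(y \mid \theta)$ is available in closed form from the Kalman filter, and its role is easiest to see from the acceptance probability itself. A brief sketch of the standard Metropolis-type form (the proposal density $q$ and the proposed value $\theta'$ are notation introduced here for illustration, not taken from the vignette):

$$
\alpha(\theta, \theta') = \min\left(1, \frac{p(y \mid \theta')\, p(\theta')\, q(\theta \mid \theta')}{p(y \mid \theta)\, p(\theta)\, q(\theta' \mid \theta)}\right),
\qquad
p(y \mid \theta) = \int p(y \mid \beta, \theta)\, p(\beta \mid \theta)\, \mathrm{d}\beta .
$$

Because the time-varying coefficients $\beta$ are integrated out, the sampler only has to explore the low-dimensional hyperparameter space, which is the source of the efficiency gains discussed in the vignette.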