From 53a58e59b3d479bfa77e553672f1932c499ab5a3 Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 18 Sep 2019 10:05:10 +0200 Subject: [PATCH 01/13] improved downscaleChunk --- R/downscaleCV.R | 2 +- R/downscaleChunk.R | 10 +++++++++- man/downscaleCV.Rd | 2 +- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/R/downscaleCV.R b/R/downscaleCV.R index e3b9de77..6bc4517f 100644 --- a/R/downscaleCV.R +++ b/R/downscaleCV.R @@ -44,7 +44,7 @@ #' @param ... Optional parameters. These parameters are different depending on the method selected. #' Every parameter has a default value set in the atomic functions in case that no selection is wanted. #' Everything concerning these parameters is explained in the section \code{Details} of the function \code{\link[downscaleR]{downscaleTrain}}. However, if wanted, the atomic functions can be seen here: -#' \code{\link[downscaleR]{glm.train}} and \code{\link[deepnet]{nn.train}}. +#' \code{\link[downscaleR]{analogs.train}}, \code{\link[downscaleR]{glm.train}} and \code{\link[deepnet]{nn.train}}. #' @details The function relies on \code{\link[downscaleR]{prepareData}}, \code{\link[downscaleR]{prepareNewData}}, \code{\link[downscaleR]{downscaleTrain}}, and \code{\link[downscaleR]{downscalePredict}}. #' For more information please visit these functions. It is envisaged to allow for a flexible fine-tuning of the cross-validation scheme. It uses internally the \pkg{transformeR} #' helper \code{\link[transformeR]{dataSplit}} for flexible data folding. diff --git a/R/downscaleChunk.R b/R/downscaleChunk.R index 1546b888..061b145f 100644 --- a/R/downscaleChunk.R +++ b/R/downscaleChunk.R @@ -87,5 +87,13 @@ downscaleChunk <- function(x, y, newdata, }) p <- NULL }) - NULL + + pred <- list() + for (i in 1:(length(newdata)+1)) { + lf <- list.files("./", pattern = paste0("dataset",i), full.names = TRUE) + chunk.list <- lapply(lf, function(x) get(load(x))) + pred[[i]] <- bindGrid(chunk.list, dimension = "lat") + file.remove(lf) + } + return(pred) } diff --git a/man/downscaleCV.Rd b/man/downscaleCV.Rd index f52125a6..ca038d99 100644 --- a/man/downscaleCV.Rd +++ b/man/downscaleCV.Rd @@ -46,7 +46,7 @@ more details about this parameter.} \item{...}{Optional parameters. These parameters are different depending on the method selected. Every parameter has a default value set in the atomic functions in case that no selection is wanted. Everything concerning these parameters is explained in the section \code{Details} of the function \code{\link[downscaleR]{downscaleTrain}}. However, if wanted, the atomic functions can be seen here: -\code{\link[downscaleR]{glm.train}} and \code{\link[deepnet]{nn.train}}.} +\code{\link[downscaleR]{analogs.train}}, \code{\link[downscaleR]{glm.train}} and \code{\link[deepnet]{nn.train}}.} } \value{ The reconstructed downscaled temporal serie. From af181b2ffeb5780cfcd13c624644ff89f135a1bd Mon Sep 17 00:00:00 2001 From: Ana Date: Mon, 14 Oct 2019 18:06:10 +0200 Subject: [PATCH 02/13] add QDM and DQM bias correction methods --- R/biasCorrection.R | 200 ++++++++++++++++++++++++++++++++++++++-- man/biasCorrection.Rd | 40 ++++++-- man/biasCorrection1D.Rd | 6 +- man/dqm.Rd | 35 +++++++ man/qdm.Rd | 36 ++++++++ 5 files changed, 300 insertions(+), 17 deletions(-) create mode 100644 man/dqm.Rd create mode 100644 man/qdm.Rd diff --git a/R/biasCorrection.R b/R/biasCorrection.R index c82dbd8b..a4ab6dfb 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -42,7 +42,7 @@ #' (\code{densfun}, \code{start}, \code{...}). Only used when applying the "pqm" method #' (parametric quantile mapping). Please, read the \code{\link[MASS]{fitdistr}} help #' document carefully before setting the parameters in \code{fitdistr.args}. -#' @param n.quantiles Integer indicating the number of quantiles to be considered when method = "eqm". Default is NULL, +#' @param n.quantiles Integer indicating the number of quantiles to be considered when method = "eqm", "dqm", "qdm". Default is NULL, #' that considers all quantiles, i.e. \code{n.quantiles = length(x[i,j])} (being \code{i} and \code{j} the #' coordinates in a single location). #' @param extrapolation Character indicating the extrapolation method to be applied to correct values in @@ -52,6 +52,7 @@ #' above which precipitation (temperature) values are fitted to a Generalized Pareto Distribution (GPD). #' Values below this threshold are fitted to a gamma (normal) distribution. By default, 'theta' is the 95th #' percentile (5th percentile for the left tail). Only for \code{"gpqm"} method. +#' @param detrend logical. Detrend data prior to bias correction? Only for \code{"dqm"}. Default. TRUE. #' @param join.members Logical indicating whether members should be corrected independently (\code{FALSE}, the default), #' or joined before performing the correction (\code{TRUE}). It applies to multimember grids only (otherwise ignored). #' @template templateParallelParams @@ -59,9 +60,9 @@ #' @details #' #' The methods available are \code{"eqm"}, \code{"delta"}, -#' \code{"scaling"}, \code{"pqm"}, \code{"gpqm"}\code{"loci"}, -#' \code{"ptr"} (the four latter used only for precipitation) and -#' \code{"variance"} (only for temperature). +#' \code{"scaling"}, \code{"pqm"}, \code{"gpqm"}, \code{"loci"}, +#' \code{"ptr"} (the four latter used only for precipitation), +#' \code{"variance"} (only for temperature), \code{"dqm"} and \code{"qdm"}. #' #' These are next briefly described: #' @@ -105,6 +106,7 @@ #' The user can specify a different threshold by modifying the parameter theta. It is applicable to precipitation data. #' #' \strong{mva} +#' #' Mean and Variance Adjustment. #' #' \strong{variance} @@ -123,6 +125,23 @@ #' time series in an exponential form. The power parameter is estimated on a monthly basis using a 90-day window centered on the interval. The power is defined by matching the coefficient #' of variation of corrected daily simulated precipitation with the coefficient of variation of observed daily precipitation. It is calculated by root-finding algorithm using Brent's method. #' +#'\strong{dqm} +#' +#' Detrended quantile matching with delta-method extrapolation, described in Cannon et al. 2015. +#' It consists on (i) removing the long-term mean (linear) trend; +#' (ii) eqm is applied to the detrended series; +#' (iii) the mean trend is then reapplied to the bias-adjusted series. +#' It preserves the mean change signal in a climate change context. +#' It allows relative (multiplicative) and additive corrections. +#' +#'\strong{qdm} +#' +#' Quantile delta mapping, described in Cannon et al. 2015. +#' It consists on (i) detrending the individual quantiles; +#' (ii) QM is applied to the detrended series; +#' (iii) the projected trends are then reapplied to the bias-adjusted quantiles. +#' It explicitly preserves the change signal in all quantiles. +#' It allows relative (multiplicative) and additive corrections. #' #' @section Note on the bias correction of precipitation: #' @@ -158,6 +177,7 @@ #' #' \item M. R. Tye, D. B. Stephenson, G. J. Holland and R. W. Katz (2014) A Weibull Approach for Improving Climate Model Projections of Tropical Cyclone Wind-Speed Distributions, Journal of Climate, 27, 6119-6133 #' +#' \item Cannon, A.J., S.R. Sobie, and T.Q. Murdock (2015) Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?. J. Climate, 28, 6938–6959, https://doi.org/10.1175/JCLI-D-14-00754.1 #' } #' @author S. Herrera, M. Iturbide, J. Bedia #' @export @@ -246,7 +266,7 @@ biasCorrection <- function(y, x, newdata = NULL, precipitation = FALSE, - method = c("delta", "scaling", "eqm", "pqm", "gpqm", "loci"), + method = c("delta", "scaling", "eqm", "pqm", "gpqm", "loci","dqm","qdm"), cross.val = c("none", "loo", "kfold"), folds = NULL, window = NULL, @@ -256,12 +276,13 @@ biasCorrection <- function(y, x, newdata = NULL, precipitation = FALSE, n.quantiles = NULL, extrapolation = c("none", "constant"), theta = .95, + detrend=TRUE, join.members = FALSE, parallel = FALSE, max.ncores = 16, ncores = NULL) { if (method == "gqm") stop("'gqm' is not a valid choice anymore. Use method = 'pqm' instead and set fitdistr.args = list(densfun = 'gamma')") - method <- match.arg(method, choices = c("delta", "scaling", "eqm", "pqm", "gpqm", "mva", "loci", "ptr", "variance")) + method <- match.arg(method, choices = c("delta", "scaling", "eqm", "pqm", "gpqm", "mva", "loci", "ptr", "variance","dqm","qdm")) cross.val <- match.arg(cross.val, choices = c("none", "loo", "kfold")) scaling.type <- match.arg(scaling.type, choices = c("additive", "multiplicative")) extrapolation <- match.arg(extrapolation, choices = c("none", "constant")) @@ -283,6 +304,7 @@ biasCorrection <- function(y, x, newdata = NULL, precipitation = FALSE, extrapolation = extrapolation, theta = theta, join.members = join.members, + detrend=detrend, parallel = parallel, max.ncores = max.ncores, ncores = ncores) @@ -324,6 +346,7 @@ biasCorrection <- function(y, x, newdata = NULL, precipitation = FALSE, fitdistr.args = fitdistr.args, pr.threshold = wet.threshold, n.quantiles = n.quantiles, extrapolation = extrapolation, theta = theta, join.members = join.members, + detrend=detrend, parallel = parallel, max.ncores = max.ncores, ncores = ncores) @@ -356,6 +379,7 @@ biasCorrectionXD <- function(y, x, newdata, extrapolation, theta, join.members, + detrend, parallel = FALSE, max.ncores = 16, ncores = NULL) { @@ -445,6 +469,7 @@ biasCorrectionXD <- function(y, x, newdata, n.quantiles = n.quantiles, extrapolation = extrapolation, theta = theta, + detrend=detrend, parallel = parallel, max.ncores = max.ncores, ncores = ncores) @@ -505,6 +530,7 @@ biasCorrectionXD <- function(y, x, newdata, #' above which precipitation (temperature) values are fitted to a Generalized Pareto Distribution (GPD). #' Values below this threshold are fitted to a gamma (normal) distribution. By default, 'theta' is the 95th #' percentile (5th percentile for the left tail). Only for \code{"gpqm"} method. +#' @param detrend logical. Detrend data prior to bias correction? Only for \code{"dqm"}. Default. TRUE. #' @template templateParallelParams #' #' @@ -521,6 +547,7 @@ biasCorrection1D <- function(o, p, s, n.quantiles, extrapolation, theta, + detrend, parallel = FALSE, max.ncores = 16, ncores = NULL) { @@ -549,7 +576,12 @@ biasCorrection1D <- function(o, p, s, mapply_fun(loci, o, p, s, MoreArgs = list(precip, pr.threshold)) } else if (method == "ptr") { mapply_fun(ptr, o, p, s, MoreArgs = list(precip)) + } else if (method == "dqm") { + mapply_fun(dqm, o, p, s, MoreArgs = list(precip, pr.threshold, n.quantiles, detrend)) + } else if (method == "qdm") { + mapply_fun(qdm, o, p, s, MoreArgs = list(precip, pr.threshold, n.quantiles)) } + #INCLUIR AQUI METODOS NUEVOS } #' @title adjustPrecipFreq @@ -1047,6 +1079,162 @@ recoverMemberDim <- function(plain.grid, bc.grid, newdata) { do.call("bindGrid", c(aux.list, dimension = "member")) } +#' @title Detrended quantile matching +#' @description Detrended quantile matching with delta-method extrapolation +#' @param o A vector (e.g. station data) containing the observed climate data for the training period +#' @param p A vector containing the simulated climate by the model for the training period. +#' @param s A vector containing the simulated climate for the variable used in \code{p}, but considering the test period. +#' @param precip Logical indicating if o, p, s is precipitation data. +#' @param pr.threshold Integer. The minimum value that is considered as a non-zero precipitation. +#' @param detrend logical. Detrend data prior to bias correction? Default. TRUE. +#' @param n.quantiles Integer. Maximum number of quantiles to estimate. Default: same as data length. +#' @details DQM method developed by A. Canon, from \url{https://github.com/pacificclimate/ClimDown}, \url{https://cran.r-project.org/web/packages/ClimDown/}. +#' +#' See also Cannon, A.J., S.R. Sobie, and T.Q. Murdock (2015) Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?. J. Climate, 28, 6938–6959, \url{https://doi.org/10.1175/JCLI-D-14-00754.1} +#' @keywords internal +#' @author A. Cannon (acannon@uvic.ca), A. Casanueva + +dqm <- function(o, p, s, precip, pr.threshold, n.quantiles, detrend=TRUE){ + + if (all(is.na(o)) | all(is.na(p)) | all(is.na(s))) { + return(yout=rep(NA, length(s))) + + } else{ + + if(precip){ + # For ratio data, treat exact zeros as left censored values less than pr.threshold + epsilon <- .Machine$double.eps + o[o < pr.threshold & !is.na(o)] <- runif(sum(o < pr.threshold, na.rm=TRUE), min=epsilon, max=pr.threshold) + p[p < pr.threshold & !is.na(p)] <- runif(sum(p < pr.threshold, na.rm=TRUE), min=epsilon, max=pr.threshold) + s[s < pr.threshold & !is.na(s)] <- runif(sum(s < pr.threshold, na.rm=TRUE), min=epsilon, max=pr.threshold) + } + + o.mn <- mean(o, na.rm=T) + p.mn <- mean(p, na.rm=T) + if(precip){ + s <- s/p.mn*o.mn + } else{ + s <- s-p.mn+o.mn + } + + if(detrend){ + s.mn <- lm.fit(cbind(1, seq_along(s)), s)$fitted + } else{ + s.mn <- o.mn + } + if(is.null(n.quantiles)) n.quantiles <- max(length(o), length(p)) + tau <- c(0, (1:n.quantiles)/(n.quantiles+1), 1) + if(precip & any(o < sqrt(.Machine$double.eps), na.rm=TRUE)){ + x <- quantile(p/p.mn, tau, na.rm=T) + y <- quantile(o/o.mn, tau, na.rm=T) + yout <- approx(x, y, xout=s/s.mn, rule=2:1)$y # if rule = 1, NAs are returned outside the training interval; if rule= 2, the value at the closest data extreme is used. rule = 2:1, if the left and right side extrapolation should differ. + extrap <- is.na(yout) + yout[extrap] <- max(o/o.mn, na.rm=T)*((s/s.mn)[extrap]/max(p/p.mn, na.rm=T)) # extrapolation on the upper tail + yout <- yout*s.mn + #yout.h <- approx(x, y, xout=p/p.mn, rule=1)$y*o.mn + } else if(precip & !any(o < sqrt(.Machine$double.eps), na.rm=TRUE)){ + x <- quantile(p/p.mn, tau, na.rm=T) + y <- quantile(o/o.mn, tau, na.rm=T) + yout <- approx(x, y, xout=s/s.mn, rule=1)$y + extrap.lower <- is.na(yout) & ((s/s.mn) < min(p/p.mn, na.rm=T)) + extrap.upper <- is.na(yout) & ((s/s.mn) > max(p/p.mn, na.rm=T)) + yout[extrap.lower] <- min(o/o.mn, na.rm=T)*((s/s.mn)[extrap.lower]/ + min(p/p.mn, na.rm=T)) + yout[extrap.upper] <- max(o/o.mn, na.rm=T)*((s/s.mn)[extrap.upper]/ + max(p/p.mn, na.rm=T)) + yout <- yout*s.mn + #yout.h <- approx(x, y, xout=p/p.mn, rule=1)$y*o.mn + } else{ + x <- quantile(p-p.mn, tau, na.rm=T) + y <- quantile(o-o.mn, tau, na.rm=T) + yout <- approx(x, y, xout=s-s.mn, rule=1)$y + extrap.lower <- is.na(yout) & ((s-s.mn) < min(p-p.mn, na.rm=T)) + extrap.upper <- is.na(yout) & ((s-s.mn) > max(p-p.mn, na.rm=T)) + yout[extrap.lower] <- min(o-o.mn) + ((s-s.mn)[extrap.lower]- + min(p-p.mn, na.rm=T)) + yout[extrap.upper] <- max(o-o.mn) + ((s-s.mn)[extrap.upper]- + max(p-p.mn, na.rm=T)) + yout <- yout+s.mn + #yout.h <- approx(x, y, xout=p-p.mn, rule=1)$y+o.mn + } + if(precip){ + yout[which(yout < sqrt(.Machine$double.eps))] <- 0 + #yout.h[which(yout.h < sqrt(.Machine$double.eps))] <- 0 + } + return(yout) + } +} + + +#' @title Quantile delta mapping +#' @description Quantile delta mapping +#' @param o A vector (e.g. station data) containing the observed climate data for the training period +#' @param p A vector containing the simulated climate by the model for the training period. +#' @param s A vector containing the simulated climate for the variable used in \code{p}, but considering the test period. +#' @param precip Logical indicating if o, p, s is precipitation data. +#' @param pr.threshold Integer. The minimum value that is considered as a non-zero precipitation. +#' \code{precip = FALSE}. +#' @param jitter.factor Integer. Jittering to accomodate ties. Default: 0.01. +#' @param n.quantiles Integer. Maximum number of quantiles to estimate. Default: same as data length. +#' @details QDM method developed by A. Canon, from \url{https://github.com/pacificclimate/ClimDown}, \url{https://cran.r-project.org/web/packages/ClimDown/}. +#' +#' See also Cannon, A.J., S.R. Sobie, and T.Q. Murdock (2015) Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?. J. Climate, 28, 6938–6959, \url{https://doi.org/10.1175/JCLI-D-14-00754.1} +#' @keywords internal +#' @author A. Cannon (acannon@uvic.ca), A. Casanueva + +qdm <- function(o, p, s, precip, pr.threshold, n.quantiles, jitter.factor=0.01){ + + # tau.s = F.s(x.s) + # delta = x.s {/,-} F.p^-1(tau.s) + # yout = F.o^-1(tau.s) {*,+} delta + + if (all(is.na(o)) | all(is.na(p)) | all(is.na(s))) { + return(yout=rep(NA, length(s))) + } else{ + + # Apply a small amount of jitter to accomodate ties due to limited measurement precision + o <- jitter(o, jitter.factor) + p <- jitter(p, jitter.factor) + s <- jitter(s, jitter.factor) + + # For ratio data, treat exact zeros as left censored values less than pr.threshold + if(precip){ + epsilon <- .Machine$double.eps + o[o < pr.threshold & !is.na(o)] <- runif(sum(o < pr.threshold, na.rm=TRUE), min=epsilon, max=pr.threshold) + p[p < pr.threshold & !is.na(p)] <- runif(sum(p < pr.threshold, na.rm=TRUE), min=epsilon, max=pr.threshold) + s[s < pr.threshold & !is.na(s)] <- runif(sum(s < pr.threshold, na.rm=TRUE), min=epsilon, max=pr.threshold) + } + + # Calculate empirical quantiles using Weibull plotting position + n <- max(length(o), length(p), length(s)) + if(is.null(n.quantiles)) n.quantiles <- n + tau <- seq(1/(n+1), n/(n+1), length=n.quantiles) + quant.o <- quantile(o, tau, type=6, na.rm=TRUE) + quant.p <- quantile(p, tau, type=6, na.rm=TRUE) + quant.s <- quantile(s, tau, type=6, na.rm=TRUE) + + # Apply QDM bias correction + tau.s <- approx(quant.s, tau, s, rule=2)$y + if(precip){ + delta <- s/approx(tau, quant.p, tau.s, rule=2)$y # if rule= 2, the value at the closest data extreme is used + yout <- approx(tau, quant.o, tau.s, rule=2)$y*delta + } else{ + delta <- s - approx(tau, quant.p, tau.s, rule=2)$y + yout <- approx(tau, quant.o, tau.s, rule=2)$y + delta + } + #yout.h <- approx(quant.p, quant.o, p, rule=2)$y + + # For precip data, set values less than threshold to zero + if(precip){ + #yout.h[yout.h < pr.threshold] <- 0 + yout[yout < pr.threshold] <- 0 + } + + return(yout) + } +} + + # biasCorrection.chunk.R Bias correction methods # diff --git a/man/biasCorrection.Rd b/man/biasCorrection.Rd index 02c4b022..f709696f 100644 --- a/man/biasCorrection.Rd +++ b/man/biasCorrection.Rd @@ -5,13 +5,13 @@ \title{Bias correction methods} \usage{ biasCorrection(y, x, newdata = NULL, precipitation = FALSE, - method = c("delta", "scaling", "eqm", "pqm", "gpqm", "loci"), - cross.val = c("none", "loo", "kfold"), folds = NULL, window = NULL, - scaling.type = c("additive", "multiplicative"), + method = c("delta", "scaling", "eqm", "pqm", "gpqm", "loci", "dqm", + "qdm"), cross.val = c("none", "loo", "kfold"), folds = NULL, + window = NULL, scaling.type = c("additive", "multiplicative"), fitdistr.args = list(densfun = "normal"), wet.threshold = 1, n.quantiles = NULL, extrapolation = c("none", "constant"), - theta = 0.95, join.members = FALSE, parallel = FALSE, - max.ncores = 16, ncores = NULL) + theta = 0.95, detrend = TRUE, join.members = FALSE, + parallel = FALSE, max.ncores = 16, ncores = NULL) } \arguments{ \item{y}{A grid or station data containing the observed climate data for the training period} @@ -56,7 +56,7 @@ document carefully before setting the parameters in \code{fitdistr.args}.} \item{wet.threshold}{The minimum value that is considered as a non-zero precipitation. Ignored when \code{precipitation = FALSE}. Default to 1 (assuming mm). See details on bias correction for precipitation.} -\item{n.quantiles}{Integer indicating the number of quantiles to be considered when method = "eqm". Default is NULL, +\item{n.quantiles}{Integer indicating the number of quantiles to be considered when method = "eqm", "dqm", "qdm". Default is NULL, that considers all quantiles, i.e. \code{n.quantiles = length(x[i,j])} (being \code{i} and \code{j} the coordinates in a single location).} @@ -69,6 +69,8 @@ above which precipitation (temperature) values are fitted to a Generalized Paret Values below this threshold are fitted to a gamma (normal) distribution. By default, 'theta' is the 95th percentile (5th percentile for the left tail). Only for \code{"gpqm"} method.} +\item{detrend}{logical. Detrend data prior to bias correction? Only for \code{"dqm"}. Default. TRUE.} + \item{join.members}{Logical indicating whether members should be corrected independently (\code{FALSE}, the default), or joined before performing the correction (\code{TRUE}). It applies to multimember grids only (otherwise ignored).} @@ -87,9 +89,9 @@ Implementation of several standard bias correction methods } \details{ The methods available are \code{"eqm"}, \code{"delta"}, -\code{"scaling"}, \code{"pqm"}, \code{"gpqm"}\code{"loci"}, -\code{"ptr"} (the four latter used only for precipitation) and -\code{"variance"} (only for temperature). +\code{"scaling"}, \code{"pqm"}, \code{"gpqm"}, \code{"loci"}, +\code{"ptr"} (the four latter used only for precipitation), +\code{"variance"} (only for temperature), \code{"dqm"} and \code{"qdm"}. These are next briefly described: @@ -133,6 +135,7 @@ which the GPD is fitted is the 95th percentile of the observed and the predicted The user can specify a different threshold by modifying the parameter theta. It is applicable to precipitation data. \strong{mva} + Mean and Variance Adjustment. \strong{variance} @@ -150,6 +153,24 @@ The precipitation threshold is calculated such that the number of simulated days Power transformation of precipitation. This method is described in Leander and Buishand 2007 and is applicable only to precipitation. It adjusts the variance statistics of precipitation time series in an exponential form. The power parameter is estimated on a monthly basis using a 90-day window centered on the interval. The power is defined by matching the coefficient of variation of corrected daily simulated precipitation with the coefficient of variation of observed daily precipitation. It is calculated by root-finding algorithm using Brent's method. + +\strong{dqm} + +Detrended quantile matching with delta-method extrapolation, described in Cannon et al. 2015. +It consists on (i) removing the long-term mean (linear) trend; +(ii) eqm is applied to the detrended series; + (iii) the mean trend is then reapplied to the bias-adjusted series. +It preserves the mean change signal in a climate change context. +It allows relative (multiplicative) and additive corrections. + +\strong{qdm} + +Quantile delta mapping, described in Cannon et al. 2015. +It consists on (i) detrending the individual quantiles; +(ii) QM is applied to the detrended series; +(iii) the projected trends are then reapplied to the bias-adjusted quantiles. +It explicitly preserves the change signal in all quantiles. +It allows relative (multiplicative) and additive corrections. } \section{Note on the bias correction of precipitation}{ @@ -260,6 +281,7 @@ eqm.join <- biasCorrection(y = y, x = x, \item M. R. Tye, D. B. Stephenson, G. J. Holland and R. W. Katz (2014) A Weibull Approach for Improving Climate Model Projections of Tropical Cyclone Wind-Speed Distributions, Journal of Climate, 27, 6119-6133 +\item Cannon, A.J., S.R. Sobie, and T.Q. Murdock (2015) Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?. J. Climate, 28, 6938–6959, https://doi.org/10.1175/JCLI-D-14-00754.1 } } \seealso{ diff --git a/man/biasCorrection1D.Rd b/man/biasCorrection1D.Rd index 6a88ef0e..b78b6d50 100644 --- a/man/biasCorrection1D.Rd +++ b/man/biasCorrection1D.Rd @@ -5,8 +5,8 @@ \title{Bias correction methods on 1D data} \usage{ biasCorrection1D(o, p, s, method, scaling.type, fitdistr.args, precip, - pr.threshold, n.quantiles, extrapolation, theta, parallel = FALSE, - max.ncores = 16, ncores = NULL) + pr.threshold, n.quantiles, extrapolation, theta, detrend, + parallel = FALSE, max.ncores = 16, ncores = NULL) } \arguments{ \item{o}{A vector (e.g. station data) containing the observed climate data for the training period} @@ -47,6 +47,8 @@ above which precipitation (temperature) values are fitted to a Generalized Paret Values below this threshold are fitted to a gamma (normal) distribution. By default, 'theta' is the 95th percentile (5th percentile for the left tail). Only for \code{"gpqm"} method.} +\item{detrend}{logical. Detrend data prior to bias correction? Only for \code{"dqm"}. Default. TRUE.} + \item{parallel}{Logical. Should parallel execution be used?} \item{max.ncores}{Integer. Upper bound for user-defined number of cores.} diff --git a/man/dqm.Rd b/man/dqm.Rd new file mode 100644 index 00000000..e4f5964c --- /dev/null +++ b/man/dqm.Rd @@ -0,0 +1,35 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/biasCorrection.R +\name{dqm} +\alias{dqm} +\title{Detrended quantile matching} +\usage{ +dqm(o, p, s, precip, pr.threshold, n.quantiles, detrend = TRUE) +} +\arguments{ +\item{o}{A vector (e.g. station data) containing the observed climate data for the training period} + +\item{p}{A vector containing the simulated climate by the model for the training period.} + +\item{s}{A vector containing the simulated climate for the variable used in \code{p}, but considering the test period.} + +\item{precip}{Logical indicating if o, p, s is precipitation data.} + +\item{pr.threshold}{Integer. The minimum value that is considered as a non-zero precipitation.} + +\item{n.quantiles}{Integer. Maximum number of quantiles to estimate. Default: same as data length.} + +\item{detrend}{logical. Detrend data prior to bias correction? Default. TRUE.} +} +\description{ +Detrended quantile matching with delta-method extrapolation +} +\details{ +DQM method developed by A. Canon, from \url{https://github.com/pacificclimate/ClimDown}, \url{https://cran.r-project.org/web/packages/ClimDown/}. + + See also Cannon, A.J., S.R. Sobie, and T.Q. Murdock (2015) Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?. J. Climate, 28, 6938–6959, \url{https://doi.org/10.1175/JCLI-D-14-00754.1} +} +\author{ +A. Cannon (acannon@uvic.ca), A. Casanueva +} +\keyword{internal} diff --git a/man/qdm.Rd b/man/qdm.Rd new file mode 100644 index 00000000..2c9ed47d --- /dev/null +++ b/man/qdm.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/biasCorrection.R +\name{qdm} +\alias{qdm} +\title{Quantile delta mapping} +\usage{ +qdm(o, p, s, precip, pr.threshold, n.quantiles, jitter.factor = 0.01) +} +\arguments{ +\item{o}{A vector (e.g. station data) containing the observed climate data for the training period} + +\item{p}{A vector containing the simulated climate by the model for the training period.} + +\item{s}{A vector containing the simulated climate for the variable used in \code{p}, but considering the test period.} + +\item{precip}{Logical indicating if o, p, s is precipitation data.} + +\item{pr.threshold}{Integer. The minimum value that is considered as a non-zero precipitation. +\code{precip = FALSE}.} + +\item{n.quantiles}{Integer. Maximum number of quantiles to estimate. Default: same as data length.} + +\item{jitter.factor}{Integer. Jittering to accomodate ties. Default: 0.01.} +} +\description{ +Quantile delta mapping +} +\details{ +QDM method developed by A. Canon, from \url{https://github.com/pacificclimate/ClimDown}, \url{https://cran.r-project.org/web/packages/ClimDown/}. + +See also Cannon, A.J., S.R. Sobie, and T.Q. Murdock (2015) Bias Correction of GCM Precipitation by Quantile Mapping: How Well Do Methods Preserve Changes in Quantiles and Extremes?. J. Climate, 28, 6938–6959, \url{https://doi.org/10.1175/JCLI-D-14-00754.1} +} +\author{ +A. Cannon (acannon@uvic.ca), A. Casanueva +} +\keyword{internal} From 988807ed2c529c71004fd60895be52864d7d8e88 Mon Sep 17 00:00:00 2001 From: Ana Date: Thu, 31 Oct 2019 11:30:05 +0100 Subject: [PATCH 03/13] method gpqm - produce NA if model data is all NA --- R/biasCorrection.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/biasCorrection.R b/R/biasCorrection.R index a4ab6dfb..60ea2de1 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -866,7 +866,7 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { stop("method gpqm is only applied to precipitation data") } else { threshold <- pr.threshold - if (any(!is.na(o))) { + if (any(!is.na(o)) & any(!is.na(p))) { params <- adjustPrecipFreq(o, p, threshold) p <- params$p nP <- params$nP From 41806563f8c1018eaf640126013f3ed12923a3ed Mon Sep 17 00:00:00 2001 From: Ana Date: Thu, 7 Nov 2019 17:46:55 +0100 Subject: [PATCH 04/13] add gpqm for temperature --- NAMESPACE | 2 ++ R/biasCorrection.R | 52 ++++++++++++++++++++++++++++++++++++------- man/biasCorrection.Rd | 12 +++++----- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 0cd736c5..9d5c20d3 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -53,8 +53,10 @@ importFrom(stats,na.exclude) importFrom(stats,na.omit) importFrom(stats,nls) importFrom(stats,pgamma) +importFrom(stats,pnorm) importFrom(stats,predict) importFrom(stats,qgamma) +importFrom(stats,qnorm) importFrom(stats,quantile) importFrom(stats,resid) importFrom(stats,rgamma) diff --git a/R/biasCorrection.R b/R/biasCorrection.R index 60ea2de1..3867f653 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -51,7 +51,7 @@ #' @param theta numeric indicating upper threshold (and lower for the left tail of the distributions, if needed) #' above which precipitation (temperature) values are fitted to a Generalized Pareto Distribution (GPD). #' Values below this threshold are fitted to a gamma (normal) distribution. By default, 'theta' is the 95th -#' percentile (5th percentile for the left tail). Only for \code{"gpqm"} method. +#' percentile (and 5th percentile for the left tail). Only for \code{"gpqm"} method. #' @param detrend logical. Detrend data prior to bias correction? Only for \code{"dqm"}. Default. TRUE. #' @param join.members Logical indicating whether members should be corrected independently (\code{FALSE}, the default), #' or joined before performing the correction (\code{TRUE}). It applies to multimember grids only (otherwise ignored). @@ -101,9 +101,11 @@ #' Generalized Quantile Mapping (described in Gutjahr and Heinemann 2013) is also a parametric quantile mapping (see #' method 'pqm') but using two teorethical distributions, the gamma distribution and Generalized Pareto Distribution (GPD). #' By default, It applies a gamma distribution to values under the threshold given by the 95th percentile -#' (following Yang et al. 2010) and a general Pareto distribution (GPD) to values above the threshold. the threshold above -#' which the GPD is fitted is the 95th percentile of the observed and the predicted wet-day distribution, respectively. -#' The user can specify a different threshold by modifying the parameter theta. It is applicable to precipitation data. +#' (following Yang et al. 2010) and a general Pareto distribution (GPD) to values above the threshold. The threshold above +#' which the GPD is fitted is the 95th percentile of the observed and the predicted wet-day distribution, respectively. If precip=FALSE +#' values below the 5th percentile of the observed and the predicted distributions are additionally fitted using GPD and +#' the rest of the values of the distributions are fitted using a normal distribution. +#' The user can specify a different threshold(s) by modifying the parameter theta. #' #' \strong{mva} #' @@ -275,7 +277,7 @@ biasCorrection <- function(y, x, newdata = NULL, precipitation = FALSE, wet.threshold = 1, n.quantiles = NULL, extrapolation = c("none", "constant"), - theta = .95, + theta = c(.95,.05), detrend=TRUE, join.members = FALSE, parallel = FALSE, @@ -857,14 +859,48 @@ eqm <- function(o, p, s, precip, pr.threshold, n.quantiles, extrapolation){ #' @importFrom evd fpot #' @importFrom MASS fitdistr #' @importFrom evd qgpd pgpd -#' @importFrom stats quantile pgamma qgamma +#' @importFrom stats quantile pgamma qgamma pnorm qnorm #' @keywords internal #' @author S. Herrera and M. Iturbide gpqm <- function(o, p, s, precip, pr.threshold, theta) { if (precip == FALSE) { - stop("method gpqm is only applied to precipitation data") + # stop("method gpqm is only applied to precipitation data") + # For temperature, lower (values below theta.low) and upper (values above theta) tails of the distribution are fitted with GPD. + if (all(is.na(o)) | all(is.na(p))) { + s <- rep(NA, length(s)) + } else{ + theta.low <- theta[2] + theta <- theta[1] + ind <- which(!is.na(o)) + indnormal <- ind[which((o[ind] < quantile(o[ind], theta)) & (o[ind] > quantile(o[ind], theta.low)))] + indparetoUp <- ind[which(o[ind] >= quantile(o[ind], theta))] + indparetoLow <- ind[which(o[ind] <= quantile(o[ind], theta.low))] + obsGQM <- fitdistr(o[indnormal],"normal") + obsGQM2Up <- fpot(o[indparetoUp], quantile(o[ind], theta), "gpd", std.err = FALSE) + obsGQM2Low <- fpot(-o[indparetoLow], -quantile(o[ind], theta.low), "gpd", std.err = FALSE) + indp <- which(!is.na(p)) + indnormalp <- indp[which((p[indp] < quantile(p[indp],theta)) & (p[indp] > quantile(p[indp], theta.low)))] + indparetopUp <- indp[which(p[indp] >= quantile(p[indp], theta))] + indparetopLow <- indp[which(p[indp] <= quantile(p[indp], theta.low))] + prdGQM <- fitdistr(p[indnormalp], "normal") + prdGQM2Up <- fpot(p[indparetopUp], quantile(p[indp], theta), "gpd", std.err = FALSE) + prdGQM2Low <- fpot(-p[indparetopLow], -quantile(p[indp], theta.low), "gpd", std.err = FALSE) + inds <- which(!is.na(s)) + indnormalsim <- inds[which((s[inds] < quantile(p[indp], theta)) & (s[inds] > quantile(p[indp], theta.low)))] + indparetosimUp <- inds[which(s[inds] >= quantile(p[indp], theta))] + indparetosimLow <- inds[which(s[inds] <= quantile(p[indp], theta.low))] + auxF <- pnorm(s[indnormalsim], prdGQM$estimate[1], prdGQM$estimate[2]) + auxF2Up <- pgpd(s[indparetosimUp], loc = prdGQM2Up$threshold, scale = prdGQM2Up$estimate[1], shape = prdGQM2Up$estimate[2]) + auxF2Low <- pgpd(-s[indparetosimLow], loc = prdGQM2Low$threshold, scale = prdGQM2Low$estimate[1], shape = prdGQM2Low$estimate[2]) + s[indnormalsim] <- qnorm(auxF, obsGQM$estimate[1], obsGQM$estimate[2]) + s[indparetosimUp[which(auxF2Up < 1)]] <- qgpd(auxF2Up[which(auxF2Up < 1)], loc = obsGQM2Up$threshold, scale = obsGQM2Up$estimate[1], shape = obsGQM2Up$estimate[2]) + s[indparetosimUp[which(auxF2Up == 1)]] <- max(o[indparetoUp], na.rm = TRUE) + s[indparetosimLow[which(auxF2Low < 1)]] <- -qgpd(auxF2Low[which(auxF2Low < 1)], loc = obsGQM2Low$threshold, scale = obsGQM2Low$estimate[1], shape = obsGQM2Low$estimate[2]) + s[indparetosimLow[which(auxF2Low == 1)]] <- min(o[indparetoLow], na.rm = TRUE) + } } else { + theta <- theta[1] threshold <- pr.threshold if (any(!is.na(o)) & any(!is.na(p))) { params <- adjustPrecipFreq(o, p, threshold) @@ -1343,7 +1379,7 @@ qdm <- function(o, p, s, precip, pr.threshold, n.quantiles, jitter.factor=0.01){ #' By default, It applies a gamma distribution to values under the threshold given by the 95th percentile #' (following Yang et al. 2010) and a general Pareto distribution (GPD) to values above the threshold. the threshold above #' which the GPD is fitted is the 95th percentile of the observed and the predicted wet-day distribution, respectively. -#' The user can specify a different threshold by modifying the parameter theta. It is applicable to precipitation data. +#' The user can specify a different threshold by modifying the parameter theta. #' #' #' \strong{variance} diff --git a/man/biasCorrection.Rd b/man/biasCorrection.Rd index f709696f..2292d963 100644 --- a/man/biasCorrection.Rd +++ b/man/biasCorrection.Rd @@ -10,7 +10,7 @@ biasCorrection(y, x, newdata = NULL, precipitation = FALSE, window = NULL, scaling.type = c("additive", "multiplicative"), fitdistr.args = list(densfun = "normal"), wet.threshold = 1, n.quantiles = NULL, extrapolation = c("none", "constant"), - theta = 0.95, detrend = TRUE, join.members = FALSE, + theta = c(0.95, 0.05), detrend = TRUE, join.members = FALSE, parallel = FALSE, max.ncores = 16, ncores = NULL) } \arguments{ @@ -67,7 +67,7 @@ thus, this argument is ignored if other bias correction method is selected. Defa \item{theta}{numeric indicating upper threshold (and lower for the left tail of the distributions, if needed) above which precipitation (temperature) values are fitted to a Generalized Pareto Distribution (GPD). Values below this threshold are fitted to a gamma (normal) distribution. By default, 'theta' is the 95th -percentile (5th percentile for the left tail). Only for \code{"gpqm"} method.} +percentile (and 5th percentile for the left tail). Only for \code{"gpqm"} method.} \item{detrend}{logical. Detrend data prior to bias correction? Only for \code{"dqm"}. Default. TRUE.} @@ -130,9 +130,11 @@ is applicable to correct wind data (Tie et al. 2014). Generalized Quantile Mapping (described in Gutjahr and Heinemann 2013) is also a parametric quantile mapping (see method 'pqm') but using two teorethical distributions, the gamma distribution and Generalized Pareto Distribution (GPD). By default, It applies a gamma distribution to values under the threshold given by the 95th percentile -(following Yang et al. 2010) and a general Pareto distribution (GPD) to values above the threshold. the threshold above -which the GPD is fitted is the 95th percentile of the observed and the predicted wet-day distribution, respectively. -The user can specify a different threshold by modifying the parameter theta. It is applicable to precipitation data. +(following Yang et al. 2010) and a general Pareto distribution (GPD) to values above the threshold. The threshold above +which the GPD is fitted is the 95th percentile of the observed and the predicted wet-day distribution, respectively. If precip=FALSE +values below the 5th percentile of the observed and the predicted distributions are additionally fitted using GPD and +the rest of the values of the distributions are fitted using a normal distribution. +The user can specify a different threshold(s) by modifying the parameter theta. \strong{mva} From e925d4a2e049969df5d1f4467fa28eb5a08b2ff7 Mon Sep 17 00:00:00 2001 From: Ana Date: Fri, 8 Nov 2019 14:47:53 +0100 Subject: [PATCH 05/13] gpqm: correct error due to no data to train --- R/biasCorrection.R | 58 ++++++++++++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 22 deletions(-) diff --git a/R/biasCorrection.R b/R/biasCorrection.R index 3867f653..02bed682 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -876,26 +876,29 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { indnormal <- ind[which((o[ind] < quantile(o[ind], theta)) & (o[ind] > quantile(o[ind], theta.low)))] indparetoUp <- ind[which(o[ind] >= quantile(o[ind], theta))] indparetoLow <- ind[which(o[ind] <= quantile(o[ind], theta.low))] - obsGQM <- fitdistr(o[indnormal],"normal") - obsGQM2Up <- fpot(o[indparetoUp], quantile(o[ind], theta), "gpd", std.err = FALSE) - obsGQM2Low <- fpot(-o[indparetoLow], -quantile(o[ind], theta.low), "gpd", std.err = FALSE) indp <- which(!is.na(p)) indnormalp <- indp[which((p[indp] < quantile(p[indp],theta)) & (p[indp] > quantile(p[indp], theta.low)))] indparetopUp <- indp[which(p[indp] >= quantile(p[indp], theta))] indparetopLow <- indp[which(p[indp] <= quantile(p[indp], theta.low))] - prdGQM <- fitdistr(p[indnormalp], "normal") - prdGQM2Up <- fpot(p[indparetopUp], quantile(p[indp], theta), "gpd", std.err = FALSE) - prdGQM2Low <- fpot(-p[indparetopLow], -quantile(p[indp], theta.low), "gpd", std.err = FALSE) inds <- which(!is.na(s)) indnormalsim <- inds[which((s[inds] < quantile(p[indp], theta)) & (s[inds] > quantile(p[indp], theta.low)))] indparetosimUp <- inds[which(s[inds] >= quantile(p[indp], theta))] indparetosimLow <- inds[which(s[inds] <= quantile(p[indp], theta.low))] + # normal distribution + obsGQM <- fitdistr(o[indnormal],"normal") + prdGQM <- fitdistr(p[indnormalp], "normal") auxF <- pnorm(s[indnormalsim], prdGQM$estimate[1], prdGQM$estimate[2]) - auxF2Up <- pgpd(s[indparetosimUp], loc = prdGQM2Up$threshold, scale = prdGQM2Up$estimate[1], shape = prdGQM2Up$estimate[2]) - auxF2Low <- pgpd(-s[indparetosimLow], loc = prdGQM2Low$threshold, scale = prdGQM2Low$estimate[1], shape = prdGQM2Low$estimate[2]) s[indnormalsim] <- qnorm(auxF, obsGQM$estimate[1], obsGQM$estimate[2]) + # upper tail + obsGQM2Up <- fpot(o[indparetoUp], quantile(o[ind], theta), "gpd", std.err = FALSE) + prdGQM2Up <- fpot(p[indparetopUp], quantile(p[indp], theta), "gpd", std.err = FALSE) + auxF2Up <- pgpd(s[indparetosimUp], loc = prdGQM2Up$threshold, scale = prdGQM2Up$estimate[1], shape = prdGQM2Up$estimate[2]) s[indparetosimUp[which(auxF2Up < 1)]] <- qgpd(auxF2Up[which(auxF2Up < 1)], loc = obsGQM2Up$threshold, scale = obsGQM2Up$estimate[1], shape = obsGQM2Up$estimate[2]) s[indparetosimUp[which(auxF2Up == 1)]] <- max(o[indparetoUp], na.rm = TRUE) + # lower tail + obsGQM2Low <- fpot(-o[indparetoLow], -quantile(o[ind], theta.low), "gpd", std.err = FALSE) + prdGQM2Low <- fpot(-p[indparetopLow], -quantile(p[indp], theta.low), "gpd", std.err = FALSE) + auxF2Low <- pgpd(-s[indparetosimLow], loc = prdGQM2Low$threshold, scale = prdGQM2Low$estimate[1], shape = prdGQM2Low$estimate[2]) s[indparetosimLow[which(auxF2Low < 1)]] <- -qgpd(auxF2Low[which(auxF2Low < 1)], loc = obsGQM2Low$threshold, scale = obsGQM2Low$estimate[1], shape = obsGQM2Low$estimate[2]) s[indparetosimLow[which(auxF2Low == 1)]] <- min(o[indparetoLow], na.rm = TRUE) } @@ -916,22 +919,33 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { ind <- which(o > threshold & !is.na(o)) indgamma <- ind[which(o[ind] < quantile(o[ind], theta))] indpareto <- ind[which(o[ind] >= quantile(o[ind], theta))] - obsGQM <- fitdistr(o[indgamma],"gamma") - obsGQM2 <- fpot(o[indpareto], quantile(o[ind], theta), "gpd", std.err = FALSE) - ind <- which(p > 0 & !is.na(p)) - indgammap <- ind[which(p[ind] < quantile(p[ind],theta))] - indparetop <- ind[which(p[ind] >= quantile(p[ind], theta))] - prdGQM <- fitdistr(p[indgammap], "gamma") - prdGQM2 <- fpot(p[indparetop], quantile(p[ind], theta), "gpd", std.err = FALSE) + indp <- which(p > 0 & !is.na(p)) + indgammap <- indp[which(p[indp] < quantile(p[indp],theta))] + indparetop <- indp[which(p[indp] >= quantile(p[indp], theta))] rain <- which(s > Pth & !is.na(s)) noRain <- which(s <= Pth & !is.na(s)) - indgammasim <- rain[which(s[rain] < quantile(p[ind], theta))] - indparetosim <- rain[which(s[rain] >= quantile(p[ind], theta))] - auxF <- pgamma(s[indgammasim], prdGQM$estimate[1], rate = prdGQM$estimate[2]) - auxF2 <- pgpd(s[indparetosim], loc = 0, scale = prdGQM2$estimate[1], shape = prdGQM2$estimate[2]) - s[indgammasim] <- qgamma(auxF, obsGQM$estimate[1], rate = obsGQM$estimate[2]) - s[indparetosim[which(auxF2 < 1)]] <- qgpd(auxF2[which(auxF2 < 1)], loc = 0, scale = obsGQM2$estimate[1], shape = obsGQM2$estimate[2]) - s[indparetosim[which(auxF2 == 1)]] <- max(o[indpareto], na.rm = TRUE) + indgammasim <- rain[which(s[rain] < quantile(p[indp], theta))] + indparetosim <- rain[which(s[rain] >= quantile(p[indp], theta))] + # gamma distribution + if(length(indgamma)>1 & length(indgammap)>1 & length(indgammasim)>1){ + obsGQM <- fitdistr(o[indgamma],"gamma") + prdGQM <- fitdistr(p[indgammap], "gamma") + auxF <- pgamma(s[indgammasim], prdGQM$estimate[1], rate = prdGQM$estimate[2]) + s[indgammasim] <- qgamma(auxF, obsGQM$estimate[1], rate = obsGQM$estimate[2]) + } else{ + s[indgammasim] <-0 + } + # upper tail + if(any(o[indpareto] > quantile(o[ind], theta)) & any(p[indparetop] > quantile(p[indp], theta))){ + obsGQM2 <- fpot(o[indpareto], quantile(o[ind], theta), "gpd", std.err = FALSE) + prdGQM2 <- fpot(p[indparetop], quantile(p[indp], theta), "gpd", std.err = FALSE) + auxF2 <- pgpd(s[indparetosim], loc = 0, scale = prdGQM2$estimate[1], shape = prdGQM2$estimate[2]) + s[indparetosim[which(auxF2 < 1)]] <- qgpd(auxF2[which(auxF2 < 1)], loc = 0, scale = obsGQM2$estimate[1], shape = obsGQM2$estimate[2]) + s[indparetosim[which(auxF2 == 1)]] <- max(o[indpareto], na.rm = TRUE) + } else { + s[indparetosim] <- 0 + } + # dry days s[noRain] <- 0 } else { warning("There is at least one location without rainfall above the threshold.\n In this (these) location(s) none bias correction has been applied.") From 3f4692cfbea81e7942014b894bfb5a9cdf63a7e3 Mon Sep 17 00:00:00 2001 From: Ana Date: Fri, 8 Nov 2019 23:09:31 +0100 Subject: [PATCH 06/13] gpqm: correct bug for extreme probabilities --- R/biasCorrection.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/biasCorrection.R b/R/biasCorrection.R index 02bed682..bdce3850 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -893,14 +893,16 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { obsGQM2Up <- fpot(o[indparetoUp], quantile(o[ind], theta), "gpd", std.err = FALSE) prdGQM2Up <- fpot(p[indparetopUp], quantile(p[indp], theta), "gpd", std.err = FALSE) auxF2Up <- pgpd(s[indparetosimUp], loc = prdGQM2Up$threshold, scale = prdGQM2Up$estimate[1], shape = prdGQM2Up$estimate[2]) - s[indparetosimUp[which(auxF2Up < 1)]] <- qgpd(auxF2Up[which(auxF2Up < 1)], loc = obsGQM2Up$threshold, scale = obsGQM2Up$estimate[1], shape = obsGQM2Up$estimate[2]) + s[indparetosimUp[which(auxF2Up < 1 & auxF2Up > 0)]] <- qgpd(auxF2Up[which(auxF2Up < 1 & auxF2Up > 0)], loc = obsGQM2Up$threshold, scale = obsGQM2Up$estimate[1], shape = obsGQM2Up$estimate[2]) s[indparetosimUp[which(auxF2Up == 1)]] <- max(o[indparetoUp], na.rm = TRUE) + s[indparetosimUp[which(auxF2Up == 0)]] <- min(o[indparetoUp], na.rm = TRUE) # lower tail obsGQM2Low <- fpot(-o[indparetoLow], -quantile(o[ind], theta.low), "gpd", std.err = FALSE) prdGQM2Low <- fpot(-p[indparetopLow], -quantile(p[indp], theta.low), "gpd", std.err = FALSE) auxF2Low <- pgpd(-s[indparetosimLow], loc = prdGQM2Low$threshold, scale = prdGQM2Low$estimate[1], shape = prdGQM2Low$estimate[2]) - s[indparetosimLow[which(auxF2Low < 1)]] <- -qgpd(auxF2Low[which(auxF2Low < 1)], loc = obsGQM2Low$threshold, scale = obsGQM2Low$estimate[1], shape = obsGQM2Low$estimate[2]) - s[indparetosimLow[which(auxF2Low == 1)]] <- min(o[indparetoLow], na.rm = TRUE) + s[indparetosimLow[which(auxF2Low < 1 & auxF2Low > 0)]] <- -qgpd(auxF2Low[which(auxF2Low < 1 & auxF2Low > 0)], loc = obsGQM2Low$threshold, scale = obsGQM2Low$estimate[1], shape = obsGQM2Low$estimate[2]) + s[indparetosimLow[which(auxF2Low == 1)]] <- max(o[indparetoLow], na.rm = TRUE) + s[indparetosimLow[which(auxF2Low == 0)]] <- min(o[indparetoLow], na.rm = TRUE) } } else { theta <- theta[1] From 04f9aa2d9fc1ca6d2deb3a65b69e060ba5136b0e Mon Sep 17 00:00:00 2001 From: Ana Date: Sat, 9 Nov 2019 21:13:04 +0100 Subject: [PATCH 07/13] isimip: resolve conflict with pr and type --- R/isimip.R | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/R/isimip.R b/R/isimip.R index ebfdae93..abe24961 100644 --- a/R/isimip.R +++ b/R/isimip.R @@ -294,8 +294,7 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multipli } } } - } - if(any(grepl(obs$Variable$varName,c("pr","tp","precipitation","precip")))){ + } else if(any(grepl(obs$Variable$varName,c("pr","tp","precipitation","precip")))){ if (length(threshold)==1){ threshold<-array(data = threshold, dim = 3) } @@ -623,8 +622,7 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multipli } } attr(sim$Data, "threshold") <- threshold - } - if(((any(grepl(obs$Variable$varName,c("radiation","pressure","wind","win dspeed","humidity","specific humidity","radiacion","presion","viento","humedad","humedad especifica","rss","rsds","rls","rlds","ps","slp","wss","huss","hus")))) | (type == "multiplicative")) & !multiField){ + } else if(((any(grepl(obs$Variable$varName,c("radiation","pressure","wind","win dspeed","humidity","specific humidity","radiacion","presion","viento","humedad","humedad especifica","rss","rsds","rls","rlds","ps","slp","wss","huss","hus")))) | (type == "multiplicative")) & !multiField){ if (length(threshold)==1){ threshold<-array(data = threshold, dim = 3) } @@ -949,8 +947,7 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multipli } } attr(sim$Data, "threshold") <- threshold - } - if((any(grepl(obs$Variable$varName,c("maximum temperature","temperatura maxima","tasmax","tmax","minimum temperature","temperatura minima","tasmin","tmin"))))){ + } else if((any(grepl(obs$Variable$varName,c("maximum temperature","temperatura maxima","tasmax","tmax","minimum temperature","temperatura minima","tasmin","tmin"))))){ indTas <- which(!is.na(match(obs$Variable$varName,c("tas","temperatura media","mean temperature","tmean")))) if (length(indTas) == 0){ stop("Mean temperature is needed to correct the Maximum and Minimum Temperatures") @@ -1042,12 +1039,12 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multipli } } } - } + # case {'uas';'vas';'ua';'va';'eastward wind component';'northward wind component'}, # if isempty(Ws),error('Wind speed is necessary for the correction of the eastward and northward wind component');end # wsC=isimip(Ws.O,Ws.P,Ws.F,'variable','windspeed','datesobs',datesObs,'datesfor',datesFor); # indC=find(~isnan(Ws.F) & Ws.F>0);F(indC)=(F(indC).*wsC(indC))./Ws.F(indC); - if((any(grepl(obs$Variable$varName,c("uas","vas","ua","va","eastward wind component","northward wind component"))))) { + } else if((any(grepl(obs$Variable$varName,c("uas","vas","ua","va","eastward wind component","northward wind component"))))) { indTas <- which(!is.na(match(obs$Variable$varName,c("wind","windspeed","viento","wss")))) if (length(indTas) == 0){ stop("Wind speed is needed to correct eastward and northward wind components") @@ -1104,12 +1101,12 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multipli sim$Data[indTasForAux[indCalmWind,]] <- calmWind } } - } + # case {'prsn';'snowfall';'nieve'}, # if isempty(Pr),error('Precipitation is necessary for the correction of the snowfall');end # prC=isimip(Pr.O,Pr.P,Pr.F,'variable','precipitation','datesobs',datesObs,'datesfor',datesFor,'threshold', threshold); # indC=find(~isnan(Pr.F) & Pr.F>0);F(indC)=(F(indC).*prC(indC))./Pr.F(indC); - if((any(grepl(obs$Variable$varName,c("prsn","snowfall","nieve"))))) { + } else if((any(grepl(obs$Variable$varName,c("prsn","snowfall","nieve"))))) { indTas <- which(!is.na(match(obs$Variable$varName,c("pr","tp","precipitation","precip")))) if (length(indTas) == 0){ stop("Precipitation is needed to correct snow") From 8c14368157d2491dc1ae47bfd162491e3eb30041 Mon Sep 17 00:00:00 2001 From: Ana Date: Sun, 10 Nov 2019 18:27:36 +0100 Subject: [PATCH 08/13] gqpm pr: correct bug for extreme probabilities --- R/biasCorrection.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/biasCorrection.R b/R/biasCorrection.R index bdce3850..a709bb74 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -942,8 +942,9 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { obsGQM2 <- fpot(o[indpareto], quantile(o[ind], theta), "gpd", std.err = FALSE) prdGQM2 <- fpot(p[indparetop], quantile(p[indp], theta), "gpd", std.err = FALSE) auxF2 <- pgpd(s[indparetosim], loc = 0, scale = prdGQM2$estimate[1], shape = prdGQM2$estimate[2]) - s[indparetosim[which(auxF2 < 1)]] <- qgpd(auxF2[which(auxF2 < 1)], loc = 0, scale = obsGQM2$estimate[1], shape = obsGQM2$estimate[2]) + s[indparetosim[which(auxF2 < 1 & auxF2 > 0)]] <- qgpd(auxF2[which(auxF2 < 1 & auxF2 > 0)], loc = 0, scale = obsGQM2$estimate[1], shape = obsGQM2$estimate[2]) s[indparetosim[which(auxF2 == 1)]] <- max(o[indpareto], na.rm = TRUE) + s[indparetosim[which(auxF2 == 0)]] <- min(o[indpareto], na.rm = TRUE) } else { s[indparetosim] <- 0 } From ab4df877f7486fcfc7a362cce646b6a2af878014 Mon Sep 17 00:00:00 2001 From: Ana Date: Mon, 11 Nov 2019 14:59:47 +0100 Subject: [PATCH 09/13] gpqm precip: trycatch for optimization error --- R/biasCorrection.R | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/R/biasCorrection.R b/R/biasCorrection.R index a709bb74..8cfd8b0c 100644 --- a/R/biasCorrection.R +++ b/R/biasCorrection.R @@ -930,10 +930,15 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { indparetosim <- rain[which(s[rain] >= quantile(p[indp], theta))] # gamma distribution if(length(indgamma)>1 & length(indgammap)>1 & length(indgammasim)>1){ - obsGQM <- fitdistr(o[indgamma],"gamma") - prdGQM <- fitdistr(p[indgammap], "gamma") - auxF <- pgamma(s[indgammasim], prdGQM$estimate[1], rate = prdGQM$estimate[2]) - s[indgammasim] <- qgamma(auxF, obsGQM$estimate[1], rate = obsGQM$estimate[2]) + obsGQM <- tryCatch(fitdistr(o[indgamma],"gamma"), error = function(err){NULL}) + prdGQM <- tryCatch(fitdistr(p[indgammap], "gamma"), error = function(err){NULL}) + if (!is.null(prdGQM) & !is.null(obsGQM)) { + auxF <- pgamma(s[indgammasim], prdGQM$estimate[1], rate = prdGQM$estimate[2]) + s[indgammasim] <- qgamma(auxF, obsGQM$estimate[1], rate = obsGQM$estimate[2]) + } else { + warning("Fitting error for location and selected 'densfun'.") + s[indgammasim] <- NA + } } else{ s[indgammasim] <-0 } @@ -950,6 +955,9 @@ gpqm <- function(o, p, s, precip, pr.threshold, theta) { } # dry days s[noRain] <- 0 + # inf to NA + s[is.infinite(s)] <- NA + s[s>1e3] <- NA } else { warning("There is at least one location without rainfall above the threshold.\n In this (these) location(s) none bias correction has been applied.") } From 6bec86104aca0cb0810c3203b25addc51dcbc02c Mon Sep 17 00:00:00 2001 From: Ana Date: Mon, 11 Nov 2019 16:53:08 +0100 Subject: [PATCH 10/13] isimip: drop dimensions in multigrids --- R/isimip.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/isimip.R b/R/isimip.R index abe24961..cd3f2dd3 100644 --- a/R/isimip.R +++ b/R/isimip.R @@ -79,9 +79,9 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multiplicative")) { - obs <- y - pred <- x - sim <- newdata + obs <- redim(y, drop=TRUE) + pred <- redim(x, drop=TRUE) + sim <- redim(newdata, drop=TRUE) if (is.null(obs$Dates$start)){ datesObs <- as.POSIXct(obs$Dates[[1]]$start, tz="GMT", format="%Y-%m-%d %H:%M:%S") @@ -131,7 +131,6 @@ isimip <- function (y, x, newdata, threshold = 1, type = c("additive", "multipli callObs <- as.call(c(list(as.name("["),quote(pred$Data)), indTimeObs1)) monthlyPred[indTimeObs] <- apply(eval(callObs), FUN = mean, MARGIN = setdiff(1:length(dimPred),pred.time.index), na.rm = TRUE) } - if (is.null(sim$Dates$start)){ datesFor <- as.POSIXct(sim$Dates[[1]]$start, tz="GMT", format="%Y-%m-%d %H:%M:%S") }else{ From 405e596549ecd33dd957bb7b86feb1953f5a054a Mon Sep 17 00:00:00 2001 From: Jorge Date: Fri, 22 Nov 2019 11:40:46 +0100 Subject: [PATCH 11/13] folds non consecutive --- R/downscaleCV.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/downscaleCV.R b/R/downscaleCV.R index 6bc4517f..f3c58af2 100644 --- a/R/downscaleCV.R +++ b/R/downscaleCV.R @@ -128,7 +128,7 @@ downscaleCV <- function(x, y, method, if (sampling.strategy == "kfold.chronological") { type <- "chronological" if (!is.numeric(folds)) { - folds.user <- unlist(folds) %>% unique() + folds.user <- unlist(folds) %>% unique() %>% sort() folds.data <- getYearsAsINDEX(y) %>% unique() if (any(folds.user != folds.data)) stop("In the parameters folds you have indicated years that do not belong to the dataset. Please revise the setup of this parameter.") } From 4030b40f4d84415fa5242791e1af46f72bcff7a8 Mon Sep 17 00:00:00 2001 From: "Jose M. Gutierrez" Date: Fri, 13 Dec 2019 14:23:33 +0100 Subject: [PATCH 12/13] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 498aea6d..8c4926e6 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3277316.svg)](https://doi.org/10.5281/zenodo.3277316) + # What is downscaleR? **downscaleR** is an R package for empirical-statistical downscaling focusing on daily data and covering the most popular approaches (bias correction, Model Output Statistics, Perfect Prognosis) and techniques (e.g. quantile mapping, regression, analogs, neural networks). This package has been conceived to work in the framework of both seasonal forecasting and climate change studies. Thus, it considers ensemble members as a basic dimension of the data structure. Find out more about this package at the [downscaleR wiki](https://github.com/SantanderMetGroup/downscaleR/wiki). From 3e7bf984de43609a47ea44daef5abe68ff542f21 Mon Sep 17 00:00:00 2001 From: Jorge Date: Wed, 5 Feb 2020 17:36:45 +0100 Subject: [PATCH 13/13] news and bug fix in downscaleCV --- NEWS | 8 ++++++++ R/downscaleCV.R | 38 ++++++++++++++++++++------------------ man/downscaleCV.Rd | 5 +++-- 3 files changed, 31 insertions(+), 20 deletions(-) diff --git a/NEWS b/NEWS index c8935142..cdd5ccf7 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,7 @@ See the [Releases section](https://github.com/SantanderMetGroup/downscaleR/releases) for older version changes + ## v3.0.0 (14 Jun 2018) * New user interface for flexible definition of predictors (`prepareData`) and prediction data (`prepareNewData`). @@ -49,3 +50,10 @@ See the [Releases section](https://github.com/SantanderMetGroup/downscaleR/relea * downscale.predict renamed --> downscalePredict * Other minor changes and documentation updates +## v3.1.1 (5 Feb 2020) + +* Bug fix in `downscaleCV' +* Improve the internal code of `downscaleChunk' +* Add qdm and dqm bias correction methods +* Bug fix in `biasCorrection' + diff --git a/R/downscaleCV.R b/R/downscaleCV.R index f3c58af2..25b305c9 100644 --- a/R/downscaleCV.R +++ b/R/downscaleCV.R @@ -22,12 +22,13 @@ #' @param y The observations dataset. It should be an object as returned by \pkg{loadeR}. #' @param method A string value. Type of transer function. Currently implemented options are \code{"analogs"}, \code{"GLM"} and \code{"NN"}. #' @param sampling.strategy Specifies a sampling strategy to define the training and test subsets. Possible values are -#' \code{"kfold.chronological"} (the default), \code{"kfold.random"} and \code{"leave-one-year-out"}. +#' \code{"kfold.chronological"} (the default), \code{"kfold.random"}, \code{"leave-one-year-out"} and NULL. #' The \code{sampling.strategy} choices are next described: #' \itemize{ #' \item \code{"kfold.random"} creates the number of folds indicated in the \code{folds} argument by randomly sampling the entries along the time dimension. #' \item \code{"kfold.chronological"} is similar to \code{"kfold.random"}, but the sampling is performed in ascending order along the time dimension. -#' \item \code{"leave-one-year-out"}. This schema performs a leave-one-year-out cross validation. It is equivalent to introduce in the argument \code{folds} a list of all years one by one. +#' \item \code{"leave-one-year-out"}. This scheme performs a leave-one-year-out cross validation. It is equivalent to introduce in the argument \code{folds} a list of all years one by one. +#' \item \code{NULL}. The folds are specified by the user in the function parameter \code{folds}. #' } #' The first two choices will be controlled by the argument \code{folds} (see below) #' @param folds This arguments controls the number of folds, or how these folds are created (ignored if \code{sampling.strategy = "leave-one-year-out"}). If it is given as a fraction in the range (0-1), @@ -120,24 +121,25 @@ downscaleCV <- function(x, y, method, x <- getTemporalIntersection(x,y,which.return = "obs") y <- getTemporalIntersection(x,y,which.return = "prd") - if (sampling.strategy == "leave-one-year-out") { - type <- "chronological" - folds <- as.list(getYearsAsINDEX(y) %>% unique()) - } - - if (sampling.strategy == "kfold.chronological") { - type <- "chronological" - if (!is.numeric(folds)) { - folds.user <- unlist(folds) %>% unique() %>% sort() - folds.data <- getYearsAsINDEX(y) %>% unique() - if (any(folds.user != folds.data)) stop("In the parameters folds you have indicated years that do not belong to the dataset. Please revise the setup of this parameter.") + if (!is.null(sampling.strategy)) { + if (sampling.strategy == "leave-one-year-out") { + type <- "chronological" + folds <- as.list(getYearsAsINDEX(y) %>% unique()) + } + + if (sampling.strategy == "kfold.chronological") { + type <- "chronological" + if (!is.numeric(folds)) { + folds.user <- unlist(folds) %>% unique() %>% sort() + folds.data <- getYearsAsINDEX(y) %>% unique() + if (any(folds.user != folds.data)) stop("In the parameters folds you have indicated years that do not belong to the dataset. Please revise the setup of this parameter.") + } + } + if (sampling.strategy == "kfold.random") { + type <- "random" + if (!is.numeric(folds)) stop("In kfold.random, the parameter folds represent the NUMBER of folds and thus, it should be a NUMERIC value.") } } - if (sampling.strategy == "kfold.random") { - type <- "random" - if (!is.numeric(folds)) stop("In kfold.random, the parameter folds represent the NUMBER of folds and thus, it should be a NUMERIC value.") - } - if (is.list(folds)) { if (any(duplicated(unlist(folds)))) stop("Years can not appear in more than one fold") } diff --git a/man/downscaleCV.Rd b/man/downscaleCV.Rd index ca038d99..6886cd69 100644 --- a/man/downscaleCV.Rd +++ b/man/downscaleCV.Rd @@ -18,12 +18,13 @@ downscaleCV(x, y, method, sampling.strategy = "kfold.chronological", \item{method}{A string value. Type of transer function. Currently implemented options are \code{"analogs"}, \code{"GLM"} and \code{"NN"}.} \item{sampling.strategy}{Specifies a sampling strategy to define the training and test subsets. Possible values are -\code{"kfold.chronological"} (the default), \code{"kfold.random"} and \code{"leave-one-year-out"}. +\code{"kfold.chronological"} (the default), \code{"kfold.random"}, \code{"leave-one-year-out"} and NULL. The \code{sampling.strategy} choices are next described: \itemize{ \item \code{"kfold.random"} creates the number of folds indicated in the \code{folds} argument by randomly sampling the entries along the time dimension. \item \code{"kfold.chronological"} is similar to \code{"kfold.random"}, but the sampling is performed in ascending order along the time dimension. - \item \code{"leave-one-year-out"}. This schema performs a leave-one-year-out cross validation. It is equivalent to introduce in the argument \code{folds} a list of all years one by one. + \item \code{"leave-one-year-out"}. This scheme performs a leave-one-year-out cross validation. It is equivalent to introduce in the argument \code{folds} a list of all years one by one. + \item \code{NULL}. The folds are specified by the user in the function parameter \code{folds}. } The first two choices will be controlled by the argument \code{folds} (see below)}