Skip to content

Commit

Permalink
Merge pull request #525 from SebKrantz/development
Browse files Browse the repository at this point in the history
Development
  • Loading branch information
SebKrantz authored Jan 11, 2024
2 parents 2239d2f + a0bcc61 commit 6558a31
Show file tree
Hide file tree
Showing 8 changed files with 36 additions and 24 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: collapse
Title: Advanced and Fast Data Transformation
Version: 2.0.9
Date: 2024-01-10
Date: 2024-01-11
Authors@R: c(
person("Sebastian", "Krantz", role = c("aut", "cre"),
email = "[email protected]",
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,8 @@ importFrom("stats", "as.formula", "complete.cases", "cor", "cov", "var", "pt",
export(allNA)
export(missing_cases)
export(na_rm)
export(na_locf)
export(na_focb)
export(na_omit)
export(na_insert)
export(massign)
Expand Down
2 changes: 1 addition & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# collapse 2.0.9

* `replace_na()` now has a `type` argument which supports options `"locf"` and `"focb"` (default `"const"`), similar to `data.table::nafill`. The `replace_na()` implementation also supports character data and list-columns (`NULL/empty` elements). Thanks @BenoitLondon for suggesting (#489).
* Added functions `na_locf()` and `na_focb()` for fast basic C implementations of these procedures (optionally by reference). `replace_na()` now also has a `type` argument which supports options `"locf"` and `"focb"` (default `"const"`), similar to `data.table::nafill`. The implementation also supports character data and list-columns (`NULL/empty` elements). Thanks @BenoitLondon for suggesting (#489). Note that `na_locf()` also exists in some other packages (such as *imputeTS*), where it is implemented in R and has additional options. Users can utilize the flexible namespace, i.e., `set_collapse(remove = "na_locf")`, to deal with such name clashes.

* Fixed a bug in weighted quantile estimation (`fquantile()`) that could lead to wrong/out-of-range estimates in some cases. Thanks @zander-prinsloo for reporting (#523).

Expand Down
10 changes: 7 additions & 3 deletions R/recode_replace.R
Original file line number Diff line number Diff line change
Expand Up @@ -239,10 +239,14 @@ recode_char <- function(X, ..., default = NULL, missing = NULL, regex = FALSE,
}


na_locf <- function(x, ph1, ph2, set = FALSE) .Call(C_na_locf_focb, x, 1L, set)
na_focb <- function(x, ph1, ph2, set = FALSE) .Call(C_na_locf_focb, x, 2L, set)
# Last observation carried forward: thin wrapper around the C routine
# registered as C_na_locf. 'set' is passed straight through to C; per the
# package documentation, set = TRUE transforms x by reference.
na_locf <- function(x, set = FALSE) .Call(C_na_locf, x, set)
# First observation carried back: same calling convention, via C_na_focb.
na_focb <- function(x, set = FALSE) .Call(C_na_focb, x, set)

# Variants of the locf/focb wrappers that accept two extra placeholder
# arguments. ph1 and ph2 are never used in the body; only x and set reach C.
# NOTE(review): presumably these exist so replace_na() can call FUN with the
# same argument positions it uses for scv -- confirm against replace_na().
na_locf_ph <- function(x, ph1, ph2, set = FALSE) .Call(C_na_locf, x, set)
na_focb_ph <- function(x, ph1, ph2, set = FALSE) .Call(C_na_focb, x, set)

replace_na <- function(X, value = 0L, cols = NULL, set = FALSE, type = "const") {
FUN <- switch(type, const =, value = scv, locf = na_locf, focb = na_focb,
FUN <- switch(type, const =, value = scv, locf = na_locf_ph, focb = na_focb_ph,
stop("Unknown type:", type))
if(set) {
if(is.list(X)) {
Expand Down
8 changes: 6 additions & 2 deletions man/efficient-programming.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
\alias{fdim}
\alias{missing_cases}
\alias{na_rm}
\alias{na_locf}
\alias{na_focb}
\alias{na_omit}
\alias{na_insert}
\alias{seq_row}
Expand Down Expand Up @@ -61,8 +63,9 @@ X \%+=\% V # Infix for setop(X, "+", V). See also Note (2)
X \%-=\% V # Infix for setop(X, "-", V). See also Note (2)
X \%*=\% V # Infix for setop(X, "*", V). See also Note (2)
X \%/=\% V # Infix for setop(X, "/", V). See also Note (2)
na_rm(x) # Fast: if(anyNA(x)) x[!is.na(x)] else x,
# also removes NULL / empty elements from list
na_rm(x) # Fast: if(anyNA(x)) x[!is.na(x)] else x, last
na_locf(x, set = FALSE) # obs. carried forward and first obs. carried back.
na_focb(x, set = FALSE) # (by reference). These also support lists (NULL/empty)
na_omit(X, cols = NULL, # Faster na.omit for matrices and data frames,
na.attr = FALSE, # can use selected columns to check, attach indices,
prop = 0, ...) # and remove cases with a proportion of values missing
Expand All @@ -88,6 +91,7 @@ cinv(x) # Choleski (fast) inverse of symmetric PD matrix, e.
\item{x, v}{a (atomic) vector or matrix (\code{na_rm} also supports lists).}
\item{value}{a single value of any (atomic) vector type. For \code{whichv} it can also be a \code{length(x)} vector.}
\item{invert}{logical. \code{TRUE} considers elements \code{x != value}.}
\item{set}{logical. \code{TRUE} transforms \code{x} by reference.}
\item{simplify}{logical. If \code{value} is a length-1 atomic vector, \code{alloc()} with \code{simplify = TRUE} returns a length-n atomic vector. If \code{simplify = FALSE}, the result is always a list.}
\item{vind1}{logical. If \code{length(v) == 1L}, setting \code{vind1 = TRUE} will interpret \code{v} as an index, rather than a value to search and replace.}
\item{xlist}{logical. If \code{X} is a list, the default is to treat it like a data frame and replace rows. Setting \code{xlist = TRUE} will treat \code{X} and its replacement \code{R} like 1-dimensional list vectors.}
Expand Down
3 changes: 2 additions & 1 deletion src/ExportSymbols.c
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ static const R_CallMethodDef CallEntries[] = {
{"C_pivot_wide", (DL_FUNC) &pivot_wide, 5},
{"C_sort_merge_join", (DL_FUNC) &sort_merge_join, 4},
{"C_replace_outliers", (DL_FUNC) &replace_outliers, 5},
{"C_na_locf_focb", (DL_FUNC) &na_locf_focb, 3},
{"C_na_locf", (DL_FUNC) &na_locf, 2},
{"C_na_focb", (DL_FUNC) &na_focb, 2},
// {"C_aschar", (DL_FUNC) &CasChar, 1},
{"C_subsetDT", (DL_FUNC) &subsetDT, 4},
{"C_subsetVector", (DL_FUNC) &subsetVector, 3},
Expand Down
3 changes: 2 additions & 1 deletion src/collapse_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,8 @@ SEXP pivot_long(SEXP data, SEXP ind, SEXP idcol);
SEXP pivot_wide(SEXP index, SEXP id, SEXP column, SEXP fill, SEXP Rnthreads);
SEXP sort_merge_join(SEXP x, SEXP table, SEXP ot, SEXP count);
SEXP replace_outliers(SEXP x, SEXP limits, SEXP value, SEXP single_limit, SEXP set);
SEXP na_locf_focb(SEXP x, SEXP Rtype, SEXP Rset);
SEXP na_locf(SEXP x, SEXP Rset);
SEXP na_focb(SEXP x, SEXP Rset);
SEXP multi_match(SEXP m, SEXP g);
// fnobs rewritten in C:
SEXP fnobsC(SEXP x, SEXP Rng, SEXP g);
Expand Down
30 changes: 15 additions & 15 deletions src/programming.c
Original file line number Diff line number Diff line change
Expand Up @@ -725,8 +725,11 @@ SEXP replace_outliers(SEXP x, SEXP limits, SEXP value, SEXP single_limit, SEXP s
return res;
}

void na_locf(SEXP x) {
int n = length(x);
SEXP na_locf(SEXP x, SEXP Rset) {
int n = length(x), copy = asLogical(Rset) == 0;
if(isMatrix(x)) warning("na_locf() does not (yet) have explicit support for matrices, i.e., it treats a matrix as a single vector. Use dapply(M, na_locf) if column-wise processing is desired");
if(copy) x = PROTECT(shallow_duplicate(x));

switch (TYPEOF(x)) {
case INTSXP:
case LGLSXP:
Expand Down Expand Up @@ -782,12 +785,17 @@ void na_locf(SEXP x) {
break;
}
default:
error("na_locf does not support type '%s'", type2char(TYPEOF(x)));
error("na_locf() does not support type '%s'", type2char(TYPEOF(x)));
}
UNPROTECT(copy);
return x;
}

void na_focb(SEXP x) {
int n = length(x);
SEXP na_focb(SEXP x, SEXP Rset) {
int n = length(x), copy = asLogical(Rset) == 0;
if(isMatrix(x)) warning("na_focb() does not (yet) have explicit support for matrices, i.e., it treats a matrix as a single vector. Use dapply(M, na_focb) if column-wise processing is desired");
if(copy) x = PROTECT(shallow_duplicate(x));

switch (TYPEOF(x)) {
case INTSXP:
case LGLSXP:
Expand Down Expand Up @@ -843,21 +851,13 @@ void na_focb(SEXP x) {
break;
}
default:
error("na_focb does not support type '%s'", type2char(TYPEOF(x)));
error("na_focb() does not support type '%s'", type2char(TYPEOF(x)));
}
}

SEXP na_locf_focb(SEXP x, SEXP Rtype, SEXP Rset) {
int copy = asLogical(Rset) == 0, type = asInteger(Rtype);
if(isMatrix(x)) warning("na_locf/focb do not yet have explicit support for matrices, i.e., they treat a matrix as a single vector. Use dapply(M, replace_na, type = 'locf') if column-wise processing is desired");
if(copy) x = PROTECT(shallow_duplicate(x));
if(type == 1) na_locf(x);
else if(type == 2) na_focb(x);
else error("Internal error, unknown locf/focb 'type': %d", type);
UNPROTECT(copy);
return x;
}


SEXP vtypes(SEXP x, SEXP isnum) {
int tx = TYPEOF(x);
if(tx != VECSXP) return ScalarInteger(tx);
Expand Down

0 comments on commit 6558a31

Please sign in to comment.