From ea3c5e6ba3cd2075e813d88b7b7e548dff95d482 Mon Sep 17 00:00:00 2001 From: Alex Strashny Date: Thu, 18 Jan 2024 12:17:20 -0500 Subject: [PATCH] . --- R/set_survey.R | 2 +- R/survey_subset.R | 6 +- R/svyciprop_adjusted.R | 70 ++++++++++--------- R/tab.R | 2 +- R/tab_rate.R | 4 +- R/tab_subset.R | 19 +++-- R/tab_subset_rate.R | 2 +- R/total.R | 2 +- R/total_rate.R | 2 +- R/var_any.R | 2 +- R/var_case.R | 2 +- R/var_collapse.R | 2 +- R/var_copy.R | 2 +- R/var_cross.R | 2 +- R/var_cut.R | 2 +- R/var_list.R | 2 +- R/z_data.R | 2 +- R/z_utils.R | 2 + ...tory-Medical-Care-Survey-NAMCS-tables.html | 10 +-- docs/pkgdown.yml | 2 +- docs/reference/namcs2019sv.html | 2 +- docs/reference/set_survey.html | 2 +- docs/reference/survey_subset.html | 6 +- docs/reference/svyciprop_adjusted.html | 16 ++--- docs/reference/tab.html | 2 +- docs/reference/tab_rate.html | 2 +- docs/reference/tab_subset.html | 5 +- docs/reference/tab_subset_rate.html | 2 +- docs/reference/total.html | 4 +- docs/reference/total_rate.html | 4 +- docs/reference/var_any.html | 2 +- docs/reference/var_case.html | 2 +- docs/reference/var_collapse.html | 2 +- docs/reference/var_copy.html | 2 +- docs/reference/var_cross.html | 2 +- docs/reference/var_cut.html | 2 +- docs/reference/var_list.html | 4 +- docs/search.json | 2 +- inst/WORDLIST | 6 ++ man/namcs2019sv.Rd | 2 +- man/set_survey.Rd | 2 +- man/survey_subset.Rd | 6 +- man/svyciprop_adjusted.Rd | 14 ++-- man/tab.Rd | 2 +- man/tab_rate.Rd | 2 +- man/tab_subset.Rd | 5 +- man/tab_subset_rate.Rd | 2 +- man/total.Rd | 2 +- man/total_rate.Rd | 2 +- man/var_any.Rd | 2 +- man/var_case.Rd | 2 +- man/var_collapse.Rd | 2 +- man/var_copy.Rd | 2 +- man/var_cross.Rd | 2 +- man/var_cut.Rd | 2 +- man/var_list.Rd | 2 +- ...atory-Medical-Care-Survey-NAMCS-tables.Rmd | 2 - 57 files changed, 136 insertions(+), 127 deletions(-) diff --git a/R/set_survey.R b/R/set_survey.R index 58547a4..e5d5d61 100644 --- a/R/set_survey.R +++ b/R/set_survey.R @@ -11,7 +11,7 @@ #' #' @param design a survey object (`survey.design` or `svyrep.design`) #' -#' @return Object with info about the survey. +#' @return Info about the survey. #' @export #' #' @examples diff --git a/R/survey_subset.R b/R/survey_subset.R index 286d400..9d34dc4 100644 --- a/R/survey_subset.R +++ b/R/survey_subset.R @@ -1,10 +1,10 @@ #' Subset a survey, while preserving variable labels #' -#' @param design a survey design object +#' @param design a survey object #' @param subset an expression specifying the sub-population -#' @param label survey label of the newly created survey design object +#' @param label survey label of the newly created survey object #' -#' @return a new survey design object +#' @return a new survey object #' @export #' #' @examples diff --git a/R/svyciprop_adjusted.R b/R/svyciprop_adjusted.R index 4de88e1..706a3ca 100644 --- a/R/svyciprop_adjusted.R +++ b/R/svyciprop_adjusted.R @@ -1,17 +1,17 @@ #' Confidence intervals for proportions, adjusted for degrees of freedom #' -#' A version of `survey::svyciprop` that adjusts for the degrees of freedom when `method == "beta"`. +#' A version of `survey::svyciprop()` that adjusts for the degrees of freedom when `method = "beta"`. #' #' Written by Makram Talih in 2019. #' #' To use this function in tabulations, type: `options(surveytable.adjust_svyciprop = TRUE)`. #' -#' @param formula see `survey::svyciprop`. -#' @param design see `survey::svyciprop`. -#' @param method see `survey::svyciprop`. -#' @param level see `survey::svyciprop`. +#' @param formula see `survey::svyciprop()`. +#' @param design see `survey::svyciprop()`. +#' @param method see `survey::svyciprop()`. +#' @param level see `survey::svyciprop()`. #' @param df_method how `df` should be calculated: "default" or "NHIS". -#' @param ... see `survey::svyciprop`. +#' @param ... see `survey::svyciprop()`. #' #' `df_method`: for "default", `df = degf(design)`; for "NHIS", `df = nrow(design) - 1`. #' @@ -26,50 +26,52 @@ #' tab("AGER") svyciprop_adjusted = function(formula , design - , method = c("logit", "likelihood", "asin", "beta", "mean") + , method = c("logit", "likelihood", "asin", "beta" + , "mean", "xlogit") , level = 0.95 , df_method , ...) { assert_that(df_method %in% c("default", "NHIS")) df = switch(df_method, - default = degf(design), - NHIS = nrow(design) - 1 + default = degf(design), + NHIS = nrow(design) - 1 ) - method = match.arg(method) - if (method == "beta") { - m = eval(bquote(svymean(~as.numeric(.(formula[[2]])), design, ...))) - rval = coef(m)[1] - #Effective sample size - n.eff = coef(m) * (1 - coef(m))/stats::vcov(m) + method = match.arg(method) + if (method != "beta") { + return( svyciprop(formula, design, method, level, df, ...) ) + } - attr(rval, "var") = stats::vcov(m) - alpha = 1 - level + m = eval(bquote(svymean(~as.numeric(.(formula[[2]])), design, ...))) + rval = coef(m)[1] - # Degrees of freedom used only for adjusting effective sample size, below + #Effective sample size + n.eff = coef(m) * (1 - coef(m))/stats::vcov(m) - # For NHIS, provisional guideline is to override the R default - # df = nrow(design) - 1 ## uncomment this row to override R default + attr(rval, "var") = stats::vcov(m) + alpha = 1 - level - # R-default from degf(design) uses subdomain Strata and PSUs + # Degrees of freedom used only for adjusting effective sample size, below - if (df >0) { #Korn-Graubard df-adjustment factor - rat.squ = (qt(alpha/2, nrow(design) - 1)/qt(alpha/2, df))^2 - } else { - rat.squ = 0 # limit case: set to zero - } + # For NHIS, provisional guideline is to override the R default + # df = nrow(design) - 1 ## uncomment this row to override R default - if (rval > 0) { #Adjusted effective sample size - n.eff = min(nrow(design), n.eff*rat.squ) - } else { - n.eff = nrow(design) #limit case: set to sample size - } + # R-default from degf(design) uses subdomain Strata and PSUs - ci = c(stats::qbeta(alpha/2, n.eff*rval, n.eff*(1-rval)+1), stats::qbeta(1-alpha/2, n.eff*rval+1, n.eff*(1 - rval))) + if (df >0) { #Korn-Graubard df-adjustment factor + rat.squ = (qt(alpha/2, nrow(design) - 1)/qt(alpha/2, df))^2 + } else { + rat.squ = 0 # limit case: set to zero } - else { - svyciprop(formula, design, method, level, df, ...) + + if (rval > 0) { #Adjusted effective sample size + n.eff = min(nrow(design), n.eff*rat.squ) + } else { + n.eff = nrow(design) #limit case: set to sample size } + + ci = c(stats::qbeta(alpha/2, n.eff*rval, n.eff*(1-rval)+1), stats::qbeta(1-alpha/2, n.eff*rval+1, n.eff*(1 - rval))) + halfalpha = (1 - level)/2 names(ci) = paste(round(c(halfalpha, (1 - halfalpha))*100, 1), "%", sep = "") names(rval) = deparse(formula[[2]]) diff --git a/R/tab.R b/R/tab.R index 206ffec..f444bff 100644 --- a/R/tab.R +++ b/R/tab.R @@ -25,7 +25,7 @@ #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables. #' @param csv name of a CSV file #' -#' @return A list of `data.frame` tables or a single `data.frame` table. +#' @return A list of tables or a single table. #' @family tables #' @export #' diff --git a/R/tab_rate.R b/R/tab_rate.R index 2da43fd..4f1d645 100644 --- a/R/tab_rate.R +++ b/R/tab_rate.R @@ -2,7 +2,7 @@ #' #' Calculate the rates for categorical (factor) or logical variables. #' -#' @param vr variable to tabulate +#' @param vr variable to tabulate #' @param pop either a single number or a `data.frame` with columns named #' `Level` and `Population`. `Level` must #' exactly match the levels of `vr`. `Population` is the population for that @@ -12,7 +12,7 @@ #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables. #' @param csv name of a CSV file #' -#' @return A list of `data.frame` tables or a single `data.frame` table. +#' @return A list of tables or a single table. #' @family tables #' @export #' diff --git a/R/tab_subset.R b/R/tab_subset.R index e6ab5eb..3ec8713 100644 --- a/R/tab_subset.R +++ b/R/tab_subset.R @@ -25,9 +25,7 @@ #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables. #' @param csv name of a CSV file #' -#' @return -#' * `tab_subset`: A list of `data.frame` tables or a single `data.frame` table. -#' * `tab_cross`: A `data.frame` table. +#' @return A list of tables or a single table. #' #' @family tables #' @@ -71,7 +69,20 @@ tab_subset = function(vr, vrby, lvls = c() # Need to convert to factor for testing if (is.logical(design$variables[,vr])) { lbl = attr(design$variables[,vr], "label") - design$variables[,vr] %<>% factor %>% droplevels %>% .fix_factor + design$variables[,vr] %<>% factor %>% droplevels + + if (drop_na) { + design = design[which(!is.na(design$variables[,vr])),] + if(inherits(design, "svyrep.design")) { + design$prob = 1 / design$pweights + } + # drop_na in .tab_factor will set this + # lbl %<>% paste("(knowns only)") + } else { + design$variables[,vr] %<>% .fix_factor + } + assert_that(noNA(design$variables[,vr]), noNA(levels(design$variables[,vr]))) + attr(design$variables[,vr], "label") = lbl } diff --git a/R/tab_subset_rate.R b/R/tab_subset_rate.R index 17ba217..6057f60 100644 --- a/R/tab_subset_rate.R +++ b/R/tab_subset_rate.R @@ -14,7 +14,7 @@ #' @param max_levels a categorical variable can have at most this many levels. Used to avoid printing huge tables. #' @param csv name of a CSV file #' -#' @return A list of `data.frame` tables or a single `data.frame` table. +#' @return A list of tables or a single table. #' @family tables #' @export #' diff --git a/R/total.R b/R/total.R index 7ef0e7c..3539951 100644 --- a/R/total.R +++ b/R/total.R @@ -2,7 +2,7 @@ #' #' @param csv name of a CSV file #' -#' @return `data.frame` +#' @return A table #' @family tables #' @export #' diff --git a/R/total_rate.R b/R/total_rate.R index d99918a..3ca7524 100644 --- a/R/total_rate.R +++ b/R/total_rate.R @@ -4,7 +4,7 @@ #' @param per calculate rate per this many items in the population #' @param csv name of a CSV file #' -#' @return `data.frame` +#' @return A table #' @family tables #' @export #' diff --git a/R/var_any.R b/R/var_any.R index 1a3dda8..fa648e4 100644 --- a/R/var_any.R +++ b/R/var_any.R @@ -6,7 +6,7 @@ #' @param newvr name of the new variable to be created #' @param vrs vector of logical variables #' -#' @return (Nothing.) +#' @return Survey object #' @family variables #' @export #' diff --git a/R/var_case.R b/R/var_case.R index ee2f8ce..a5e6fb9 100644 --- a/R/var_case.R +++ b/R/var_case.R @@ -4,7 +4,7 @@ #' @param vr factor variable #' @param cases one or more levels of `vr` that are converted to `TRUE`. All other levels are converted to `FALSE`. #' -#' @return (Nothing.) +#' @return Survey object #' @family variables #' @export #' diff --git a/R/var_collapse.R b/R/var_collapse.R index 80b7786..2224848 100644 --- a/R/var_collapse.R +++ b/R/var_collapse.R @@ -6,7 +6,7 @@ #' @param newlevel name of the new level #' @param oldlevels vector of old levels #' -#' @return (Nothing.) +#' @return Survey object #' @family variables #' @export #' diff --git a/R/var_copy.R b/R/var_copy.R index 1576320..c91b554 100644 --- a/R/var_copy.R +++ b/R/var_copy.R @@ -6,7 +6,7 @@ #' @param newvr name of the new variable to be created #' @param vr variable #' -#' @return (Nothing.) +#' @return Survey object #' @family variables #' @export #' diff --git a/R/var_cross.R b/R/var_cross.R index 0615aa6..f93607d 100644 --- a/R/var_cross.R +++ b/R/var_cross.R @@ -7,7 +7,7 @@ #' @param vr first variable #' @param vrby second variable #' -#' @return (Nothing.) +#' @return Survey object #' @family variables #' @export #' diff --git a/R/var_cut.R b/R/var_cut.R index f551b3c..b76bfe0 100644 --- a/R/var_cut.R +++ b/R/var_cut.R @@ -7,7 +7,7 @@ #' @param breaks see [`cut()`] #' @param labels see [`cut()`] #' -#' @return (Nothing.) +#' @return Survey object #' @family variables #' @export #' diff --git a/R/var_list.R b/R/var_list.R index 34c4ff1..859ce3c 100644 --- a/R/var_list.R +++ b/R/var_list.R @@ -4,7 +4,7 @@ #' @param all print all variables? #' @param csv name of a CSV file #' -#' @return `data.frame` +#' @return A table #' @export #' #' @examples diff --git a/R/z_data.R b/R/z_data.R index 7079907..b7b4a8e 100644 --- a/R/z_data.R +++ b/R/z_data.R @@ -7,7 +7,7 @@ #' `namcs2019sv_df` is a data frame. #' #' `namcs2019sv` is a survey object created from `namcs2019sv_df` -#' using `survey::svydesign()`. +#' using `[survey::svydesign()]`. #' #' @source #' * SAS data: diff --git a/R/z_utils.R b/R/z_utils.R index 033b6ec..eac8c6b 100644 --- a/R/z_utils.R +++ b/R/z_utils.R @@ -1,3 +1,5 @@ +# if (drop_na) do that, else .fix_factor +# then, assert noNA .fix_factor = function(xx) { assert_that(is.factor(xx)) idx = which(is.na(xx)) diff --git a/docs/articles/Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html b/docs/articles/Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html index 4198781..b23896e 100644 --- a/docs/articles/Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html +++ b/docs/articles/Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html @@ -6784,9 +6784,9 @@

More advanced coding
 tmp_file = tempfile(fileext = ".csv")
-suppressMessages( set_output(csv = tmp_file) )
-
-for (vr in c("AGER", "Age group", "SEX", "Age x Sex")) {
+suppressMessages( set_output(csv = tmp_file) )
+
+for (vr in c("AGER", "Age group", "SEX", "Age x Sex")) {
     var_cross("tmp", "MAJOR", vr)
     for (lvl in levels(surveytable:::env$survey$variables[,vr])) {
         tab_subset("SPECCAT", "tmp", paste0("Preventive care : ", lvl))
@@ -6827,14 +6827,14 @@ 

More advanced coding
+
 vr = "AGER"
 var_cross("tmp", "MAJOR", vr)
 ## Warning in var_cross("tmp", "MAJOR", vr): tmp: overwriting a variable that
 ## already exists.
 lvl = levels(surveytable:::env$survey$variables[,vr])[1]
 tab_subset("SPECCAT", "tmp", paste0("Preventive care : ", lvl))
- +
Type of specialty (Primary, Medical, Surgical) ((Major reason for this visit) x (Patient age recode) = Preventive care : Under 15 years) {NAMCS diff --git a/docs/pkgdown.yml b/docs/pkgdown.yml index 4abc475..9bb597e 100644 --- a/docs/pkgdown.yml +++ b/docs/pkgdown.yml @@ -4,7 +4,7 @@ pkgdown_sha: ~ articles: Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables: Example-National-Ambulatory-Medical-Care-Survey-NAMCS-tables.html surveytable: surveytable.html -last_built: 2024-01-18T12:13Z +last_built: 2024-01-18T15:04Z urls: reference: https://cdcgov.github.io/surveytable/reference article: https://cdcgov.github.io/surveytable/articles diff --git a/docs/reference/namcs2019sv.html b/docs/reference/namcs2019sv.html index 5dca08b..54c1fdd 100644 --- a/docs/reference/namcs2019sv.html +++ b/docs/reference/namcs2019sv.html @@ -89,7 +89,7 @@

Source<

Details

namcs2019sv_df is a data frame.

namcs2019sv is a survey object created from namcs2019sv_df -using survey::svydesign().

+using [survey::svydesign()].