xKDR · nadiaenh · Jul 14, 2023 · Jul 14, 2023 · Jul 14, 2023 · Jul 19, 2023
diff --git a/README.md b/README.md
@@ -92,7 +92,7 @@ to compute the standard errors.
 
 ```julia
 julia> bootsrs = bootweights(srs; replicates=1000)
-ReplicateDesign:
+ReplicateDesign{BoootstrapReplicates}:
 data: 200×1047 DataFrame
 strata: none
 cluster: none

diff --git a/docs/src/man/replicate.md b/docs/src/man/replicate.md
@@ -4,9 +4,9 @@ Replicate weights are a method for estimating the standard errors of survey stat
 
 The basic idea behind replicate weights is to create multiple versions of the original sample weights, each with small, randomly generated perturbations. The multiple versions of the sample weights are then used to calculate the survey statistic of interest, such as the mean or total, on multiple replicate samples. The variance of the survey statistic is then estimated by computing the variance across the replicate samples.
 
-Currently, the Rao-Wu bootstrap[^1] is the only method in the package for generating replicate weights. 
+Currently, the Rao-Wu bootstrap[^1] and the Jackknife [^2] are the only methods in the package for generating replicate weights. In the future, the package will support additional types of inference methods, which will be passed when creating a `ReplicateDesign` object.
 
-The `bootweights` function of the package can be used to generate a `ReplicateDesign` from a `SurveyDesign`
+The `bootweights` function of the package can be used to generate a `ReplicateDesign` using the Rao-Wu bootstrap method from a `SurveyDesign`.
 For example: 
 ```@repl bootstrap
 using Survey
@@ -15,7 +15,16 @@ dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
 bstrat = bootweights(dstrat; replicates = 10)
 ```
 
-For each replicate, the `DataFrame` of `ReplicateDesign` has an additional column. The of the column is `replicate_` followed by the replicate number.  
+The `jackknifeweights` function of the package can be used to generate a `ReplicateDesign` using the Jackknife method from a `SurveyDesign`.
+For example: 
+```@repl bootstrap
+using Survey
+apistrat = load_data("apistrat")
+dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw)
+bstrat = jackknifeweights(dstrat; replicates = 10)
+```
+
+For each replicate, the `DataFrame` of `ReplicateDesign` has an additional column. The name of the column is `replicate_` followed by the replicate number.  
 
 ```@repl bootstrap
 names(bstrat.data)
@@ -38,4 +47,5 @@ For each replicate weight, the statistic is calculated using it instead of the w
 
 ## References
 
-[^1]: [Rust, Keith F., and J. N. K. Rao. "Variance estimation for complex surveys using replication techniques." Statistical methods in medical research 5.3 (1996): 283-310.](https://journals.sagepub.com/doi/abs/10.1177/096228029600500305?journalCode=smma)
+[^1]: [Rust, Keith F., and J. N. K. Rao. "Variance estimation for complex surveys using replication techniques." Statistical methods in medical research 5.3 (1996): 283-310.](https://journals.sagepub.com/doi/abs/10.1177/096228029600500305?journalCode=smma)
+[^2]: [Miller, Rupert G. “The Jackknife--A Review.” Biometrika 61, no. 1 (1974): 1–15. https://doi.org/10.2307/2334280.](https://www.jstor.org/stable/2334280)
diff --git a/src/bootstrap.jl b/src/bootstrap.jl
@@ -54,45 +54,67 @@ function bootweights(design::SurveyDesign; replicates = 4000, rng = MersenneTwis
 end
 
 """
-    variance(x::Symbol, func::Function, design::ReplicateDesign{BootstrapReplicates})
+    variance(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
 
+Compute the standard error of the estimated mean using the bootstrap method.
 
-Use replicate weights to compute the standard error of the estimated mean using the bootstrap method. The variance is calculated using the formula
+# Arguments
+- `x::Union{Symbol, Vector{Symbol}}`: Symbol or vector of symbols representing the variable(s) for which the mean is estimated.
+- `func::Function`: Function used to calculate the mean.
+- `design::ReplicateDesign{BootstrapReplicates}`: Replicate design object.
+- `args...`: Additional arguments to be passed to the function.
+- `kwargs...`: Additional keyword arguments.
+
+# Returns
+- `df`: DataFrame containing the estimated mean and its standard error.
+
+The variance is calculated using the formula
 
 ```math
 \\hat{V}(\\hat{\\theta}) = \\dfrac{1}{R}\\sum_{i = 1}^R(\\theta_i - \\hat{\\theta})^2
 ```
 
 where above ``R`` is the number of replicate weights, ``\\theta_i`` is the estimator computed using the ``i``th set of replicate weights, and ``\\hat{\\theta}`` is the estimator computed using the original weights.
 
-```jldoctest
-julia> using Survey, StatsBase;
-
-julia> apiclus1 = load_data("apiclus1");
-
-julia> dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw);
+# Examples
 
-julia> bclus1 = dclus1 |> bootweights;
+```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
 
-julia> weightedmean(x, y) = mean(x, weights(y));
+julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
 
-julia> variance(:api00, weightedmean, bclus1)
+julia> variance(:api00, mean, bclus1)
 1×2 DataFrame
  Row │ estimator  SE
      │ Float64    Float64
 ─────┼────────────────────
    1 │   644.169  23.4107
-
 ```
 """
-function variance(x::Symbol, func::Function, design::ReplicateDesign{BootstrapReplicates})
-    θ̂ = func(design.data[!, x], design.data[!, design.weights])
-    θ̂t = [
-        func(design.data[!, x], design.data[!, "replicate_"*string(i)]) for
-        i = 1:design.replicates
+function variance(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{BootstrapReplicates}, args...; kwargs...)
+
+    # Compute the estimators
+    θs = func(design.data, x, design.weights, args...; kwargs...)
+
+    # Compute the estimators for each replicate
+    θts = [
+        func(design.data, x, "replicate_" * string(i), args...; kwargs...) for i in 1:design.replicates
     ]
-    variance = sum((θ̂t .- θ̂) .^ 2) / design.replicates
-    return DataFrame(estimator = θ̂, SE = sqrt(variance))
+
+    # Convert θs and θts to a vector if they are not already
+    θs = (θs isa Vector) ? θs : [θs]  
+    θts = (θts[1] isa Vector) ? θts : [θts]
+
+    # Calculate variances for each estimator
+    variance = Float64[]
+
+    for i in 1:length(θs)
+        θ = θs[i]
+        θt = θts[i]
+        num = sum((θt .- θ) .^ 2) / design.replicates 
+        push!(variance, num)
+    end
+
+    return DataFrame(estimator = θs, SE = sqrt.(variance))
 end
 
 function _bootweights_cluster_sorted!(cluster_sorted,

diff --git a/src/jackknife.jl b/src/jackknife.jl
@@ -94,66 +94,56 @@ Compute variance of column `x` for the given `func` using the Jackknife method.
 Above, ``\\hat{\\theta}`` represents the estimator computed using the original weights, and ``\\hat{\\theta_{(hj)}}`` represents the estimator computed from the replicate weights obtained when PSU ``j`` from cluster ``h`` is removed.
 
 # Examples
-```jldoctest
-julia> using Survey, StatsBase
-
-julia> apistrat = load_data("apistrat");
+```jldoctest; setup = :(using Survey, StatsBase, DataFrames; apistrat = load_data("apistrat"); dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw); rstrat = jackknifeweights(dstrat);)
 
-julia> dstrat = SurveyDesign(apistrat; strata=:stype, weights=:pw);
+julia> mean(df::DataFrame, column, weights) = StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights]));
 
-julia> rstrat = jackknifeweights(dstrat)
-ReplicateDesign{JackknifeReplicates}:
-data: 200×244 DataFrame
-strata: stype
-    [E, E, E  …  M]
-cluster: none
-popsize: [4420.9999, 4420.9999, 4420.9999  …  1018.0]
-sampsize: [100, 100, 100  …  50]
-weights: [44.21, 44.21, 44.21  …  20.36]
-allprobs: [0.0226, 0.0226, 0.0226  …  0.0491]
-type: jackknife
-replicates: 200
-
-julia> weightedmean(x, y) = mean(x, weights(y));
-
-julia> variance(:api00, weightedmean, rstrat)
+julia> variance(:api00, mean, rstrat)
 1×2 DataFrame
  Row │ estimator  SE
      │ Float64    Float64
 ─────┼────────────────────
    1 │   662.287  9.53613
-
 ```
 # Reference
 pg 380-382, Section 9.3.2 Jackknife - Sharon Lohr, Sampling Design and Analysis (2010)
 """
-function variance(x::Symbol, func::Function, design::ReplicateDesign{JackknifeReplicates})
+function variance(x::Union{Symbol, Vector{Symbol}}, func::Function, design::ReplicateDesign{JackknifeReplicates})
+
     df = design.data
-    # sort!(df, [design.strata, design.cluster])
     stratified_gdf = groupby(df, design.strata)
 
     # estimator from original weights
-    θ = func(df[!, x], df[!, design.weights])
+    θs = func(design.data, x, design.weights)
 
-    variance = 0
+    # ensure that θs is a vector
+    θs = (θs isa Vector) ? θs : [θs]  
+
+    variance = zeros(length(θs))
     replicate_index = 1
+
     for subgroup in stratified_gdf
+
         psus_in_stratum = unique(subgroup[!, design.cluster])
         nh = length(psus_in_stratum)
-        cluster_variance = 0
+        cluster_variance = zeros(length(θs))
+
         for psu in psus_in_stratum
-            # get replicate weights corresponding to current stratum and psu
-            rep_weights = df[!, "replicate_"*string(replicate_index)]
 
             # estimator from replicate weights
-            θhj = func(df[!, x], rep_weights)
+            θhjs = func(design.data, x, "replicate_" * string(replicate_index))
+
+            # update the cluster variance for each estimator
+            for i in 1:length(θs)
+                cluster_variance[i] += ((nh - 1)/nh) * (θhjs[i] - θs[i])^2
+            end
 
-            cluster_variance += ((nh - 1)/nh)*(θhj - θ)^2
             replicate_index += 1
         end
-        variance += cluster_variance
-    end
 
-    return DataFrame(estimator = θ, SE = sqrt(variance))
-end
+        # update the overall variance
+        variance .+= cluster_variance
+    end
 
+    return DataFrame(estimator = θs, SE = sqrt.(variance))
+end
diff --git a/src/mean.jl b/src/mean.jl
@@ -33,12 +33,18 @@ end
 """
 	mean(x::Symbol, design::ReplicateDesign)
 
-Use replicate weights to compute the standard error of the estimated mean.
+Compute the standard error of the estimated mean using replicate weights.
+
+# Arguments
+- `x::Symbol`: Symbol representing the variable for which the mean is estimated.
+- `design::ReplicateDesign`: Replicate design object.
+
+# Returns
+- `df`: DataFrame containing the estimated mean and its standard error.
 
 # Examples
 
-```jldoctest; setup = :(apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw))
-julia> bclus1 = dclus1 |> bootweights;
+```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = dclus1 |> bootweights;)
 
 julia> mean(:api00, bclus1)
 1×2 DataFrame
@@ -49,9 +55,17 @@ julia> mean(:api00, bclus1)
 ```
 """
 function mean(x::Symbol, design::ReplicateDesign)
-	weightedmean(x, y) = mean(x, weights(y))
-	df = Survey.variance(x, weightedmean, design)
+
+    # Define an inner function to calculate the mean
+    function compute_mean(df::DataFrame, column, weights_column)
+        return StatsBase.mean(df[!, column], StatsBase.weights(df[!, weights_column]))
+    end
+
+    # Calculate the mean and variance
+    df = Survey.variance(x, compute_mean, design)
+
     rename!(df, :estimator => :mean)
+
     return df
 end
 
@@ -135,4 +149,4 @@ function mean(x::Symbol, domain, design::AbstractSurveyDesign)
     df = bydomain(x, domain, design, weighted_mean)
     rename!(df, :statistic => :mean)
     return df
-end
+end
diff --git a/src/quantile.jl b/src/quantile.jl
@@ -33,10 +33,22 @@ function quantile(var::Symbol, design::SurveyDesign, p::Real; kwargs...)
 end
 
 """
-Use replicate weights to compute the standard error of the estimated quantile. 
+    quantile(x::Symbol, design::ReplicateDesign, p; kwargs...)
+
+Compute the standard error of the estimated quantile using replicate weights.
 
-```jldoctest; setup = :(apisrs = load_data("apisrs");srs = SurveyDesign(apisrs; weights=:pw))
-julia> bsrs = srs |> bootweights; 
+# Arguments
+- `x::Symbol`: Symbol representing the variable for which the quantile is estimated.
+- `design::ReplicateDesign`: Replicate design object.
+- `p::Real`: Quantile value to estimate, ranging from 0 to 1.
+- `kwargs...`: Additional keyword arguments.
+
+# Returns
+- `df`: DataFrame containing the estimated quantile and its standard error.
+
+# Examples
+
+```jldoctest; setup = :(using Survey, StatsBase; apisrs = load_data("apisrs");srs = SurveyDesign(apisrs; weights=:pw); bsrs = srs |> bootweights;)
 
 julia> quantile(:api00, bsrs, 0.5)
 1×2 DataFrame
@@ -46,10 +58,18 @@ julia> quantile(:api00, bsrs, 0.5)
    1 │            659.0  14.9764
 ```
 """
-function quantile(var::Symbol, design::ReplicateDesign, p::Real; kwargs...)
-    quantile_func(v, weights) = Statistics.quantile(v, ProbabilityWeights(weights), p)
-    df = Survey.variance(var, quantile_func, design)
+function quantile(x::Symbol, design::ReplicateDesign, p::Real; kwargs...)
+
+    # Define an inner function to calculate the quantile
+    function compute_quantile(df::DataFrame, column, weights_column)
+        return Statistics.quantile(df[!, column], ProbabilityWeights(df[!, weights_column]), p)
+    end
+
+    # Calculate the quantile and variance
+    df = variance(x, compute_quantile, design)
+
     rename!(df, :estimator => string(p) * "th percentile")
+
     return df
 end
 

diff --git a/src/ratio.jl b/src/ratio.jl
@@ -25,33 +25,38 @@ function ratio(variable_num::Symbol, variable_den::Symbol, design::SurveyDesign)
 end
 
 """
-Use replicate weights to compute the standard error of the ratio.
+    ratio(variable_num::Symbol, variable_den::Symbol, design::ReplicateDesign)
 
-```jldoctest; setup = :(apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw))
-julia> bclus1 = bootweights(dclus1); 
+Compute the standard error of the ratio using replicate weights.
+
+# Arguments
+- `variable_num::Symbol`: Symbol representing the numerator variable.
+- `variable_den::Symbol`: Symbol representing the denominator variable.
+- `design::ReplicateDesign`: Replicate design object.
+
+# Returns
+- `var`: Variance of the ratio.
+
+# Examples
+
+```jldoctest; setup = :(using Survey, StatsBase; apiclus1 = load_data("apiclus1"); dclus1 = SurveyDesign(apiclus1; clusters = :dnum, weights = :pw); bclus1 = bootweights(dclus1);)
 
 julia> ratio(:api00, :enroll, bclus1)
 1×2 DataFrame
- Row │ ratio    SE
-     │ Float64  Float64
-─────┼───────────────────
-   1 │ 1.17182  0.131518
-
+ Row │ estimator  SE
+     │ Float64    Float64
+─────┼─────────────────────
+   1 │   1.17182  0.131518
 ```
 """
 function ratio(variable_num::Symbol, variable_den::Symbol, design::ReplicateDesign)
-    X =
-        wsum(design.data[!, variable_num], design.data[!, design.weights]) /
-        wsum(design.data[!, variable_den], design.data[!, design.weights])
-    Xt = [
-        (wsum(
-            design.data[!, variable_num],
-            weights(design.data[!, "replicate_"*string(i)]),
-        )) / (wsum(
-            design.data[!, variable_den],
-            weights(design.data[!, "replicate_"*string(i)]),
-        )) for i = 1:design.replicates
-    ]
-    variance = sum((Xt .- X) .^ 2) / design.replicates
-    DataFrame(ratio = X, SE = sqrt(variance))
-end
+
+    # Define an inner function to calculate the ratio
+    function compute_ratio(df::DataFrame, columns, weights_column)
+        return sum(df[!, columns[1]], StatsBase.weights(df[!, weights_column])) / sum(df[!, columns[2]], StatsBase.weights(df[!, weights_column]))
+    end
+
+    # Calculate the variance using the `variance` function with the inner function
+    var = variance([variable_num, variable_den], compute_ratio, design)
+    return var
+end