Guard against coordinates with no variance (#93)

* Guard against coordinates with no variance. This returns finite shrink-covariances even when one or more coordinates have zero variance. Applicable only to linear shrinkage. Activated only if one sets `cov(LSE, X; drop_var0=true)`. * Move drop_var0 into LinearShrinkage
mateuszbaran · Nov 29, 2023 · edc0b29 · edc0b29 · mateuszbaran · Nov 29, 2023
1 parent d6b428d
commit edc0b29
Show file tree

Hide file tree

Showing 4 changed files with 85 additions and 64 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "CovarianceEstimation"
 uuid = "587fd27a-f159-11e8-2dae-1979310e6154"
 authors = ["Mateusz Baran <[email protected]>", "Thibaut Lienart"]
-version = "0.2.10"
+version = "0.2.11"
 
 [deps]
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"

diff --git a/src/linearshrinkage.jl b/src/linearshrinkage.jl
@@ -73,7 +73,7 @@ A subtype of `LinearShrinkageTarget` where
 struct ConstantCorrelation <: LinearShrinkageTarget end
 
 """
-    LinearShrinkage(target, shrinkage; corrected=false)
+    LinearShrinkage(target, shrinkage; corrected=false, drop_var0=false)
 
 Linear shrinkage estimator described by equation
 ``(1 - \\lambda) S + \\lambda F`` where ``S`` is standard covariance matrix,
@@ -82,28 +82,32 @@ shrinkage parameter, either given explicitly in `shrinkage` or automatically
 determined according to one of the supported methods.
 
 The corrected estimator is used if `corrected` is true.
+`drop_var0=true` drops the zero-variance variables from the computation of the shrinkage target and ``\\lambda``.
 """
 struct LinearShrinkage{T<:LinearShrinkageTarget, S<:Shrinkage} <: CovarianceEstimator
     target::T
     shrinkage::S
     corrected::Bool
-    function LinearShrinkage(t::TT, s::SS; corrected=false) where {TT<:LinearShrinkageTarget, SS<:Real}
+    drop_var0::Bool
+
+    function LinearShrinkage(t::TT, s::SS; corrected=false, drop_var0=false) where {TT<:LinearShrinkageTarget, SS<:Real}
         0 ≤ s ≤ 1 || throw(ArgumentError("Shrinkage value should be between 0 and 1. Got $s."))
-        new{TT, SS}(t, s, corrected)
+        new{TT, SS}(t, s, corrected, drop_var0)
     end
-    function LinearShrinkage(t::TT, s::Symbol=:auto; corrected=false) where TT <: LinearShrinkageTarget
+    function LinearShrinkage(t::TT, s::Symbol=:auto; corrected=false, drop_var0=false) where TT <: LinearShrinkageTarget
         s ∈ (:auto, :lw, :ss, :rblw, :oas) || throw(ArgumentError("Shrinkage method $s not supported."))
-        new{TT, Symbol}(t, s, corrected)
+        new{TT, Symbol}(t, s, corrected, drop_var0)
     end
 end
 
 LinearShrinkage(;
     target::LinearShrinkageTarget=DiagonalUnitVariance(),
     shrinkage::Shrinkage,
-    corrected::Bool=false) = LinearShrinkage(target, shrinkage, corrected=corrected)
+    corrected::Bool=false,
+    drop_var0::Bool=false) = LinearShrinkage(target, shrinkage, corrected=corrected, drop_var0=drop_var0)
 
 """
-    cov(lse::LinearShrinkage, X, [weights::FrequencyWeights]; dims=1)
+    cov(lse::LinearShrinkage, X, [weights::FrequencyWeights]; dims=1, mean=nothing)
 
 Linear shrinkage covariance estimator for matrix `X` along dimension `dims`.
 Computed using the method described by `lse`.
@@ -124,6 +128,7 @@ function cov(lse::LinearShrinkage, X::AbstractMatrix{<:Real}, weights::Frequency
     n, p = size(Xc)
     # sample covariance of size (p x p)
     S = cov(SimpleCovariance(corrected=lse.corrected), X, weights...; dims=dims, mean=mean)
+    pvar = p - (lse.drop_var0 ? sum(iszero, diag(S)) : 0)
 
     # NOTE: don't need to check if mean is proper as this is already done above
     if mean === nothing
@@ -136,7 +141,7 @@ function cov(lse::LinearShrinkage, X::AbstractMatrix{<:Real}, weights::Frequency
         end
     end
 
-    return linear_shrinkage(lse.target, Xc, S, lse.shrinkage, n, p, lse.corrected, weights...)
+    return linear_shrinkage(lse.target, Xc, S, lse.shrinkage, n, p, pvar, lse.corrected, weights...)
 end
 
 ##############################################################################
@@ -228,45 +233,50 @@ https://strimmerlab.github.io/publications/journals/shrinkcov2005.pdf p.11.
 """
 function sum_fij(Xc, S, n, κ)
     sd  = sqrt.(diag(S))
-    M   = ((Xc.^3)' * Xc) ./ sd
+    sdinv = map(z -> guardeddiv(1, z), sd)
+    M   = ((Xc.^3)' * Xc) .* sdinv
     M .-= κ .* S .* sd
     M .*= sd'
     return sumij(M) / (n * κ)
 end
 function sum_fij(Xc, S, n, κ, weights)
     sd  = sqrt.(diag(S))
-    M   = ((Xc.^3)' * (weights .* Xc)) ./ sd
+    sdinv = map(z -> guardeddiv(1, z), sd)
+    M   = ((Xc.^3)' * (weights .* Xc)) .* sdinv
     M .-= κ .* S .* sd
     M .*= sd'
     return sumij(M) / (sum(weights) * κ)
 end
 ##############################################################################
 
 """
-    linear_shrinkage(target, Xc, S, λ, n, p, corrected, [weights])
+    linear_shrinkage(target, Xc, S, λ, n, p, pvar, corrected, [weights])
 
 Performs linear shrinkage with target of type `target` for data matrix `Xc`
 of size `n` by `p` with covariance matrix `S` and shrinkage parameter `λ`.
 Calculates corrected covariance if `corrected` is true.
+
+`pvar` is either `p` or `p - sum(iszero, diag(S))` (the number of non-zero
+diagonal variances in `S`). The choice is controlled by `LinearShrinkage(...; drop_var0=true/false)`.
 """
 linear_shrinkage
 
 ## TARGET A
 
 function linear_shrinkage(::DiagonalUnitVariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Real, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Real, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
     return linshrink(I, S, λ)
 end
 
 """
-    linear_shrinkage(::DiagonalUnitVariance, Xc, S, λ, n, p, corrected)
+    linear_shrinkage(::DiagonalUnitVariance, Xc, S, λ, n, p, pvar, corrected)
 
 Compute the shrinkage estimator where the target is a `DiagonalUnitVariance`.
 """
 function linear_shrinkage(::DiagonalUnitVariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
     F   = I
@@ -279,14 +289,14 @@ function linear_shrinkage(::DiagonalUnitVariance, Xc::AbstractMatrix,
     if λ ∈ [:auto, :lw]
         ΣS² = sumij2(S, with_diag=true)
         λ   = sumij(uccov(Xc², weights...), with_diag=true) / γ^2 - ΣS²
-        λ  /= κ * (ΣS² - 2tr(S) + p)
+        λ  /= κ * (ΣS² - 2tr(S) + pvar)
     elseif λ == :ss
         # use the standardised data matrix
-        d   = one(T) ./ vec(sum(Xc², weights...; dims=1))
+        d   = diaginv(pvar < p, oneunit(T), vec(sum(Xc², weights...; dims=1)))
         S̄   = rescale(S, sqrt.(d)) # this has diagonal 1/κ
         ΣS̄² = sumij2(S̄, with_diag=true)
         λ   = sumij(rescale!(uccov(Xc², weights...), d), with_diag=true) / γ^2 - ΣS̄²
-        λ  /= T(κ * ΣS̄² - p / κ)
+        λ  /= T(κ * ΣS̄² - pvar / κ)
     else
         throw(ArgumentError("Unsupported shrinkage method for target DiagonalUnitVariance: $λ."))
     end
@@ -296,25 +306,25 @@ end
 
 ## TARGET B
 
-target_B(S::AbstractMatrix, p::Int) = tr(S)/p * I
+target_B(S::AbstractMatrix, pvar::Int) = tr(S)/pvar * I
 
 function linear_shrinkage(::DiagonalCommonVariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Real, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Real, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
-    return linshrink(target_B(S, p), S, λ)
+    return linshrink(target_B(S, pvar), S, λ)
 end
 
 """
-    linear_shrinkage(::DiagonalCommonVariance, Xc, S, λ, n, p, corrected)
+    linear_shrinkage(::DiagonalCommonVariance, Xc, S, λ, n, p, pvar, corrected)
 
 Compute the shrinkage estimator where the target is a `DiagonalCommonVariance`.
 """
 function linear_shrinkage(::DiagonalCommonVariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
-    F   = target_B(S, p)
+    F   = target_B(S, pvar)
     T   = float(eltype(F))
     wn = totalweight(n, weights...)
     κ   = wn - Int(corrected)
@@ -325,27 +335,27 @@ function linear_shrinkage(::DiagonalCommonVariance, Xc::AbstractMatrix,
         v   = F.λ # tr(S)/p
         ΣS² = sumij2(S, with_diag=true)
         λ   = sumij(uccov(Xc², weights...), with_diag=true) / γ^2 - ΣS²
-        λ  /= κ * (ΣS² - p*v^2)
+        λ  /= κ * (ΣS² - pvar*v^2)
     elseif λ == :ss
         # use the standardised data matrix
-        d   = one(T) ./ vec(sum(Xc², weights...; dims=1))
+        d   = diaginv(pvar < p, oneunit(T), vec(sum(Xc², weights...; dims=1)))
         S̄   = rescale(S, sqrt.(d)) # this has diagonal 1/κ
         v̄   = κ # tr(S̄)/p
         ΣS̄² = sumij2(S̄, with_diag=true)
         λ   = sumij(rescale!(uccov(Xc², weights...), d), with_diag=true) / γ^2 - ΣS̄²
-        λ  /= T(κ * ΣS̄² - p/κ)
+        λ  /= T(κ * ΣS̄² - pvar/κ)
     elseif λ == :rblw
         # https://arxiv.org/pdf/0907.4698.pdf equations 17, 19
         trS² = sum(abs2, S)
         tr²S = tr(S)^2
         # note: using corrected or uncorrected S does not change λ
-        λ = T(((wn-2)/wn * trS² + tr²S) / ((wn+2) * (trS² - tr²S/p)))
+        λ = T(((wn-2)/wn * trS² + tr²S) / ((wn+2) * (trS² - tr²S/pvar)))
     elseif λ == :oas
         # https://arxiv.org/pdf/0907.4698.pdf equation 23
         trS² = sum(abs2, S)
         tr²S = tr(S)^2
         # note: using corrected or uncorrected S does not change λ
-        λ = ((one(T)-T(2.0)/p) * trS² + tr²S) / ((wn+one(T)-T(2.0)/p) * (trS² - tr²S/p))
+        λ = ((one(T)-T(2.0)/pvar) * trS² + tr²S) / ((wn+one(T)-T(2.0)/pvar) * (trS² - tr²S/pvar))
     else
         throw(ArgumentError("Unsupported shrinkage method for target DiagonalCommonVariance: $λ."))
     end
@@ -358,19 +368,19 @@ end
 target_D(S::AbstractMatrix) = Diagonal(S)
 
 function linear_shrinkage(::DiagonalUnequalVariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Real, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Real, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
     return linshrink(target_D(S), S, λ)
 end
 
 """
-    linear_shrinkage(::DiagonalUnequalVariance, Xc, S, λ, n, p, corrected)
+    linear_shrinkage(::DiagonalUnequalVariance, Xc, S, λ, n, p, pvar, corrected)
 
 Compute the shrinkage estimator where the target is a `DiagonalUnequalVariance`.
 """
 function linear_shrinkage(::DiagonalUnequalVariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
     F   = target_D(S)
@@ -388,7 +398,7 @@ function linear_shrinkage(::DiagonalUnequalVariance, Xc::AbstractMatrix,
         keep = diag(S) .> zero(T)
         Xc² = Xc²[:, keep]
         # use the standardised data matrix
-        d   = one(T) ./ vec(sum(Xc², weights...; dims=1))
+        d   = diaginv(pvar < p, oneunit(T), vec(sum(Xc², weights...; dims=1)))
         ΣS̄² = sumij2(rescale(S[keep, keep], sqrt.(d)))
         λ   = sumij(rescale!(uccov(Xc², weights...), d)) / γ^2 - ΣS̄²
         λ  /= κ * ΣS̄²
@@ -401,33 +411,33 @@ end
 
 ## TARGET C
 
-function target_C(S::AbstractMatrix, p::Int)
-    v  = tr(S)/p
-    c  = sumij(S; with_diag=false) / (p * (p - 1))
+function target_C(S::AbstractMatrix, p::Int, pvar::Int)
+    v  = tr(S)/pvar
+    c  = sumij(S; with_diag=false) / (pvar * (pvar - 1))
     F  = fill(c, (p, p))
     F -= Diagonal(F)
     F += v * I
     return F, v, c
 end
 
 function linear_shrinkage(::CommonCovariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Real, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Real, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
-    F, _, _ = target_C(S, p)
+    F, _, _ = target_C(S, p, pvar)
     return linshrink!(F, S, λ)
 end
 
 """
-    linear_shrinkage(::CommonCovariance, Xc, S, λ, n, p, corrected)
+    linear_shrinkage(::CommonCovariance, Xc, S, λ, n, p, pvar, corrected)
 
 Compute the shrinkage estimator where the target is a `CommonCovariance`.
 """
 function linear_shrinkage(::CommonCovariance, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
-    F, v, c = target_C(S, p)
+    F, v, c = target_C(S, p, pvar)
     T   = float(eltype(F))
     wn = totalweight(n, weights...)
     κ   = wn - Int(corrected)
@@ -437,13 +447,13 @@ function linear_shrinkage(::CommonCovariance, Xc::AbstractMatrix,
     if λ ∈ [:auto, :lw]
         ΣS² = sumij2(S, with_diag=true)
         λ   = sumij(uccov(Xc², weights...), with_diag=true) / γ^2 - ΣS²
-        λ  /= κ * (ΣS² - p*(p-1)*c^2 - p*v^2)
+        λ  /= κ * (ΣS² - pvar*(pvar-1)*c^2 - pvar*v^2)
     elseif λ == :ss
-        d   = one(T) ./ vec(sum(Xc², weights...; dims=1))
+        d   = diaginv(pvar < p, oneunit(T), vec(sum(Xc², weights...; dims=1)))
         S̄   = rescale(S, sqrt.(d))
         ΣS̄² = sumij2(S̄, with_diag=true)
         λ   = sumij(rescale!(uccov(Xc², weights...), d), with_diag=true) / γ^2 - ΣS̄²
-        λ  /= κ * ΣS̄² - p/κ - κ * sumij(S̄; with_diag=false)^2 / (p * (p - 1))
+        λ  /= κ * ΣS̄² - pvar/κ - κ * sumij(S̄; with_diag=false)^2 / (pvar * (pvar - 1))
     else
         throw(ArgumentError("Unsupported shrinkage method for target CommonCovariance: $λ."))
     end
@@ -459,20 +469,20 @@ function target_E(S::AbstractMatrix)
 end
 
 function linear_shrinkage(::PerfectPositiveCorrelation, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Real, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Real, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
     return linshrink!(target_E(S), S, λ)
 end
 
 """
-    linear_shrinkage(::PerfectPositiveCorrelation, Xc, S, λ, n, p, corrected)
+    linear_shrinkage(::PerfectPositiveCorrelation, Xc, S, λ, n, p, pvar, corrected)
 
 Compute the shrinkage estimator where the target is a
 `PerfectPositiveCorrelation`.
 """
 function linear_shrinkage(::PerfectPositiveCorrelation, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
     F   = target_E(S)
@@ -488,7 +498,7 @@ function linear_shrinkage(::PerfectPositiveCorrelation, Xc::AbstractMatrix,
         λ  -= sum_fij(Xc, S, n, κ, weights...)
         λ  /= sumij2(S - F)
     elseif λ == :ss
-        d   = one(T) ./ vec(sum(Xc², weights...; dims=1))
+        d   = diaginv(pvar < p, oneunit(T), vec(sum(Xc², weights...; dims=1)))
         s   = sqrt.(d)
         S̄   = rescale(S, s)
         ΣS̄² = sumij2(S̄)
@@ -505,33 +515,34 @@ end
 
 ## TARGET F
 
-function target_F(S::AbstractMatrix, p::Int)
+function target_F(S::AbstractMatrix, p::Int, pvar::Int)
     s  = sqrt.(diag(S))
-    s_ = s*s'
-    r̄  = (sum(S ./ s_) - p) / (p * (p - 1))
-    F_ = r̄ * s_
-    F  = F_ + (Diagonal(s_) - Diagonal(F_))
+    sinv = pvar < p ? map(z -> guardeddiv(1, z), s) : 1 ./ s
+    sinv_ = sinv*sinv'
+    r̄  = (sum(S .* sinv_) - pvar) / (pvar * (pvar - 1))
+    F_ = r̄ * (s*s')
+    F  = F_ + (Diagonal(S) - Diagonal(F_))
     return F, r̄
 end
 
 function linear_shrinkage(::ConstantCorrelation, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Real, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Real, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
-    F, _ = target_F(S, p)
+    F, _ = target_F(S, p, pvar)
     return linshrink!(F, S, λ)
 end
 
 """
-    linear_shrinkage(::ConstantCorrelation, Xc, S, λ, n, p, corrected)
+    linear_shrinkage(::ConstantCorrelation, Xc, S, λ, n, p, pvar, corrected)
 
 Compute the shrinkage estimator where the target is a `ConstantCorrelation`.
 """
 function linear_shrinkage(::ConstantCorrelation, Xc::AbstractMatrix,
-                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int,
+                          S::AbstractMatrix, λ::Symbol, n::Int, p::Int, pvar::Int,
                           corrected::Bool, weights::FrequencyWeights...)
 
-    F, r̄ = target_F(S, p)
+    F, r̄ = target_F(S, p, pvar)
     T    = float(eltype(F))
     wn = totalweight(n, weights...)
     κ    = wn - Int(corrected)
@@ -544,10 +555,10 @@ function linear_shrinkage(::ConstantCorrelation, Xc::AbstractMatrix,
         λ  -= r̄ * sum_fij(Xc, S, n, κ, weights...)
         λ  /= sumij2(S - F)
     elseif λ == :ss
-        d    = one(T) ./ vec(sum(Xc², weights...; dims=1))
+        d   = diaginv(pvar < p, oneunit(T), vec(sum(Xc², weights...; dims=1)))
         s    = sqrt.(d)
         S̄    = rescale(S, s)
-        F̄, r̄ = target_F(S̄, p)
+        F̄, r̄ = target_F(S̄, p, pvar)
         ΣS̄²  = sumij2(S̄)
         λ    = (sumij(rescale!(uccov(Xc², weights...), d)) / γ^2 - ΣS̄²) / κ
         λ   -= r̄ * sum_fij(Xc .* s', S̄, n, κ, weights...)