From d351421115296a1367fe2f6dee08ae0cd266b29f Mon Sep 17 00:00:00 2001 From: Debartha Paul Date: Thu, 13 Jul 2023 08:44:43 +0530 Subject: [PATCH 1/5] Weighted Average Shifted Histograms Adding the functionality of weights in average shifted histograms for univariates --- src/AverageShiftedHistograms.jl | 2 +- src/univariate.jl | 54 +++++++++++++++++++++++++++++++++ test/runtests.jl | 17 +++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/src/AverageShiftedHistograms.jl b/src/AverageShiftedHistograms.jl index bc3632a..5debe41 100755 --- a/src/AverageShiftedHistograms.jl +++ b/src/AverageShiftedHistograms.jl @@ -3,7 +3,7 @@ module AverageShiftedHistograms import UnicodePlots, RecipesBase using LinearAlgebra, Statistics import StatsBase: nobs, fweights -export ash, ash!, extendrange, xy, xyz, nout, nobs, Kernels +export ash, ashw, ash!, ashw!, extendrange, xy, xyz, nout, nobs, Kernels #-----------------------------------------------------------------------# common diff --git a/src/univariate.jl b/src/univariate.jl index 230cc74..99e2889 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -23,6 +23,7 @@ function Base.show(io::IO, ::MIME"text/plain", o::Ash) end Base.push!(o::Ash, y::Real) = _histogram!(o::Ash, [y]) +Base.push!(o::Ash, y::Real, weight::Real) = _weightedhistogram!(o::Ash, [y], [weight]) # add data to the histogram function _histogram!(o::Ash, y) @@ -40,6 +41,47 @@ function _histogram!(o::Ash, y) return end +#add weighted data to the histogram +#= +function _weightedhistogram!(o::Ash, y, weight) + b = length(o.rng) + a = first(o.rng) + b == length(weight) || throw(ArgumentError("length of Weights should be same as the length of the range")) + δinv = inv(step(o.rng)) + c = o.counts + for yi in y + ki = floor(Int, (yi - a) * δinv + 1.5) + if 1 <= ki <= b + @inbounds c[ki] += weight[ki] + end + end + o.nobs += length(y) + return +end +=# +function _weightedhistogram!(o::Ash, y, weight) + b = length(o.rng) + a = first(o.rng) + b == length(weight) || throw(DimensionMismatch("Length of weights should be same as the length of the range")) + δinv = inv(step(o.rng)) + c = o.counts + map(y) do x + ki = floor(Int, (x - a) * δinv + 1.5) + if 1 <= ki <= b + c[ki] += weight[ki] + end + end +#= function increaseindex(x) + ki = floor(Int, (x - a) * δinv + 1.5) + if 1 <= ki <= b + @inbounds c[ki] += weight[ki] + end + end + increaseindex.(y)=# + o.nobs += length(y) + return +end + # recalculate the ash density function _ash!(o::Ash) b = length(o.rng) @@ -97,6 +139,12 @@ function ash(x; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(In _ash!(o) end +function ashw(x, weight; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) + o = Ash(rng, kernel, m) + _weightedhistogram!(o, x, weight) + _ash!(o) +end + """ ash!(o::Ash; kw...) @@ -115,6 +163,12 @@ function ash!(o::Ash, y; m = o.m, kernel = o.kernel) _histogram!(o, y) _ash!(o) end +function ashw!(o::Ash, y, weight; m = o.m, kernel = o.kernel) + o.m = m + o.kernel = kernel + _weightedhistogram!(o, y, weight) + _ash!(o) +end function Base.merge!(o::Ash, o2::Ash) o.kernel == o2.kernel || error("Merge failed. Ash objects use different kernels.") diff --git a/test/runtests.jl b/test/runtests.jl index 1a08b6f..5ad4fb0 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -107,4 +107,21 @@ end xyz(o) end +@testset "AshWeighted" begin + x = randn(10_000) + o = ashw(x, ones(21); rng = -1:0.1:1) + o2 = ash(x; rng = -1:0.1:1) + @test o == o2 + + y = rand(1000) + w = rand(1:10, 11) + o = ash(y; rng = 0:0.1:1) + ow = ashw(y, w; rng = 0:0.1:1) + @test o.counts .* w == ow.counts + + w = rand(1:10, 10) + @test_throws DimensionMismatch ashw(y, w; rng = 0:0.1:1) + +end + end #module From 974482adb66f7153676298819770b1f59efd81c2 Mon Sep 17 00:00:00 2001 From: Debartha Paul Date: Tue, 25 Jul 2023 11:50:32 +0530 Subject: [PATCH 2/5] Updated documentation Added documentation for weights --- src/univariate.jl | 41 ++++++++++++++--------------------------- 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/src/univariate.jl b/src/univariate.jl index 99e2889..900a8c8 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -41,24 +41,7 @@ function _histogram!(o::Ash, y) return end -#add weighted data to the histogram -#= -function _weightedhistogram!(o::Ash, y, weight) - b = length(o.rng) - a = first(o.rng) - b == length(weight) || throw(ArgumentError("length of Weights should be same as the length of the range")) - δinv = inv(step(o.rng)) - c = o.counts - for yi in y - ki = floor(Int, (yi - a) * δinv + 1.5) - if 1 <= ki <= b - @inbounds c[ki] += weight[ki] - end - end - o.nobs += length(y) - return -end -=# +# add data to the weighted histogram function _weightedhistogram!(o::Ash, y, weight) b = length(o.rng) a = first(o.rng) @@ -71,13 +54,6 @@ function _weightedhistogram!(o::Ash, y, weight) c[ki] += weight[ki] end end -#= function increaseindex(x) - ki = floor(Int, (x - a) * δinv + 1.5) - if 1 <= ki <= b - @inbounds c[ki] += weight[ki] - end - end - increaseindex.(y)=# o.nobs += length(y) return end @@ -110,6 +86,15 @@ Fit an average shifted histogram to data `x`. Keyword options are: - `m` : Number of adjacent histograms to smooth over - `kernel` : kernel used to smooth the estimate +# Univariate Weighted Ash + ashw(x, weight; kw...) + +Fit a weighted average shifted histogram to data `x`, with weights being `weights`. Keyword options are: + +- `rng` : values over which the density will be estimated +- `m` : Number of adjacent histograms to smooth over +- `kernel` : kernel used to smooth the estimate + # Bivariate Ash ash(x, y; kw...) @@ -122,11 +107,12 @@ Fit a bivariate averaged shifted histogram to data vectors `x` and `y`. Keyword - `kernely` : kernel in y direction # Mutating an Ash object -Ash objectes can be updated with new data, smoothing parameter(s), or kernel(s). They cannot, however, change the ranges over which the density is estimated. It is therefore suggested to err on the side of caution when choosing data endpoints. +Ash objectes can be updated with new data, new weights(only for univariates), smoothing parameter(s), or kernel(s). They cannot, however, change the ranges over which the density is estimated. It is therefore suggested to err on the side of caution when choosing data endpoints. # univariate ash!(obj; kw...) ash!(obj, newx, kw...) + ashw!(obj, newx, newweight; kw...) # bivariate ash!(obj; kw...) @@ -149,8 +135,9 @@ end """ ash!(o::Ash; kw...) ash!(o::Ash, newdata; kw...) + ashw!(o::Ash, newdata, newweight; kw...) -Update an Ash estimate with new data, smoothing parameter (keyword `m`), or kernel (keyword `kernel`): +Update an Ash estimate with new data, new weight, smoothing parameter (keyword `m`), or kernel (keyword `kernel`): """ function ash!(o::Ash; m = o.m, kernel = o.kernel) o.m = m From a9881ce295fbbf2f277762f23c32e4bb77e845b9 Mon Sep 17 00:00:00 2001 From: Debartha Paul Date: Wed, 2 Aug 2023 22:18:39 +0530 Subject: [PATCH 3/5] Weights as Keyword argument Changed the weights to keyword argument. Updated documentations and tests accordingly --- src/univariate.jl | 33 +++++++++++++++++++++------------ test/runtests.jl | 9 +++++---- 2 files changed, 26 insertions(+), 16 deletions(-) diff --git a/src/univariate.jl b/src/univariate.jl index 900a8c8..e0f8cb2 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -87,10 +87,11 @@ Fit an average shifted histogram to data `x`. Keyword options are: - `kernel` : kernel used to smooth the estimate # Univariate Weighted Ash - ashw(x, weight; kw...) + ashw(x; weight, kw...) Fit a weighted average shifted histogram to data `x`, with weights being `weights`. Keyword options are: +- `weight` : weights (defaults to `nothing`) - `rng` : values over which the density will be estimated - `m` : Number of adjacent histograms to smooth over - `kernel` : kernel used to smooth the estimate @@ -112,7 +113,7 @@ Ash objectes can be updated with new data, new weights(only for univariates), sm # univariate ash!(obj; kw...) ash!(obj, newx, kw...) - ashw!(obj, newx, newweight; kw...) + ashw!(obj, newx; kw...) # bivariate ash!(obj; kw...) @@ -125,17 +126,21 @@ function ash(x; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(In _ash!(o) end -function ashw(x, weight; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) - o = Ash(rng, kernel, m) - _weightedhistogram!(o, x, weight) - _ash!(o) +function ashw(x; weight=nothing, nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) + if weight === nothing + ash(x; nbin = nbin, rng = rng, m = m, kernel = kernel) + else + o = Ash(rng, kernel, m) + _weightedhistogram!(o, x, weight) + _ash!(o) + end end """ ash!(o::Ash; kw...) ash!(o::Ash, newdata; kw...) - ashw!(o::Ash, newdata, newweight; kw...) + ashw!(o::Ash, newdata; kw...) Update an Ash estimate with new data, new weight, smoothing parameter (keyword `m`), or kernel (keyword `kernel`): """ @@ -150,11 +155,15 @@ function ash!(o::Ash, y; m = o.m, kernel = o.kernel) _histogram!(o, y) _ash!(o) end -function ashw!(o::Ash, y, weight; m = o.m, kernel = o.kernel) - o.m = m - o.kernel = kernel - _weightedhistogram!(o, y, weight) - _ash!(o) +function ashw!(o::Ash, y; weight=nothing, m = o.m, kernel = o.kernel) + if weight === nothing + ash!(o, y; m = m, kernel = kernel) + else + o.m = m + o.kernel = kernel + _weightedhistogram!(o, y, weight) + _ash!(o) + end end function Base.merge!(o::Ash, o2::Ash) diff --git a/test/runtests.jl b/test/runtests.jl index 5ad4fb0..5531dec 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -109,18 +109,19 @@ end @testset "AshWeighted" begin x = randn(10_000) - o = ashw(x, ones(21); rng = -1:0.1:1) + o = ashw(x; weight = ones(21), rng = -1:0.1:1) o2 = ash(x; rng = -1:0.1:1) - @test o == o2 + o3 = ashw(x; rng = -1:0.1:1) + @test o == o2 == o3 y = rand(1000) w = rand(1:10, 11) o = ash(y; rng = 0:0.1:1) - ow = ashw(y, w; rng = 0:0.1:1) + ow = ashw(y; weight = w, rng = 0:0.1:1) @test o.counts .* w == ow.counts w = rand(1:10, 10) - @test_throws DimensionMismatch ashw(y, w; rng = 0:0.1:1) + @test_throws DimensionMismatch ashw(y; weight = w, rng = 0:0.1:1) end From 444af4b5dfc963f9ea6611fcf9c18370d0e53c96 Mon Sep 17 00:00:00 2001 From: Debartha Paul Date: Sat, 16 Sep 2023 09:51:17 +0530 Subject: [PATCH 4/5] Changes to `ash` and `ash!` - Removed functions `ashw` and `ashw!` - Integrated the weighted-ash as part of `ash` and `ash!` - Updated tests --- src/AverageShiftedHistograms.jl | 2 +- src/univariate.jl | 24 ++++++------------------ test/runtests.jl | 9 ++++----- 3 files changed, 11 insertions(+), 24 deletions(-) diff --git a/src/AverageShiftedHistograms.jl b/src/AverageShiftedHistograms.jl index 5debe41..bc3632a 100755 --- a/src/AverageShiftedHistograms.jl +++ b/src/AverageShiftedHistograms.jl @@ -3,7 +3,7 @@ module AverageShiftedHistograms import UnicodePlots, RecipesBase using LinearAlgebra, Statistics import StatsBase: nobs, fweights -export ash, ashw, ash!, ashw!, extendrange, xy, xyz, nout, nobs, Kernels +export ash, ash!, extendrange, xy, xyz, nout, nobs, Kernels #-----------------------------------------------------------------------# common diff --git a/src/univariate.jl b/src/univariate.jl index e0f8cb2..4422d94 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -120,20 +120,14 @@ Ash objectes can be updated with new data, new weights(only for univariates), sm ash!(obj, newx, newy; kw...) """ -function ash(x; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) +function ash(x; weight=nothing, nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) o = Ash(rng, kernel, m) - _histogram!(o, x) - _ash!(o) -end - -function ashw(x; weight=nothing, nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) if weight === nothing - ash(x; nbin = nbin, rng = rng, m = m, kernel = kernel) + _histogram!(o, x) else - o = Ash(rng, kernel, m) _weightedhistogram!(o, x, weight) - _ash!(o) end + _ash!(o) end @@ -149,21 +143,15 @@ function ash!(o::Ash; m = o.m, kernel = o.kernel) o.kernel = kernel _ash!(o) end -function ash!(o::Ash, y; m = o.m, kernel = o.kernel) +function ash!(o::Ash, y; weight=nothing, m = o.m, kernel = o.kernel) o.m = m o.kernel = kernel - _histogram!(o, y) - _ash!(o) -end -function ashw!(o::Ash, y; weight=nothing, m = o.m, kernel = o.kernel) if weight === nothing - ash!(o, y; m = m, kernel = kernel) + _histogram!(o, y) else - o.m = m - o.kernel = kernel _weightedhistogram!(o, y, weight) - _ash!(o) end + _ash!(o) end function Base.merge!(o::Ash, o2::Ash) diff --git a/test/runtests.jl b/test/runtests.jl index 5531dec..6e23722 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -109,19 +109,18 @@ end @testset "AshWeighted" begin x = randn(10_000) - o = ashw(x; weight = ones(21), rng = -1:0.1:1) + o = ash(x; weight = ones(21), rng = -1:0.1:1) o2 = ash(x; rng = -1:0.1:1) - o3 = ashw(x; rng = -1:0.1:1) - @test o == o2 == o3 + @test o == o2 y = rand(1000) w = rand(1:10, 11) o = ash(y; rng = 0:0.1:1) - ow = ashw(y; weight = w, rng = 0:0.1:1) + ow = ash(y; weight = w, rng = 0:0.1:1) @test o.counts .* w == ow.counts w = rand(1:10, 10) - @test_throws DimensionMismatch ashw(y; weight = w, rng = 0:0.1:1) + @test_throws DimensionMismatch ash(y; weight = w, rng = 0:0.1:1) end From 5fb7ecc2b68aef673f3960f5884d4fe4d1b72d20 Mon Sep 17 00:00:00 2001 From: Debartha Paul Date: Wed, 20 Sep 2023 22:42:23 +0530 Subject: [PATCH 5/5] Implement AbstractWeights - Implement AbstractWeights - Added tests for weights, aweights, fweights, pweights - Removed weight as a keyword argument and make it a positional argument instead - Updated documentations --- src/AverageShiftedHistograms.jl | 2 +- src/univariate.jl | 48 ++++++++++++++++----------------- test/runtests.jl | 31 +++++++++++---------- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/src/AverageShiftedHistograms.jl b/src/AverageShiftedHistograms.jl index bc3632a..1287c67 100755 --- a/src/AverageShiftedHistograms.jl +++ b/src/AverageShiftedHistograms.jl @@ -2,7 +2,7 @@ module AverageShiftedHistograms import UnicodePlots, RecipesBase using LinearAlgebra, Statistics -import StatsBase: nobs, fweights +import StatsBase: nobs, fweights, AbstractWeights export ash, ash!, extendrange, xy, xyz, nout, nobs, Kernels diff --git a/src/univariate.jl b/src/univariate.jl index 4422d94..6d26aa9 100644 --- a/src/univariate.jl +++ b/src/univariate.jl @@ -42,7 +42,7 @@ function _histogram!(o::Ash, y) end # add data to the weighted histogram -function _weightedhistogram!(o::Ash, y, weight) +function _weightedhistogram!(o::Ash, y, weight::AbstractWeights) b = length(o.rng) a = first(o.rng) b == length(weight) || throw(DimensionMismatch("Length of weights should be same as the length of the range")) @@ -80,18 +80,14 @@ end # Univariate Ash ash(x; kw...) -Fit an average shifted histogram to data `x`. Keyword options are: +Fit an average shifted histogram to data `x`. -- `rng` : values over which the density will be estimated -- `m` : Number of adjacent histograms to smooth over -- `kernel` : kernel used to smooth the estimate + ash(x, weight::AbstractWeights; kw...) -# Univariate Weighted Ash - ashw(x; weight, kw...) +Fit a weighted average shifted histogram to data `x`. -Fit a weighted average shifted histogram to data `x`, with weights being `weights`. Keyword options are: +Keyword options are: -- `weight` : weights (defaults to `nothing`) - `rng` : values over which the density will be estimated - `m` : Number of adjacent histograms to smooth over - `kernel` : kernel used to smooth the estimate @@ -112,21 +108,23 @@ Ash objectes can be updated with new data, new weights(only for univariates), sm # univariate ash!(obj; kw...) - ash!(obj, newx, kw...) - ashw!(obj, newx; kw...) + ash!(obj, newx; kw...) + ash!(obj, newx, weight::AbstractWeights; kw...) # bivariate ash!(obj; kw...) ash!(obj, newx, newy; kw...) """ -function ash(x; weight=nothing, nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) +function ash(x; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) o = Ash(rng, kernel, m) - if weight === nothing - _histogram!(o, x) - else - _weightedhistogram!(o, x, weight) - end + _histogram!(o, x) + _ash!(o) +end + +function ash(x, weight::AbstractWeights; nbin=500, rng::AbstractRange = extendrange(x, nbin), m = ceil(Int, length(rng)/100), kernel = Kernels.biweight) + o = Ash(rng, kernel, m) + _weightedhistogram!(o, x, weight) _ash!(o) end @@ -134,7 +132,7 @@ end """ ash!(o::Ash; kw...) ash!(o::Ash, newdata; kw...) - ashw!(o::Ash, newdata; kw...) + ash!(o::Ash, newdata, weight::AbstractWeights; kw...) Update an Ash estimate with new data, new weight, smoothing parameter (keyword `m`), or kernel (keyword `kernel`): """ @@ -143,14 +141,16 @@ function ash!(o::Ash; m = o.m, kernel = o.kernel) o.kernel = kernel _ash!(o) end -function ash!(o::Ash, y; weight=nothing, m = o.m, kernel = o.kernel) +function ash!(o::Ash, y; m = o.m, kernel = o.kernel) o.m = m o.kernel = kernel - if weight === nothing - _histogram!(o, y) - else - _weightedhistogram!(o, y, weight) - end + _histogram!(o, y) + _ash!(o) +end +function ash!(o::Ash, y, weight::AbstractWeights; m = o.m, kernel = o.kernel) + o.m = m + o.kernel = kernel + _weightedhistogram!(o, y, weight) _ash!(o) end diff --git a/test/runtests.jl b/test/runtests.jl index 6e23722..732e0e2 100755 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -108,20 +108,23 @@ end end @testset "AshWeighted" begin - x = randn(10_000) - o = ash(x; weight = ones(21), rng = -1:0.1:1) - o2 = ash(x; rng = -1:0.1:1) - @test o == o2 - - y = rand(1000) - w = rand(1:10, 11) - o = ash(y; rng = 0:0.1:1) - ow = ash(y; weight = w, rng = 0:0.1:1) - @test o.counts .* w == ow.counts - - w = rand(1:10, 10) - @test_throws DimensionMismatch ash(y; weight = w, rng = 0:0.1:1) - + weight_funcs = (weights, aweights, fweights, pweights) + + for f in weight_funcs + x = randn(10_000) + o = ash(x, f(ones(21)), rng = -1:0.1:1) + o2 = ash(x; rng = -1:0.1:1) + @test o == o2 + + y = rand(1000) + w = f(rand(1:10, 11)) + o = ash(y; rng = 0:0.1:1) + ow = ash(y, w; rng = 0:0.1:1) + @test o.counts .* w == ow.counts + + w = f(rand(1:10, 10)) + @test_throws DimensionMismatch ash(y, w; rng = 0:0.1:1) + end end end #module