Skip to content

Commit

Permalink
Reciprocal Rank Fusion
Browse files Browse the repository at this point in the history
  • Loading branch information
svilupp authored Jul 1, 2024
1 parent af4f67f commit 44450c0
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 2 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

## [0.35.0]

### Added
- Added a utility function `reciprocal_rank_fusion` to RAGTools, as a principled way to merge multiple rankings. See `?PromptingTools.Experimental.RAGTools.reciprocal_rank_fusion` for more information.

## [0.34.0]

### Added
Expand Down
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "PromptingTools"
uuid = "670122d1-24a8-4d70-bfce-740807c42192"
authors = ["J S @svilupp and contributors"]
version = "0.34.0"
version = "0.35.0"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
29 changes: 29 additions & 0 deletions src/Experimental/RAGTools/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -563,3 +563,32 @@ function unpack_bits(packed_matrix::AbstractMatrix{UInt64})

return output_matrix
end

"""
    reciprocal_rank_fusion(args...; k::Int = 60)

Merge multiple rankings into a single ranking using Reciprocal Rank Fusion (RRF).

Each ranking is an iterable of chunk positions ordered from best to worst. A position
found at (1-based) rank `r` of a ranking contributes `1 / (k + r)` to its score, and the
contributions are summed over all rankings (a position repeated within one ranking
contributes once per occurrence).

# Arguments
- `args...`: zero or more rankings, each an iterable of integer chunk positions.

# Keywords
- `k::Int = 60`: constant added to every rank before taking the reciprocal. Must be
  non-negative. Larger values shrink the gap between ranks, so appearing in several
  rankings counts for more than being at the top of a single one.

# Returns
A tuple `(merged, scores)`:
- `merged::Vector{Int}`: the unique positions sorted by descending score. Positions with
  equal scores keep the order in which they were first encountered.
- `scores::Dict{Int, Float64}`: the fused score of every position.

# Throws
- `ArgumentError`: if `k` is negative (`k + r` could then be zero or negative).

# Example
```julia
positions1 = [1, 3, 5, 7, 9]
positions2 = [2, 4, 6, 8, 10]
positions3 = [2, 4, 6, 11, 12]
merged_positions, scores = reciprocal_rank_fusion(positions1, positions2, positions3)
```
"""
function reciprocal_rank_fusion(args...; k::Int = 60)
    k >= 0 || throw(ArgumentError("k must be non-negative, got $k"))

    scores = Dict{Int, Float64}()
    # Unique positions in first-seen order; used to break score ties deterministically
    # instead of relying on the iteration order of `scores`.
    first_seen = Int[]

    for positions in args
        for (rank, pos) in enumerate(positions)
            haskey(scores, pos) || push!(first_seen, pos)
            scores[pos] = get(scores, pos, 0.0) + 1.0 / (k + rank)
        end
    end

    # A stable sort keeps first-seen order among positions with equal scores.
    merged = sort(first_seen; by = pos -> scores[pos], rev = true, alg = MergeSort)

    return merged, scores
end
53 changes: 52 additions & 1 deletion test/Experimental/RAGTools/utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ using PromptingTools.Experimental.RAGTools: token_with_boundaries, text_to_trigr
using PromptingTools.Experimental.RAGTools: split_into_code_and_sentences
using PromptingTools.Experimental.RAGTools: getpropertynested, setpropertynested,
merge_kwargs_nested
using PromptingTools.Experimental.RAGTools: pack_bits, unpack_bits, preprocess_tokens
using PromptingTools.Experimental.RAGTools: pack_bits, unpack_bits, preprocess_tokens,
reciprocal_rank_fusion

@testset "_check_aiextract_capability" begin
@test _check_aiextract_capability("gpt-3.5-turbo") == nothing
Expand Down Expand Up @@ -548,3 +549,53 @@ end
@test_throws ArgumentError RT._stem(nothing, "abc")
@test_throws ArgumentError RT._unicode_normalize(nothing)
end

@testset "reciprocal_rank_fusion" begin
    # With k = 0 the score contributed by rank r is exactly 1 / r.
    one_third = 0.3333333333333333

    # Disjoint rankings: items at the same rank tie, so only the tiers are checked
    merged, rrf = reciprocal_rank_fusion([1, 2, 3], [4, 5, 6]; k = 0)
    @test Set(merged) == Set([1, 2, 3, 4, 5, 6])
    @test Set(merged[1:2]) == Set([1, 4])
    @test Set(merged[3:4]) == Set([2, 5])
    @test Set(merged[5:6]) == Set([3, 6])
    expected_scores = Dict(
        1 => 1.0, 2 => 0.5, 3 => one_third,
        4 => 1.0, 5 => 0.5, 6 => one_third)
    @test rrf == expected_scores

    # Overlapping rankings: a single top-1 hit (item 1) still beats item 3 at k = 0
    merged, rrf = reciprocal_rank_fusion([1, 2, 3], [2, 3, 4]; k = 0)
    @test Set(merged) == Set([2, 3, 1, 4])
    @test merged[1:4] == [2, 1, 3, 4]

    # A larger k flattens rank differences, so two appearances (item 3) now beat
    # a single top-1 appearance (item 1)
    merged, rrf = reciprocal_rank_fusion([1, 2, 3], [2, 3, 4]; k = 60)
    @test Set(merged) == Set([2, 3, 1, 4])
    @test merged[1:4] == [2, 3, 1, 4]

    # Three overlapping rankings
    merged, rrf = reciprocal_rank_fusion([1, 2, 3], [2, 3, 4], [3, 4, 5]; k = 0)
    @test Set(merged) == Set([3, 2, 4, 1, 5])
    @test merged[1:5] == [3, 2, 1, 4, 5]

    # A single empty ranking yields empty outputs
    empty_result = reciprocal_rank_fusion([]; k = 0)
    @test empty_result == ([], Dict{Int, Float64}())

    # An empty ranking next to a non-empty one does not affect the result
    mixed_result = reciprocal_rank_fusion([], [1, 2, 3]; k = 0)
    @test mixed_result == ([1, 2, 3], Dict(1 => 1.0, 2 => 0.5, 3 => one_third))

    # Rankings of different lengths
    merged, rrf = reciprocal_rank_fusion([1, 2], [3, 4, 5]; k = 0)
    @test Set(merged) == Set([1, 2, 3, 4, 5])
    @test Set(merged[1:2]) == Set([1, 3])
    @test Set(merged[3:4]) == Set([2, 4])
    @test merged[5] == 5
end

0 comments on commit 44450c0

Please sign in to comment.