Skip to content

Commit

Permalink
Check relevancy of all hits, show that some hits are not going
Browse files Browse the repository at this point in the history
through second phase ranking when there are few docs, but works
with more docs
  • Loading branch information
hmusum committed Oct 25, 2024
1 parent 776d7f9 commit 1d0d32d
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 19 deletions.
45 changes: 34 additions & 11 deletions tests/search/diversity/diversity.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,27 @@ class DiversityMinGroups < IndexedOnlySearchTest

# Test fixture setup: records the test owner and the values the test compares against.
# NOTE(review): @docs is assigned twice below (50, then 20); as written the later
# assignment wins. This looks like removed/added diff lines shown together -- confirm
# which value is intended.
def setup
set_owner('hmusum')
@docs = 50
@expected_relevancy = 1000.3344587750165
@docs = 20
end

# Deploys the music schema, feeds documents, and then checks hit counts and
# relevancy for the 'cherub rock' and 'rock' queries under several rank profiles.
# NOTE(review): this view appears to show removed and added diff lines together;
# confirm that every rank profile referenced here ('diversity', 'diversity_many_groups',
# 'diversity_min_groups_5') still exists in music.sd.
def test_diversity_min_groups
deploy_app(SearchApp.new.sd(selfdir+"music.sd"))
start
feed_docs
# Wait until all fed documents are searchable before asserting on results.
wait_for_hitcount("query=sddocname:music", @docs)
# All docs match this query, but 1 doc has lower relevancy, see feed_docs()
assert_hitcount("query=cherub+rock", @docs)
assert_hitcount("query=cherub+rock&ranking=base", @docs)

assert_relevancy("query=cherub+rock&ranking=base", @expected_relevancy, 0)
assert_relevancy("query=cherub+rock&ranking=diversity", @expected_relevancy, 0)
# diversity.min-groups is 60 in the 'diversity_many_groups' rank profile, more than number of docs
# => not enough docs to fulfill min-groups criteria
assert_relevancy("query=cherub+rock&ranking=diversity_many_groups", @expected_relevancy, 0)

# All docs match this query, but 1 doc has higher relevancy, see feed_docs()
puts "Query: 'rock'"
@expected_relevancy_best_doc = 1000.3818623835995
@expected_relevancy_rest = 1000.16343879032
# NOTE(review): @expected_relevancy_no_second_phase is not referenced in the visible part of this test.
@expected_relevancy_no_second_phase = 0.16343879032006287

assert_hitcount("query=rock", @docs)
assert_relevancy("query=rock&ranking=base", @expected_relevancy_best_doc, 0)
assert_relevancy("query=rock&ranking=diversity_min_groups_5", @expected_relevancy_best_doc, 0)
# Should get 1 hit that is doc 0, rest should have gone through second phase
# TODO: Fails with @docs = 20, works with @docs = 50
# Hit 0 is expected to have the best-doc relevancy; every other hit falls back to @expected_relevancy_rest.
check_relevancy("query=rock&ranking=diversity_min_groups_5", @expected_relevancy_rest, {0 => @expected_relevancy_best_doc})
end

def feed_docs
Expand All @@ -42,6 +45,26 @@ def feed_docs
}
end

# Runs +query+ and verifies the relevancy of each of the first +hits+ results.
#
# query                           - query string passed to search().
# default_relevance               - relevancy expected for any hit without an explicit entry.
# hit_number_to_relevance_mapping - Hash from hit index to the relevancy expected for that hit.
# hits                            - number of hits the result must contain (default 10).
#
# All relevancies are printed first, so the full picture is visible in the
# output even when one of the assertions that follow fails.
def check_relevancy(query, default_relevance, hit_number_to_relevance_mapping, hits=10)
  result = search(query)
  assert_equal(hits, result.hit.length)

  hits.times do |idx|
    puts "hit #{idx} relevance = #{relevance(result, idx)}"
  end
  puts "---\n"

  hits.times do |idx|
    # Fall back to the default when the mapping has no entry for this hit.
    wanted = hit_number_to_relevance_mapping[idx] || default_relevance
    current_hit = result.hit[idx]
    actual = relevance(result, idx)
    assert_approx(wanted, actual, 0.01, "expected: #{wanted}, got #{actual} for hit #{idx}: #{current_hit}")
  end
end

# Returns the 'relevancy' field of the hit at +index+ in +result+, converted to a Float.
def relevance(result, index)
  hit = result.hit[index]
  raw_value = hit.field['relevancy']
  raw_value.to_f
end

# Test fixture teardown: delegates to the framework's stop
# (presumably shutting down what `start` launched in the test -- confirm).
def teardown
stop
end
Expand Down
10 changes: 2 additions & 8 deletions tests/search/diversity/music.sd
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,16 @@ schema music {

second-phase {
expression: firstPhase() + 1000
rerank-count: 100
}
}

rank-profile diversity inherits base {
rank-profile diversity_min_groups_5 inherits base {
diversity {
attribute: genre
min-groups: 5
}
}

rank-profile diversity_many_groups inherits diversity {
diversity {
attribute: genre
min-groups: 60
}
}

}

0 comments on commit 1d0d32d

Please sign in to comment.