-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #4238 from vespa-engine/hmusum/add-diversity-min-groups-test
Add test of diversity with different min-groups settings
- Loading branch information
Showing
2 changed files
with
116 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Copyright Vespa.ai. All rights reserved. | ||
require 'indexed_only_search_test' | ||
require 'doc_generator' | ||
|
||
# System test for a rank profile that configures diversity with `min-groups`
# on the `genre` attribute. The schema is deployed from music.sd, located via
# `selfdir`.
class DiversityMinGroups < IndexedOnlySearchTest

  def setup
    set_owner('hmusum')
    # Number of documents fed by feed_docs and expected by the hit-count checks.
    @docs = 20
  end

  def test_diversity_min_groups
    deploy_app(SearchApp.new.sd(selfdir + 'music.sd'))
    start
    feed_docs
    wait_for_hitcount('query=sddocname:music', @docs)

    # All docs match this query, but 1 doc (doc 0) has higher relevancy, see feed_docs
    puts "Query: 'rock'"
    # Expected scores after second phase (first-phase score + 1000, see music.sd).
    # A hit that skipped second phase would instead score about 0.1634.
    best_doc_relevancy = 1000.3818623835995
    rest_relevancy = 1000.16343879032

    assert_hitcount('query=rock', @docs)
    # NOTE(review): the trailing 0 is presumably the index of the hit to check - confirm.
    assert_relevancy('query=rock&ranking=base', best_doc_relevancy, 0)
    assert_relevancy('query=rock&ranking=diversity_min_groups_5', best_doc_relevancy, 0)
    # Hit 0 should carry the best-doc relevancy; every other returned hit is
    # expected to have gone through second phase as well.
    check_relevancy('query=rock&ranking=diversity_min_groups_5', rest_relevancy, { 0 => best_doc_relevancy })
  end

  # Feeds @docs music documents with ids id:test:music::0 .. id:test:music::(@docs - 1).
  # Document 0 is the only one with genre 'rock'; all others share genre
  # 'alternative'. Every title contains the word "Rock".
  def feed_docs
    @docs.times do |i|
      doc = Document.new('music', "id:test:music::#{i}")
      if i == 0
        doc.add_field('genre', 'rock')
        doc.add_field('artist', 'The Clash')
        doc.add_field('title', 'Rock the Casbah')
      else
        doc.add_field('genre', 'alternative')
        doc.add_field('artist', 'Smashing Pumpkins')
        doc.add_field('title', 'Cherub Rock')
      end
      vespa.document_api_v1.put(doc, :brief => true)
    end
  end

  # Runs +query+ and verifies the relevance of the first +hits+ hits.
  #
  # query                           - query string passed unchanged to search
  # default_relevance               - expected relevance for hits without an
  #                                   explicit entry in the mapping
  # hit_number_to_relevance_mapping - hit index => expected relevance overrides
  # hits                            - number of hits the result must contain
  #
  # Note that +hits+ is only asserted against the result; it is not added to
  # the query, so callers needing another count must request it in +query+.
  def check_relevancy(query, default_relevance, hit_number_to_relevance_mapping, hits = 10)
    result = search(query)
    assert_equal(hits, result.hit.length)

    # Read each relevance once, then dump them all before asserting so a
    # failure still leaves the complete picture in the test log.
    actual_relevances = Array.new(hits) { |i| relevance(result, i) }
    actual_relevances.each_with_index do |actual, i|
      puts "hit #{i} relevance = #{actual}"
    end
    puts "---\n"

    actual_relevances.each_with_index do |actual, i|
      expected = hit_number_to_relevance_mapping[i] || default_relevance
      assert_approx(expected, actual, 0.01, "expected: #{expected}, got #{actual} for hit #{i}: #{result.hit[i]}")
    end
  end

  # Returns the 'relevancy' field of hit number +index+ in +result+ as a Float.
  def relevance(result, index)
    result.hit[index].field['relevancy'].to_f
  end

  def teardown
    stop
  end

end
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Copyright Vespa.ai. All rights reserved. | ||
|
||
# Schema for the 'music' document type: three string fields, one fieldset and two rank profiles. | ||
schema music { | ||
|
||
document music { | ||
|
||
# title and artist are indexed and included in summaries. | ||
field title type string { | ||
indexing: index | summary | ||
} | ||
|
||
field artist type string { | ||
indexing: index | summary | ||
} | ||
|
||
# genre is an attribute (not indexed); it is the attribute named by the diversity setting below. | ||
field genre type string { | ||
indexing: summary | attribute | ||
} | ||
|
||
} | ||
|
||
# The 'default' fieldset covers title and artist; genre is not part of it. | ||
fieldset default { | ||
fields: title, artist | ||
} | ||
|
||
# First phase sums nativeRank over artist and title. | ||
# Second phase adds 1000 to the first-phase score, with rerank-count 100. | ||
# NOTE(review): the +1000 offset presumably makes second-phase hits easy to tell apart from first-phase-only hits - confirm. | ||
rank-profile base inherits default { | ||
first-phase { | ||
expression: nativeRank(artist) + nativeRank(title) | ||
} | ||
|
||
second-phase { | ||
expression: firstPhase() + 1000 | ||
rerank-count: 100 | ||
} | ||
} | ||
|
||
# Same ranking as base, plus diversity over the genre attribute with min-groups 5. | ||
# NOTE(review): the exact min-groups semantics are not visible here - check the Vespa schema reference. | ||
rank-profile diversity_min_groups_5 inherits base { | ||
diversity { | ||
attribute: genre | ||
min-groups: 5 | ||
} | ||
} | ||
|
||
} | ||
|