Skip to content

Commit

Permalink
Merge pull request #4238 from vespa-engine/hmusum/add-diversity-min-g…
Browse files Browse the repository at this point in the history
…roups-test

Add test of diversity with different min-groups settings
  • Loading branch information
hmusum authored Oct 25, 2024
2 parents 3917ca5 + 30abc95 commit 16cbe4c
Show file tree
Hide file tree
Showing 2 changed files with 116 additions and 0 deletions.
72 changes: 72 additions & 0 deletions tests/search/diversity/diversity.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Copyright Vespa.ai. All rights reserved.
require 'indexed_only_search_test'
require 'doc_generator'

# System test for rank profiles that combine second-phase ranking with a
# diversity setting (min-groups). Deploys the music schema, feeds a small
# corpus and checks the relevance of the returned hits.
class DiversityMinGroups < IndexedOnlySearchTest

  # Sets the test owner and the number of documents that feed_docs will feed.
  def setup
    set_owner('hmusum')
    @docs = 20
  end

  # Feeds @docs documents that all match the query 'rock' and verifies hit
  # relevance for the 'base' and 'diversity_min_groups_5' rank profiles.
  def test_diversity_min_groups
    deploy_app(SearchApp.new.sd(selfdir + "music.sd"))
    start
    feed_docs
    wait_for_hitcount("query=sddocname:music", @docs)

    # All docs match this query, but 1 doc has higher relevancy, see feed_docs.
    puts "Query: 'rock'"
    best_doc_relevancy = 1000.3818623835995
    other_docs_relevancy = 1000.16343879032
    # For reference: the original test also recorded 0.16343879032006287 as the
    # relevance of a hit without second phase (other_docs_relevancy minus the
    # 1000 added by the second-phase expression). No assertion uses it.

    assert_hitcount("query=rock", @docs)
    assert_relevancy("query=rock&ranking=base", best_doc_relevancy, 0)
    assert_relevancy("query=rock&ranking=diversity_min_groups_5", best_doc_relevancy, 0)
    # Hit 0 should be the best document; every other returned hit should also
    # carry a second-phase score (relevance above 1000).
    check_relevancy("query=rock&ranking=diversity_min_groups_5",
                    other_docs_relevancy,
                    { 0 => best_doc_relevancy })
  end

  # Puts @docs music documents through the document API. Document 0 gets
  # genre 'rock' and its own artist/title; all other documents are identical
  # apart from their id and get genre 'alternative'.
  def feed_docs
    @docs.times do |i|
      doc = Document.new('music', "id:test:music::#{i}")
      genre, artist, title =
        if i.zero?
          ['rock', 'The Clash', 'Rock the Casbah']
        else
          ['alternative', 'Smashing Pumpkins', 'Cherub Rock']
        end
      doc.add_field('genre', genre)
      doc.add_field('artist', artist)
      doc.add_field('title', title)
      vespa.document_api_v1.put(doc, :brief => true)
    end
  end

  # Runs query and asserts the relevance of each of the first +hits+ hits.
  #
  # query                           - query string passed to search
  # default_relevance               - expected relevance for hits that are not
  #                                   listed in the mapping
  # hit_number_to_relevance_mapping - Hash from hit index to the expected
  #                                   relevance for that hit
  # hits                            - exact number of hits the result must hold
  #
  # NOTE(review): +hits+ is not added to the query itself, so it presumably has
  # to equal the number of hits the query returns by default - confirm.
  def check_relevancy(query, default_relevance, hit_number_to_relevance_mapping, hits=10)
    result = search(query)
    assert_equal(hits, result.hit.length)

    # Print every relevance first so a failing run shows the full picture.
    hits.times { |i| puts "hit #{i} relevance = #{relevance(result, i)}" }
    puts "---\n"

    hits.times do |i|
      expected_relevance = hit_number_to_relevance_mapping[i] || default_relevance
      hit = result.hit[i]
      actual_relevance = relevance(result, i)
      assert_approx(expected_relevance, actual_relevance, 0.01,
                    "expected: #{expected_relevance}, got #{actual_relevance} for hit #{i}: #{hit}")
    end
  end

  # Returns the 'relevancy' field of the hit at +index+ as a Float.
  def relevance(result, index)
    result.hit[index].field['relevancy'].to_f
  end

  # Stops what was started in the test.
  def teardown
    stop
  end

end

44 changes: 44 additions & 0 deletions tests/search/diversity/music.sd
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Copyright Vespa.ai. All rights reserved.

# Schema for the 'music' document type: three string fields, a default
# fieldset and two rank profiles ('base' and 'diversity_min_groups_5').
schema music {

document music {

# Indexed text field, also included in the summary.
field title type string {
indexing: index | summary
}

# Indexed text field, also included in the summary.
field artist type string {
indexing: index | summary
}

# Kept as an attribute (and in the summary); the diversity setting in
# 'diversity_min_groups_5' below refers to this field.
field genre type string {
indexing: summary | attribute
}

}

# Puts title and artist in the fieldset named 'default'.
# NOTE(review): presumably this is what a query term without an explicit
# field (e.g. query=rock) is matched against - confirm.
fieldset default {
fields: title, artist
}

# First phase: sum of nativeRank over artist and title.
# Second phase: first-phase score plus 1000, so a hit that has been through
# second phase is recognisable by a relevance above 1000.
rank-profile base inherits default {
first-phase {
expression: nativeRank(artist) + nativeRank(title)
}

second-phase {
expression: firstPhase() + 1000
# NOTE(review): presumably the maximum number of hits re-ranked in second
# phase (per content node) - confirm against the Vespa schema reference.
rerank-count: 100
}
}

# Same ranking expressions as 'base', with a diversity setting on genre.
# NOTE(review): min-groups: 5 presumably asks for hits from at least 5
# distinct genre values among those passed to second phase - confirm against
# the Vespa diversity documentation.
rank-profile diversity_min_groups_5 inherits base {
diversity {
attribute: genre
min-groups: 5
}
}

}

0 comments on commit 16cbe4c

Please sign in to comment.