
Commit ea6dd62

Test Direct IO and buffered IO modes with different posting list cache sizes

Reduce to always testing with 64 clients.
vekterli committed Nov 7, 2024
1 parent cf0ad0c commit ea6dd62
Showing 1 changed file with 53 additions and 20 deletions.
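For orientation, the run matrix that the updated test executes is roughly: one MMAP baseline run, then DIRECTIO and NORMAL (buffered) IO crossed with each posting list cache size, always with 64 clients. The sketch below is illustrative only; enumerate_runs is a hypothetical helper (not part of the test), and the cache sizes are the non-local values returned by test_profile in the diff.

# Illustrative sketch of the benchmark matrix (hypothetical helper, not in the test itself)
def enumerate_runs(cache_sizes_mb: [0, 24, 256, 2 * 1024], clients: 64)
  runs = [{ io_mode: 'MMAP', cache_mb: nil, clients: clients }] # mmap baseline; cache not applicable
  ['DIRECTIO', 'NORMAL'].each do |io_mode|
    cache_sizes_mb.each do |cache_mb|
      runs << { io_mode: io_mode, cache_mb: cache_mb, clients: clients }
    end
  end
  runs
end

# Prints one line per benchmark run, e.g. "directio, 256 MiB posting list cache @ 64 clients"
enumerate_runs.each do |run|
  cache_desc =
    if run[:cache_mb].nil?
      ''
    elsif run[:cache_mb] > 0
      ", #{run[:cache_mb]} MiB posting list cache"
    else
      ', no posting list cache'
    end
  puts "#{run[:io_mode].downcase}#{cache_desc} @ #{run[:clients]} clients"
end

Under these assumed values this works out to 1 + 2 × 4 = 9 deployments per full test run.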
tests/performance/mmap_vs_directio/mmap_vs_directio.rb (73 changes: 53 additions & 20 deletions)
@@ -19,6 +19,22 @@ def teardown
super
end

def testing_locally?
false
end

def test_profile
if testing_locally?
{ :doc_count => 50_000,
:query_runtime => 20,
:cache_sizes_mb => [0, 16, 128, 1024] }
else
{ :doc_count => -1,
:query_runtime => 60,
:cache_sizes_mb => [0, 24, 256, 2 * 1024] }
end
end

def test_wikipedia_corpus_search_performance
set_description('Test search performance on English Wikipedia corpus and query set '+
'when file reading is done via either mmap or Direct IO')
@@ -27,53 +43,63 @@ def test_wikipedia_corpus_search_performance
@container = vespa.container.values.first
start

@profile = test_profile

@query_file_name = 'squad2-questions.fbench.141k.txt'
@no_stop_words_query_file_name = 'squad2-questions.max-df-20.fbench.141k.txt'

report_io_stat_deltas do
feed_file('enwiki-20240801-pages.1M.jsonl.zst')
feed_file('enwiki-20240801-pages.1M.jsonl.zst', @profile[:doc_count])
end

@search_node.trigger_flush # Shovel everything into a disk index
@search_node.execute("du -hS #{Environment.instance.vespa_home}/var/db/vespa/search/cluster.search/")

# One-shot warmup round with many clients. This helps measure contention for paging in data.
# Note that we don't tag as "warmup=true", as we want profiling enabled here as well.
puts "Warming up mmap'ed region with 64 clients"
report_io_stat_deltas do
benchmark_queries(@query_file_name, 'mmap_warmup', 64, false)
end
# MMap provides the baseline (expected best case) query performance, assuming all index data fits in memory.
deploy_and_run_queries(search_io_mode: 'MMAP')

['MMAP', 'DIRECTIO', 'NORMAL'].each do |io_mode|
deploy_and_run_queries(search_io_mode: io_mode)
['DIRECTIO', 'NORMAL'].each do |io_mode|
@profile[:cache_sizes_mb].each do |cache_size_mb|
deploy_and_run_queries(search_io_mode: io_mode, cache_size_mb: cache_size_mb)
end
end

stop
end

# Feeding must already have been done (using MMAP search_io_mode)
def deploy_and_run_queries(search_io_mode:)
def deploy_and_run_queries(search_io_mode:, cache_size_mb: 0)
if search_io_mode != 'MMAP'
vespa.stop_content_node('search', 0)
puts "Redeploying app with `search.io` mode '#{search_io_mode}'"
deploy_app(make_app(search_io_mode: search_io_mode))
puts "----------"
puts "Redeploying app with `search.io` mode '#{search_io_mode}', cache size #{cache_size_mb} MiB"
puts "----------"
deploy_app(make_app(search_io_mode: search_io_mode, cache_size_mb: cache_size_mb))
@search_node = vespa.search['search'].first
@container = vespa.container.values.first
vespa.start_content_node('search', 0)
sleep 2 # Allow for container health pings to catch up
end

pretty_mode = search_io_mode.downcase
puts "Searching with '#{pretty_mode}' search store backing"
[16, 32, 64].each do |clients|
report_io_stat_deltas do
benchmark_queries(@query_file_name, pretty_mode, clients, false)
end
cache_desc = cache_size_mb > 0 ? "#{cache_size_mb}mb_cache" : "nocache"
run_type = "#{pretty_mode}_#{cache_desc}"
clients = 64

unless search_io_mode == 'DIRECTIO' and cache_size_mb == 0
puts "Warming up cache"
report_io_stat_deltas do
benchmark_queries(@no_stop_words_query_file_name, "#{pretty_mode}_no_stop_words", clients, false)
benchmark_queries(@query_file_name, "#{run_type}_warmup", clients, true, @profile[:query_runtime])
end
end

puts "Searching with '#{pretty_mode}' search store backing using #{clients} clients"
report_io_stat_deltas do
benchmark_queries(@query_file_name, run_type, clients, false, @profile[:query_runtime])
end
report_io_stat_deltas do
benchmark_queries(@no_stop_words_query_file_name, "#{run_type}_no_stop_words", clients, false, @profile[:query_runtime])
end
end

def feed_file(feed_file, n_docs = -1)
@@ -93,8 +119,8 @@ def download_file(file_name, vespa_node)
download_file_from_s3(file_name, vespa_node, 'wikipedia')
end

def make_app(search_io_mode:)
SearchApp.new.sd(selfdir + 'wikimedia.sd').
def make_app(search_io_mode:, cache_size_mb: 0)
app = SearchApp.new.sd(selfdir + 'wikimedia.sd').
container(Container.new('default').
jvmoptions("-Xms16g -Xmx16g").
search(Searching.new).
@@ -103,6 +129,13 @@ def make_app(search_io_mode:)
indexing_cluster('default').
indexing_chain('indexing').
search_io(search_io_mode)

if search_io_mode != 'MMAP'
app.config(ConfigOverride.new('vespa.config.search.core.proton').
add('index', ConfigValue.new('postinglist',
ConfigValue.new('cache', ConfigValue.new('maxbytes', cache_size_mb * 1024 * 1024)))))
end
app
end

def report_io_stat_deltas
