From 195b9c8ca43e8155d2a402cff56aeb06fa5cf300 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?H=C3=A5vard=20Pettersen?= Date: Wed, 4 Dec 2024 11:03:24 +0000 Subject: [PATCH] X is now d60b5a1 (golden sample from quality test) Y is adjusted old X (d66a1 -> d60a1) --- .../wand_stopwords/MicroBmSearcher.java | 14 +++++++++++++- tests/performance/wand_stopwords/wand_stopwords.rb | 9 +++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/tests/performance/wand_stopwords/MicroBmSearcher.java b/tests/performance/wand_stopwords/MicroBmSearcher.java index fdd8a6178..f71f6442d 100644 --- a/tests/performance/wand_stopwords/MicroBmSearcher.java +++ b/tests/performance/wand_stopwords/MicroBmSearcher.java @@ -116,11 +116,19 @@ public Result search(Query weakAndQuery, Execution execution) { Query weakAndQueryX = changeRoot(weakAndQuery, new WeakAndItem()); weakAndQueryX.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.01"); - weakAndQueryX.properties().set("rankproperty.vespa.matching.weakand.stop_word_drop_limit", "0.66"); + weakAndQueryX.properties().set("rankproperty.vespa.matching.diskindex.bitvector_limit", "0.05"); + weakAndQueryX.properties().set("rankproperty.vespa.matching.weakand.stop_word_drop_limit", "0.60"); Result weakAndResultX = execution.search(weakAndQueryX); execution.fill(weakAndResultX); var weakAndSetX = getHitIds(weakAndResultX); + Query weakAndQueryY = changeRoot(weakAndQuery, new WeakAndItem()); + weakAndQueryY.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.01"); + weakAndQueryY.properties().set("rankproperty.vespa.matching.weakand.stop_word_drop_limit", "0.60"); + Result weakAndResultY = execution.search(weakAndQueryY); + execution.fill(weakAndResultY); + var weakAndSetY = getHitIds(weakAndResultY); + // double orTime = timeQuery(orQuery, execution, 1); double weakAndTime = timeQuery(weakAndQuery, execution); double weakAndTimeA10 = timeQuery(weakAndQueryA10, execution); @@ -128,6 +136,7 @@ public Result search(Query weakAndQuery, Execution execution) { double weakAndTimeD20 = timeQuery(weakAndQueryD20, execution); double weakAndTimeD05 = timeQuery(weakAndQueryD05, execution); double weakAndTimeX = timeQuery(weakAndQueryX, execution); + double weakAndTimeY = timeQuery(weakAndQueryY, execution); double andTime = timeQuery(andQuery, execution); Hit meta = new Hit("meta"); @@ -139,6 +148,7 @@ public Result search(Query weakAndQuery, Execution execution) { meta.setField("weakAndQualityD20", quality(orSet, weakAndSetD20)); meta.setField("weakAndQualityD05", quality(orSet, weakAndSetD05)); meta.setField("weakAndQualityX", quality(orSet, weakAndSetX)); + meta.setField("weakAndQualityY", quality(orSet, weakAndSetY)); meta.setField("orHits", orResult.getTotalHitCount()); meta.setField("andHits", andResult.getTotalHitCount()); meta.setField("weakAndHits", weakAndResult.getTotalHitCount()); @@ -147,6 +157,7 @@ public Result search(Query weakAndQuery, Execution execution) { meta.setField("weakAndHitsD20", weakAndResultD20.getTotalHitCount()); meta.setField("weakAndHitsD05", weakAndResultD05.getTotalHitCount()); meta.setField("weakAndHitsX", weakAndResultX.getTotalHitCount()); + meta.setField("weakAndHitsY", weakAndResultY.getTotalHitCount()); meta.setField("orTime", orTime); meta.setField("andTime", andTime); meta.setField("weakAndTime", weakAndTime); @@ -155,6 +166,7 @@ public Result search(Query weakAndQuery, Execution execution) { meta.setField("weakAndTimeD20", weakAndTimeD20); meta.setField("weakAndTimeD05", weakAndTimeD05); meta.setField("weakAndTimeX", weakAndTimeX); + meta.setField("weakAndTimeY", weakAndTimeY); Result result = new Result(weakAndQuery); result.setTotalHitCount(weakAndResult.getTotalHitCount()); result.hits().add(meta); diff --git a/tests/performance/wand_stopwords/wand_stopwords.rb b/tests/performance/wand_stopwords/wand_stopwords.rb index 0666555e3..2705024eb 100644 --- a/tests/performance/wand_stopwords/wand_stopwords.rb +++ b/tests/performance/wand_stopwords/wand_stopwords.rb @@ -74,6 +74,7 @@ def measure_wand_quality waD20Q = [] waD05Q = [] waXQ = [] + waYQ = [] andH = [] orH = [] waH = [] @@ -82,6 +83,7 @@ def measure_wand_quality waD20H = [] waD05H = [] waXH = [] + waYH = [] andT = [] orT = [] waT = [] @@ -90,6 +92,7 @@ def measure_wand_quality waD20T = [] waD05T = [] waXT = [] + waYT = [] counter = 0 q_file = download_file('squad2-questions.raw.141k.txt.zst', vespa.adminserver) vespa.adminserver.execute("zstdcat #{q_file} | head -n 1000 > #{q_file}.raw") @@ -107,6 +110,7 @@ def measure_wand_quality waD20Q.append(h.field['weakAndQualityD20']) waD05Q.append(h.field['weakAndQualityD05']) waXQ.append(h.field['weakAndQualityX']) + waYQ.append(h.field['weakAndQualityY']) andH.append(h.field['andHits']) orH.append(h.field['orHits']) waH.append(h.field['weakAndHits']) @@ -115,6 +119,7 @@ def measure_wand_quality waD20H.append(h.field['weakAndHitsD20']) waD05H.append(h.field['weakAndHitsD05']) waXH.append(h.field['weakAndHitsX']) + waYH.append(h.field['weakAndHitsY']) andT.append(h.field['andTime']) orT.append(h.field['orTime']) waT.append(h.field['weakAndTime']) @@ -123,6 +128,7 @@ def measure_wand_quality waD20T.append(h.field['weakAndTimeD20']) waD05T.append(h.field['weakAndTimeD05']) waXT.append(h.field['weakAndTimeX']) + waYT.append(h.field['weakAndTimeY']) quality = h.field['weakAndQuality'] wantedHits = max(h.field['andHits'], min(100, h.field['orHits'])) hitsFactor = (1000 * h.field['weakAndHits']) / wantedHits @@ -142,6 +148,7 @@ def measure_wand_quality process("WeakAnd-D20-recall", "recall@100", waD20Q) process("WeakAnd-D5-recall", "recall@100", waD05Q) process("WeakAnd-X-recall", "recall@100", waXQ) + process("WeakAnd-Y-recall", "recall@100", waYQ) process("AND-hits", "hits", andH) process("WeakAnd-hits", "hits", waH) @@ -150,6 +157,7 @@ def measure_wand_quality process("WeakAnd-D20-hits", "hits", waD20H) process("WeakAnd-D5-hits", "hits", waD05H) process("WeakAnd-X-hits", "hits", waXH) + process("WeakAnd-Y-hits", "hits", waYH) process("OR-hits", "hits", orH) process("AND-ms", "latency", andT) @@ -159,6 +167,7 @@ def measure_wand_quality process("WeakAnd-D20-ms", "latency", waD20T) process("WeakAnd-D5-ms", "latency", waD05T) process("WeakAnd-X-ms", "latency", waXT) + process("WeakAnd-Y-ms", "latency", waYT) process("OR-ms", "latency", orT) end