diff --git a/tests/performance/wand_stopwords/MicroBmSearcher.java b/tests/performance/wand_stopwords/MicroBmSearcher.java index ccb672677..294a16a2b 100644 --- a/tests/performance/wand_stopwords/MicroBmSearcher.java +++ b/tests/performance/wand_stopwords/MicroBmSearcher.java @@ -90,17 +90,29 @@ public Result search(Query weakAndQuery, Execution execution) { execution.fill(andResult); var andSet = getHitIds(andResult); - Query weakAndQuery20 = changeRoot(weakAndQuery, new WeakAndItem()); - weakAndQuery20.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.20"); - Result weakAndResult20 = execution.search(weakAndQuery20); - execution.fill(weakAndResult20); - var weakAndSet20 = getHitIds(weakAndResult20); - - Query weakAndQuery05 = changeRoot(weakAndQuery, new WeakAndItem()); - weakAndQuery05.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.05"); - Result weakAndResult05 = execution.search(weakAndQuery05); - execution.fill(weakAndResult05); - var weakAndSet05 = getHitIds(weakAndResult05); + Query weakAndQueryA10 = changeRoot(weakAndQuery, new WeakAndItem()); + weakAndQueryA10.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.10"); + Result weakAndResultA10 = execution.search(weakAndQueryA10); + execution.fill(weakAndResultA10); + var weakAndSetA10 = getHitIds(weakAndResultA10); + + Query weakAndQueryA02 = changeRoot(weakAndQuery, new WeakAndItem()); + weakAndQueryA02.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.02"); + Result weakAndResultA02 = execution.search(weakAndQueryA02); + execution.fill(weakAndResultA02); + var weakAndSetA02 = getHitIds(weakAndResultA02); + + Query weakAndQueryS20 = changeRoot(weakAndQuery, new WeakAndItem()); + weakAndQueryS20.properties().set("rankproperty.vespa.matching.weakand.stop_word_score_limit", "0.20"); + Result weakAndResultS20 = execution.search(weakAndQueryS20); + execution.fill(weakAndResultS20); + var weakAndSetS20 = getHitIds(weakAndResultS20); + + Query weakAndQueryS05 = changeRoot(weakAndQuery, new WeakAndItem()); + weakAndQueryS05.properties().set("rankproperty.vespa.matching.weakand.stop_word_score_limit", "0.05"); + Result weakAndResultS05 = execution.search(weakAndQueryS05); + execution.fill(weakAndResultS05); + var weakAndSetS05 = getHitIds(weakAndResultS05); Query weakAndQueryD20 = changeRoot(weakAndQuery, new WeakAndItem()); weakAndQueryD20.properties().set("rankproperty.vespa.matching.weakand.stop_word_drop_limit", "0.20"); @@ -114,36 +126,55 @@ public Result search(Query weakAndQuery, Execution execution) { execution.fill(weakAndResultD05); var weakAndSetD05 = getHitIds(weakAndResultD05); + Query weakAndQueryX = changeRoot(weakAndQuery, new WeakAndItem()); + weakAndQueryX.properties().set("rankproperty.vespa.matching.weakand.stop_word_adjust_limit", "0.01"); + weakAndQueryX.properties().set("rankproperty.vespa.matching.weakand.stop_word_drop_limit", "0.30"); + Result weakAndResultX = execution.search(weakAndQueryX); + execution.fill(weakAndResultX); + var weakAndSetX = getHitIds(weakAndResultX); + // double orTime = timeQuery(orQuery, execution, 1); double weakAndTime = timeQuery(weakAndQuery, execution); - double weakAndTime20 = timeQuery(weakAndQuery20, execution); - double weakAndTime05 = timeQuery(weakAndQuery05, execution); + double weakAndTimeA10 = timeQuery(weakAndQueryA10, execution); + double weakAndTimeA02 = timeQuery(weakAndQueryA02, execution); + double weakAndTimeS20 = timeQuery(weakAndQueryS20, execution); + double weakAndTimeS05 = timeQuery(weakAndQueryS05, execution); double weakAndTimeD20 = timeQuery(weakAndQueryD20, execution); double weakAndTimeD05 = timeQuery(weakAndQueryD05, execution); + double weakAndTimeX = timeQuery(weakAndQueryX, execution); double andTime = timeQuery(andQuery, execution); Hit meta = new Hit("meta"); meta.setMeta(true); meta.setField("andQuality", quality(orSet, andSet)); meta.setField("weakAndQuality", quality(orSet, weakAndSet)); - meta.setField("weakAndQuality20", quality(orSet, weakAndSet20)); - meta.setField("weakAndQuality05", quality(orSet, weakAndSet05)); + meta.setField("weakAndQualityA10", quality(orSet, weakAndSetA10)); + meta.setField("weakAndQualityA02", quality(orSet, weakAndSetA02)); + meta.setField("weakAndQualityS20", quality(orSet, weakAndSetS20)); + meta.setField("weakAndQualityS05", quality(orSet, weakAndSetS05)); meta.setField("weakAndQualityD20", quality(orSet, weakAndSetD20)); meta.setField("weakAndQualityD05", quality(orSet, weakAndSetD05)); + meta.setField("weakAndQualityX", quality(orSet, weakAndSetX)); meta.setField("orHits", orResult.getTotalHitCount()); meta.setField("andHits", andResult.getTotalHitCount()); meta.setField("weakAndHits", weakAndResult.getTotalHitCount()); - meta.setField("weakAndHits20", weakAndResult20.getTotalHitCount()); - meta.setField("weakAndHits05", weakAndResult05.getTotalHitCount()); + meta.setField("weakAndHitsA10", weakAndResultA10.getTotalHitCount()); + meta.setField("weakAndHitsA02", weakAndResultA02.getTotalHitCount()); + meta.setField("weakAndHitsS20", weakAndResultS20.getTotalHitCount()); + meta.setField("weakAndHitsS05", weakAndResultS05.getTotalHitCount()); meta.setField("weakAndHitsD20", weakAndResultD20.getTotalHitCount()); meta.setField("weakAndHitsD05", weakAndResultD05.getTotalHitCount()); + meta.setField("weakAndHitsX", weakAndResultX.getTotalHitCount()); meta.setField("orTime", orTime); meta.setField("andTime", andTime); meta.setField("weakAndTime", weakAndTime); - meta.setField("weakAndTime20", weakAndTime20); - meta.setField("weakAndTime05", weakAndTime05); + meta.setField("weakAndTimeA10", weakAndTimeA10); + meta.setField("weakAndTimeA02", weakAndTimeA02); + meta.setField("weakAndTimeS20", weakAndTimeS20); + meta.setField("weakAndTimeS05", weakAndTimeS05); meta.setField("weakAndTimeD20", weakAndTimeD20); meta.setField("weakAndTimeD05", weakAndTimeD05); + meta.setField("weakAndTimeX", weakAndTimeX); Result result = new Result(weakAndQuery); result.setTotalHitCount(weakAndResult.getTotalHitCount()); result.hits().add(meta); diff --git a/tests/performance/wand_stopwords/wand_stopwords.rb b/tests/performance/wand_stopwords/wand_stopwords.rb index b8b9a8674..ddc8232fc 100644 --- a/tests/performance/wand_stopwords/wand_stopwords.rb +++ b/tests/performance/wand_stopwords/wand_stopwords.rb @@ -69,24 +69,33 @@ def measure_wand_quality andQ = [] orQ = [] waQ = [] - wa20Q = [] - wa05Q = [] + waA10Q = [] + waA02Q = [] + waS20Q = [] + waS05Q = [] waD20Q = [] waD05Q = [] + waXQ = [] andH = [] orH = [] waH = [] - wa20H = [] - wa05H = [] + waA10H = [] + waA02H = [] + waS20H = [] + waS05H = [] waD20H = [] waD05H = [] + waXH = [] andT = [] orT = [] waT = [] - wa20T = [] - wa05T = [] + waA10T = [] + waA02T = [] + waS20T = [] + waS05T = [] waD20T = [] waD05T = [] + waXT = [] counter = 0 q_file = download_file('squad2-questions.raw.141k.txt.zst', vespa.adminserver) vespa.adminserver.execute("zstdcat #{q_file} | head -n 1000 > #{q_file}.raw") @@ -99,24 +108,33 @@ def measure_wand_quality h = r.hit[0] andQ.append(h.field['andQuality']) waQ.append(h.field['weakAndQuality']) - wa20Q.append(h.field['weakAndQuality20']) - wa05Q.append(h.field['weakAndQuality05']) + waA10Q.append(h.field['weakAndQualityA10']) + waA02Q.append(h.field['weakAndQualityA02']) + waS20Q.append(h.field['weakAndQualityS20']) + waS05Q.append(h.field['weakAndQualityS05']) waD20Q.append(h.field['weakAndQualityD20']) waD05Q.append(h.field['weakAndQualityD05']) + waXQ.append(h.field['weakAndQualityX']) andH.append(h.field['andHits']) orH.append(h.field['orHits']) waH.append(h.field['weakAndHits']) - wa20H.append(h.field['weakAndHits20']) - wa05H.append(h.field['weakAndHits05']) + waA10H.append(h.field['weakAndHitsA10']) + waA02H.append(h.field['weakAndHitsA02']) + waS20H.append(h.field['weakAndHitsS20']) + waS05H.append(h.field['weakAndHitsS05']) waD20H.append(h.field['weakAndHitsD20']) waD05H.append(h.field['weakAndHitsD05']) + waXH.append(h.field['weakAndHitsX']) andT.append(h.field['andTime']) orT.append(h.field['orTime']) waT.append(h.field['weakAndTime']) - wa20T.append(h.field['weakAndTime20']) - wa05T.append(h.field['weakAndTime05']) + waA10T.append(h.field['weakAndTimeA10']) + waA02T.append(h.field['weakAndTimeA02']) + waS20T.append(h.field['weakAndTimeS20']) + waS05T.append(h.field['weakAndTimeS05']) waD20T.append(h.field['weakAndTimeD20']) waD05T.append(h.field['weakAndTimeD05']) + waXT.append(h.field['weakAndTimeX']) quality = h.field['weakAndQuality'] wantedHits = max(h.field['andHits'], min(100, h.field['orHits'])) hitsFactor = (1000 * h.field['weakAndHits']) / wantedHits @@ -130,26 +148,35 @@ def measure_wand_quality sz = andQ.size puts "== Average and median over #{sz} results ==" process("AND-recall", "recall@100", andQ) - process("WeakAnd-100-recall", "recall@100", waQ) - process("WeakAnd-20-recall", "recall@100", wa20Q) - process("WeakAnd-5-recall", "recall@100", wa05Q) - process("WeakAnd-D20-recall", "recall@100", waD20Q) - process("WeakAnd-D5-recall", "recall@100", waD05Q) + process("WeakAnd-recall", "recall@100", waQ) + process("WeakAnd-A10-recall", "recall@100", waA10Q) + process("WeakAnd-A2-recall", "recall@100", waA02Q) + process("WeakAnd-S20-recall", "recall@100", waS20Q) + process("WeakAnd-S5-recall", "recall@100", waS05Q) + process("WeakAnd-D20-recall", "recall@100", waD20Q) + process("WeakAnd-D5-recall", "recall@100", waD05Q) + process("WeakAnd-X-recall", "recall@100", waXQ) process("AND-hits", "hits", andH) - process("WeakAnd-100-hits", "hits", waH) - process("WeakAnd-20-hits", "hits", wa20H) - process("WeakAnd-5-hits", "hits", wa05H) - process("WeakAnd-D20-hits", "hits", waD20H) - process("WeakAnd-D5-hits", "hits", waD05H) + process("WeakAnd-hits", "hits", waH) + process("WeakAnd-A10-hits", "hits", waA10H) + process("WeakAnd-A2-hits", "hits", waA02H) + process("WeakAnd-S20-hits", "hits", waS20H) + process("WeakAnd-S5-hits", "hits", waS05H) + process("WeakAnd-D20-hits", "hits", waD20H) + process("WeakAnd-D5-hits", "hits", waD05H) + process("WeakAnd-X-hits", "hits", waXH) process("OR-hits", "hits", orH) process("AND-ms", "latency", andT) - process("WeakAnd-100-ms", "latency", waT) - process("WeakAnd-20-ms", "latency", wa20T) - process("WeakAnd-5-ms", "latency", wa05T) - process("WeakAnd-D20-ms", "latency", waD20T) - process("WeakAnd-D5-ms", "latency", waD05T) + process("WeakAnd-ms", "latency", waT) + process("WeakAnd-A10-ms", "latency", waA10T) + process("WeakAnd-A2-ms", "latency", waA02T) + process("WeakAnd-S20-ms", "latency", waS20T) + process("WeakAnd-S5-ms", "latency", waS05T) + process("WeakAnd-D20-ms", "latency", waD20T) + process("WeakAnd-D5-ms", "latency", waD05T) + process("WeakAnd-X-ms", "latency", waXT) process("OR-ms", "latency", orT) end